xref: /aosp_15_r20/external/licenseclassifier/serializer/serializer_test.go (revision 46c4c49da23cae783fa41bf46525a6505638499a)
1*46c4c49dSIbrahim Kanouche// Copyright 2017 Google Inc.
2*46c4c49dSIbrahim Kanouche//
3*46c4c49dSIbrahim Kanouche// Licensed under the Apache License, Version 2.0 (the "License");
4*46c4c49dSIbrahim Kanouche// you may not use this file except in compliance with the License.
5*46c4c49dSIbrahim Kanouche// You may obtain a copy of the License at
6*46c4c49dSIbrahim Kanouche//
7*46c4c49dSIbrahim Kanouche//	http://www.apache.org/licenses/LICENSE-2.0
8*46c4c49dSIbrahim Kanouche//
9*46c4c49dSIbrahim Kanouche// Unless required by applicable law or agreed to in writing, software
10*46c4c49dSIbrahim Kanouche// distributed under the License is distributed on an "AS IS" BASIS,
11*46c4c49dSIbrahim Kanouche// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12*46c4c49dSIbrahim Kanouche// See the License for the specific language governing permissions and
13*46c4c49dSIbrahim Kanouche// limitations under the License.
14*46c4c49dSIbrahim Kanouchepackage serializer
15*46c4c49dSIbrahim Kanouche
16*46c4c49dSIbrahim Kanoucheimport (
17*46c4c49dSIbrahim Kanouche	"archive/tar"
18*46c4c49dSIbrahim Kanouche	"bytes"
19*46c4c49dSIbrahim Kanouche	"compress/gzip"
20*46c4c49dSIbrahim Kanouche	"fmt"
21*46c4c49dSIbrahim Kanouche	"io"
22*46c4c49dSIbrahim Kanouche	"log"
23*46c4c49dSIbrahim Kanouche	"os"
24*46c4c49dSIbrahim Kanouche	"reflect"
25*46c4c49dSIbrahim Kanouche	"sort"
26*46c4c49dSIbrahim Kanouche	"strings"
27*46c4c49dSIbrahim Kanouche	"testing"
28*46c4c49dSIbrahim Kanouche
29*46c4c49dSIbrahim Kanouche	"github.com/google/licenseclassifier"
30*46c4c49dSIbrahim Kanouche	"github.com/google/licenseclassifier/stringclassifier/searchset"
31*46c4c49dSIbrahim Kanouche)
32*46c4c49dSIbrahim Kanouche
// Fixture data shared by the tests in this file; populated once by TestMain.
var (
	// apache20Header holds the bytes read from "Apache-2.0.header.txt".
	apache20Header []byte
	// mit holds the bytes read from "MIT.txt".
	mit []byte
	// normApache is apache20Header after being passed through normalize.
	normApache string
	// normMIT is mit after being passed through normalize.
	normMIT string
)
37*46c4c49dSIbrahim Kanouche
38*46c4c49dSIbrahim Kanouchefunc TestMain(m *testing.M) {
39*46c4c49dSIbrahim Kanouche	var err error
40*46c4c49dSIbrahim Kanouche	apache20Header, err = licenseclassifier.ReadLicenseFile("Apache-2.0.header.txt")
41*46c4c49dSIbrahim Kanouche	if err != nil {
42*46c4c49dSIbrahim Kanouche		log.Fatalf("error reading contents of Apache-2.0.header.txt: %v", err)
43*46c4c49dSIbrahim Kanouche	}
44*46c4c49dSIbrahim Kanouche	normApache = normalize(string(apache20Header))
45*46c4c49dSIbrahim Kanouche
46*46c4c49dSIbrahim Kanouche	mit, err = licenseclassifier.ReadLicenseFile("MIT.txt")
47*46c4c49dSIbrahim Kanouche	if err != nil {
48*46c4c49dSIbrahim Kanouche		log.Fatalf("error reading contents of MIT.txt: %v", err)
49*46c4c49dSIbrahim Kanouche	}
50*46c4c49dSIbrahim Kanouche	normMIT = normalize(string(mit))
51*46c4c49dSIbrahim Kanouche
52*46c4c49dSIbrahim Kanouche	os.Exit(m.Run())
53*46c4c49dSIbrahim Kanouche}
54*46c4c49dSIbrahim Kanouche
// entry describes one file expected inside the archive produced by
// ArchiveLicenses.
type entry struct {
	// name is the expected tar header name; contents is the expected file
	// body (the normalized license text in the tests below).
	name, contents string
	// size is the expected tar header size in bytes.
	size int64
}
60*46c4c49dSIbrahim Kanouche
61*46c4c49dSIbrahim Kanouchefunc TestSerializer_ArchiveLicense(t *testing.T) {
62*46c4c49dSIbrahim Kanouche	tests := []struct {
63*46c4c49dSIbrahim Kanouche		description string
64*46c4c49dSIbrahim Kanouche		licenses    []string
65*46c4c49dSIbrahim Kanouche		want        []entry
66*46c4c49dSIbrahim Kanouche	}{
67*46c4c49dSIbrahim Kanouche		{
68*46c4c49dSIbrahim Kanouche			description: "Archiving Apache 2.0 header",
69*46c4c49dSIbrahim Kanouche			licenses:    []string{"Apache-2.0.header.txt"},
70*46c4c49dSIbrahim Kanouche			want: []entry{
71*46c4c49dSIbrahim Kanouche				{
72*46c4c49dSIbrahim Kanouche					name:     "Apache-2.0.header.txt",
73*46c4c49dSIbrahim Kanouche					size:     int64(len(normApache)),
74*46c4c49dSIbrahim Kanouche					contents: normApache,
75*46c4c49dSIbrahim Kanouche				},
76*46c4c49dSIbrahim Kanouche			},
77*46c4c49dSIbrahim Kanouche		},
78*46c4c49dSIbrahim Kanouche		{
79*46c4c49dSIbrahim Kanouche			description: "Archiving Apache 2.0 header + MIT",
80*46c4c49dSIbrahim Kanouche			licenses:    []string{"Apache-2.0.header.txt", "MIT.txt"},
81*46c4c49dSIbrahim Kanouche			want: []entry{
82*46c4c49dSIbrahim Kanouche				{
83*46c4c49dSIbrahim Kanouche					name:     "Apache-2.0.header.txt",
84*46c4c49dSIbrahim Kanouche					size:     int64(len(normApache)),
85*46c4c49dSIbrahim Kanouche					contents: normApache,
86*46c4c49dSIbrahim Kanouche				},
87*46c4c49dSIbrahim Kanouche				{
88*46c4c49dSIbrahim Kanouche					name:     "MIT.txt",
89*46c4c49dSIbrahim Kanouche					size:     int64(len(normMIT)),
90*46c4c49dSIbrahim Kanouche					contents: normMIT,
91*46c4c49dSIbrahim Kanouche				},
92*46c4c49dSIbrahim Kanouche			},
93*46c4c49dSIbrahim Kanouche		},
94*46c4c49dSIbrahim Kanouche	}
95*46c4c49dSIbrahim Kanouche
96*46c4c49dSIbrahim Kanouche	for _, tt := range tests {
97*46c4c49dSIbrahim Kanouche		var writer bytes.Buffer
98*46c4c49dSIbrahim Kanouche		if err := ArchiveLicenses(tt.licenses, &writer); err != nil {
99*46c4c49dSIbrahim Kanouche			t.Errorf("ArchiveLicenses(%q): cannot archive license: %v", tt.description, err)
100*46c4c49dSIbrahim Kanouche			continue
101*46c4c49dSIbrahim Kanouche		}
102*46c4c49dSIbrahim Kanouche
103*46c4c49dSIbrahim Kanouche		reader := bytes.NewReader(writer.Bytes())
104*46c4c49dSIbrahim Kanouche		gr, err := gzip.NewReader(reader)
105*46c4c49dSIbrahim Kanouche		if err != nil {
106*46c4c49dSIbrahim Kanouche			t.Errorf("ArchiveLicenses(%q): cannot create gzip reader: %v", tt.description, err)
107*46c4c49dSIbrahim Kanouche			continue
108*46c4c49dSIbrahim Kanouche		}
109*46c4c49dSIbrahim Kanouche
110*46c4c49dSIbrahim Kanouche		tr := tar.NewReader(gr)
111*46c4c49dSIbrahim Kanouche		for i := 0; ; i++ {
112*46c4c49dSIbrahim Kanouche			hdr, err := tr.Next()
113*46c4c49dSIbrahim Kanouche			if err == io.EOF {
114*46c4c49dSIbrahim Kanouche				break
115*46c4c49dSIbrahim Kanouche			}
116*46c4c49dSIbrahim Kanouche			if err != nil {
117*46c4c49dSIbrahim Kanouche				t.Errorf("ArchiveLicenses(%q): cannot read header: %v", tt.description, err)
118*46c4c49dSIbrahim Kanouche				break
119*46c4c49dSIbrahim Kanouche			}
120*46c4c49dSIbrahim Kanouche
121*46c4c49dSIbrahim Kanouche			if i >= len(tt.want)+1 {
122*46c4c49dSIbrahim Kanouche				t.Errorf("ArchiveLicenses(%q): too many files in tar, %d want %d", tt.description, i, len(tt.want))
123*46c4c49dSIbrahim Kanouche				break
124*46c4c49dSIbrahim Kanouche			}
125*46c4c49dSIbrahim Kanouche
126*46c4c49dSIbrahim Kanouche			if hdr.Name != tt.want[i].name {
127*46c4c49dSIbrahim Kanouche				t.Errorf("ArchiveLicenses(%q) = %+v, want %+v", tt.description, hdr.Name, tt.want[i].name)
128*46c4c49dSIbrahim Kanouche			}
129*46c4c49dSIbrahim Kanouche			if hdr.Size != tt.want[i].size {
130*46c4c49dSIbrahim Kanouche				t.Errorf("ArchiveLicenses(%q) = %v, want %v", tt.description, hdr.Size, tt.want[i].size)
131*46c4c49dSIbrahim Kanouche			}
132*46c4c49dSIbrahim Kanouche
133*46c4c49dSIbrahim Kanouche			var b bytes.Buffer
134*46c4c49dSIbrahim Kanouche			if _, err = io.Copy(&b, tr); err != nil {
135*46c4c49dSIbrahim Kanouche				t.Errorf("ArchiveLicenses(%q): cannot read contents: %v", tt.description, err)
136*46c4c49dSIbrahim Kanouche				break
137*46c4c49dSIbrahim Kanouche			}
138*46c4c49dSIbrahim Kanouche
139*46c4c49dSIbrahim Kanouche			if got, want := b.String(), tt.want[i].contents; got != want {
140*46c4c49dSIbrahim Kanouche				t.Errorf("ArchiveLicenses(%q) = got\n%s\nwant:\n%s", tt.description, got, want)
141*46c4c49dSIbrahim Kanouche			}
142*46c4c49dSIbrahim Kanouche
143*46c4c49dSIbrahim Kanouche			hdr, err = tr.Next()
144*46c4c49dSIbrahim Kanouche			if err != nil {
145*46c4c49dSIbrahim Kanouche				t.Errorf("ArchiveLicenses(%q): no hash file found in archive: %v", tt.description, err)
146*46c4c49dSIbrahim Kanouche				break
147*46c4c49dSIbrahim Kanouche			}
148*46c4c49dSIbrahim Kanouche
149*46c4c49dSIbrahim Kanouche			if hdr.Name != strings.TrimSuffix(tt.want[i].name, "txt")+"hash" {
150*46c4c49dSIbrahim Kanouche				t.Errorf("ArchiveLicenses(%q) = %+v, want %+v", tt.description, hdr.Name, strings.TrimSuffix(tt.want[i].name, "txt")+"hash")
151*46c4c49dSIbrahim Kanouche			}
152*46c4c49dSIbrahim Kanouche
153*46c4c49dSIbrahim Kanouche			b.Reset()
154*46c4c49dSIbrahim Kanouche			if _, err = io.Copy(&b, tr); err != nil {
155*46c4c49dSIbrahim Kanouche				t.Errorf("ArchiveLicenses(%q): cannot read contents: %v", tt.description, err)
156*46c4c49dSIbrahim Kanouche				break
157*46c4c49dSIbrahim Kanouche			}
158*46c4c49dSIbrahim Kanouche
159*46c4c49dSIbrahim Kanouche			var got searchset.SearchSet
160*46c4c49dSIbrahim Kanouche			if err := searchset.Deserialize(&b, &got); err != nil {
161*46c4c49dSIbrahim Kanouche				t.Errorf("ArchiveLicenses(%q): cannot deserialize search set: %v", tt.description, err)
162*46c4c49dSIbrahim Kanouche				break
163*46c4c49dSIbrahim Kanouche			}
164*46c4c49dSIbrahim Kanouche
165*46c4c49dSIbrahim Kanouche			want := searchset.New(tt.want[i].contents, searchset.DefaultGranularity)
166*46c4c49dSIbrahim Kanouche			if err := compareSearchSets(want, &got); err != nil {
167*46c4c49dSIbrahim Kanouche				t.Errorf("ArchiveLicenses(%q): search sets not equal: %v", tt.description, err)
168*46c4c49dSIbrahim Kanouche				break
169*46c4c49dSIbrahim Kanouche			}
170*46c4c49dSIbrahim Kanouche		}
171*46c4c49dSIbrahim Kanouche	}
172*46c4c49dSIbrahim Kanouche}
173*46c4c49dSIbrahim Kanouche
// sortUInt32 adapts a []uint32 to sort.Interface, ordering values ascending.
type sortUInt32 []uint32

// Len reports the number of elements in the slice.
func (u sortUInt32) Len() int {
	return len(u)
}

// Swap exchanges the elements at positions a and b.
func (u sortUInt32) Swap(a, b int) {
	tmp := u[a]
	u[a] = u[b]
	u[b] = tmp
}

// Less reports whether the element at a is strictly smaller than the one at b.
func (u sortUInt32) Less(a, b int) bool {
	return u[b] > u[a]
}
179*46c4c49dSIbrahim Kanouche
180*46c4c49dSIbrahim Kanouchefunc compareSearchSets(x, y *searchset.SearchSet) error {
181*46c4c49dSIbrahim Kanouche	// Check to see that the tokens are equal.
182*46c4c49dSIbrahim Kanouche	if len(x.Tokens) != len(y.Tokens) {
183*46c4c49dSIbrahim Kanouche		return fmt.Errorf("Lengths differ = %d vs %d", len(x.Tokens), len(y.Tokens))
184*46c4c49dSIbrahim Kanouche	}
185*46c4c49dSIbrahim Kanouche	for i := 0; i < len(x.Tokens); i++ {
186*46c4c49dSIbrahim Kanouche		if x.Tokens[i].Text != y.Tokens[i].Text {
187*46c4c49dSIbrahim Kanouche			return fmt.Errorf("Token values at %d differ = %q vs %q", i, x.Tokens[i].Text, y.Tokens[i].Text)
188*46c4c49dSIbrahim Kanouche		}
189*46c4c49dSIbrahim Kanouche		if x.Tokens[i].Offset != y.Tokens[i].Offset {
190*46c4c49dSIbrahim Kanouche			return fmt.Errorf("Token offsets at %d differ = %d vs %d", i, x.Tokens[i].Offset, y.Tokens[i].Offset)
191*46c4c49dSIbrahim Kanouche		}
192*46c4c49dSIbrahim Kanouche	}
193*46c4c49dSIbrahim Kanouche
194*46c4c49dSIbrahim Kanouche	// Now check that the hash maps are equal.
195*46c4c49dSIbrahim Kanouche	var xKeys []uint32
196*46c4c49dSIbrahim Kanouche	for k := range x.Hashes {
197*46c4c49dSIbrahim Kanouche		xKeys = append(xKeys, k)
198*46c4c49dSIbrahim Kanouche	}
199*46c4c49dSIbrahim Kanouche	var yKeys []uint32
200*46c4c49dSIbrahim Kanouche	for k := range y.Hashes {
201*46c4c49dSIbrahim Kanouche		yKeys = append(yKeys, k)
202*46c4c49dSIbrahim Kanouche	}
203*46c4c49dSIbrahim Kanouche
204*46c4c49dSIbrahim Kanouche	if len(xKeys) != len(yKeys) {
205*46c4c49dSIbrahim Kanouche		return fmt.Errorf("Lengths of hashes differ = %d vs %d", len(xKeys), len(yKeys))
206*46c4c49dSIbrahim Kanouche	}
207*46c4c49dSIbrahim Kanouche
208*46c4c49dSIbrahim Kanouche	sort.Sort(sortUInt32(xKeys))
209*46c4c49dSIbrahim Kanouche	sort.Sort(sortUInt32(yKeys))
210*46c4c49dSIbrahim Kanouche
211*46c4c49dSIbrahim Kanouche	for i := 0; i < len(xKeys); i++ {
212*46c4c49dSIbrahim Kanouche		if xKeys[i] != yKeys[i] {
213*46c4c49dSIbrahim Kanouche			return fmt.Errorf("Hash keys differ = %d vs %d", xKeys[i], yKeys[i])
214*46c4c49dSIbrahim Kanouche		}
215*46c4c49dSIbrahim Kanouche		if !reflect.DeepEqual(x.Hashes[xKeys[i]], y.Hashes[yKeys[i]]) {
216*46c4c49dSIbrahim Kanouche			return fmt.Errorf("Hash values differ = %v vs %v", x.Hashes[xKeys[i]], y.Hashes[yKeys[i]])
217*46c4c49dSIbrahim Kanouche		}
218*46c4c49dSIbrahim Kanouche	}
219*46c4c49dSIbrahim Kanouche
220*46c4c49dSIbrahim Kanouche	return nil
221*46c4c49dSIbrahim Kanouche}
222*46c4c49dSIbrahim Kanouche
223*46c4c49dSIbrahim Kanouchefunc normalize(s string) string {
224*46c4c49dSIbrahim Kanouche	for _, n := range licenseclassifier.Normalizers {
225*46c4c49dSIbrahim Kanouche		s = n(s)
226*46c4c49dSIbrahim Kanouche	}
227*46c4c49dSIbrahim Kanouche	return s
228*46c4c49dSIbrahim Kanouche}
229