xref: /aosp_15_r20/external/licenseclassifier/serializer/serializer_test.go (revision 46c4c49da23cae783fa41bf46525a6505638499a)
1// Copyright 2017 Google Inc.
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7//	http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14package serializer
15
16import (
17	"archive/tar"
18	"bytes"
19	"compress/gzip"
20	"fmt"
21	"io"
22	"log"
23	"os"
24	"reflect"
25	"sort"
26	"strings"
27	"testing"
28
29	"github.com/google/licenseclassifier"
30	"github.com/google/licenseclassifier/stringclassifier/searchset"
31)
32
// License fixtures shared by the tests in this file. TestMain assigns all of
// them before it calls m.Run.
var (
	// Raw contents of "Apache-2.0.header.txt" and "MIT.txt" as returned by
	// licenseclassifier.ReadLicenseFile.
	apache20Header, mit []byte
	// The same two texts after being passed through normalize.
	normApache, normMIT string
)
37
38func TestMain(m *testing.M) {
39	var err error
40	apache20Header, err = licenseclassifier.ReadLicenseFile("Apache-2.0.header.txt")
41	if err != nil {
42		log.Fatalf("error reading contents of Apache-2.0.header.txt: %v", err)
43	}
44	normApache = normalize(string(apache20Header))
45
46	mit, err = licenseclassifier.ReadLicenseFile("MIT.txt")
47	if err != nil {
48		log.Fatalf("error reading contents of MIT.txt: %v", err)
49	}
50	normMIT = normalize(string(mit))
51
52	os.Exit(m.Run())
53}
54
// entry describes one license file that a test expects to find in the
// archive written by ArchiveLicenses.
type entry struct {
	name     string // compared against the tar header's Name
	size     int64  // compared against the tar header's Size
	contents string // compared against the bytes read for that tar entry
}
60
61func TestSerializer_ArchiveLicense(t *testing.T) {
62	tests := []struct {
63		description string
64		licenses    []string
65		want        []entry
66	}{
67		{
68			description: "Archiving Apache 2.0 header",
69			licenses:    []string{"Apache-2.0.header.txt"},
70			want: []entry{
71				{
72					name:     "Apache-2.0.header.txt",
73					size:     int64(len(normApache)),
74					contents: normApache,
75				},
76			},
77		},
78		{
79			description: "Archiving Apache 2.0 header + MIT",
80			licenses:    []string{"Apache-2.0.header.txt", "MIT.txt"},
81			want: []entry{
82				{
83					name:     "Apache-2.0.header.txt",
84					size:     int64(len(normApache)),
85					contents: normApache,
86				},
87				{
88					name:     "MIT.txt",
89					size:     int64(len(normMIT)),
90					contents: normMIT,
91				},
92			},
93		},
94	}
95
96	for _, tt := range tests {
97		var writer bytes.Buffer
98		if err := ArchiveLicenses(tt.licenses, &writer); err != nil {
99			t.Errorf("ArchiveLicenses(%q): cannot archive license: %v", tt.description, err)
100			continue
101		}
102
103		reader := bytes.NewReader(writer.Bytes())
104		gr, err := gzip.NewReader(reader)
105		if err != nil {
106			t.Errorf("ArchiveLicenses(%q): cannot create gzip reader: %v", tt.description, err)
107			continue
108		}
109
110		tr := tar.NewReader(gr)
111		for i := 0; ; i++ {
112			hdr, err := tr.Next()
113			if err == io.EOF {
114				break
115			}
116			if err != nil {
117				t.Errorf("ArchiveLicenses(%q): cannot read header: %v", tt.description, err)
118				break
119			}
120
121			if i >= len(tt.want)+1 {
122				t.Errorf("ArchiveLicenses(%q): too many files in tar, %d want %d", tt.description, i, len(tt.want))
123				break
124			}
125
126			if hdr.Name != tt.want[i].name {
127				t.Errorf("ArchiveLicenses(%q) = %+v, want %+v", tt.description, hdr.Name, tt.want[i].name)
128			}
129			if hdr.Size != tt.want[i].size {
130				t.Errorf("ArchiveLicenses(%q) = %v, want %v", tt.description, hdr.Size, tt.want[i].size)
131			}
132
133			var b bytes.Buffer
134			if _, err = io.Copy(&b, tr); err != nil {
135				t.Errorf("ArchiveLicenses(%q): cannot read contents: %v", tt.description, err)
136				break
137			}
138
139			if got, want := b.String(), tt.want[i].contents; got != want {
140				t.Errorf("ArchiveLicenses(%q) = got\n%s\nwant:\n%s", tt.description, got, want)
141			}
142
143			hdr, err = tr.Next()
144			if err != nil {
145				t.Errorf("ArchiveLicenses(%q): no hash file found in archive: %v", tt.description, err)
146				break
147			}
148
149			if hdr.Name != strings.TrimSuffix(tt.want[i].name, "txt")+"hash" {
150				t.Errorf("ArchiveLicenses(%q) = %+v, want %+v", tt.description, hdr.Name, strings.TrimSuffix(tt.want[i].name, "txt")+"hash")
151			}
152
153			b.Reset()
154			if _, err = io.Copy(&b, tr); err != nil {
155				t.Errorf("ArchiveLicenses(%q): cannot read contents: %v", tt.description, err)
156				break
157			}
158
159			var got searchset.SearchSet
160			if err := searchset.Deserialize(&b, &got); err != nil {
161				t.Errorf("ArchiveLicenses(%q): cannot deserialize search set: %v", tt.description, err)
162				break
163			}
164
165			want := searchset.New(tt.want[i].contents, searchset.DefaultGranularity)
166			if err := compareSearchSets(want, &got); err != nil {
167				t.Errorf("ArchiveLicenses(%q): search sets not equal: %v", tt.description, err)
168				break
169			}
170		}
171	}
172}
173
// sortUInt32 adapts a []uint32 to sort.Interface, ordering the values from
// smallest to largest.
type sortUInt32 []uint32

// Len reports the number of elements in the slice.
func (u sortUInt32) Len() int {
	return len(u)
}

// Swap exchanges the elements at positions i and j.
func (u sortUInt32) Swap(i, j int) {
	u[j], u[i] = u[i], u[j]
}

// Less reports whether the element at i is strictly smaller than the one at j.
func (u sortUInt32) Less(i, j int) bool {
	return u[j] > u[i]
}
179
180func compareSearchSets(x, y *searchset.SearchSet) error {
181	// Check to see that the tokens are equal.
182	if len(x.Tokens) != len(y.Tokens) {
183		return fmt.Errorf("Lengths differ = %d vs %d", len(x.Tokens), len(y.Tokens))
184	}
185	for i := 0; i < len(x.Tokens); i++ {
186		if x.Tokens[i].Text != y.Tokens[i].Text {
187			return fmt.Errorf("Token values at %d differ = %q vs %q", i, x.Tokens[i].Text, y.Tokens[i].Text)
188		}
189		if x.Tokens[i].Offset != y.Tokens[i].Offset {
190			return fmt.Errorf("Token offsets at %d differ = %d vs %d", i, x.Tokens[i].Offset, y.Tokens[i].Offset)
191		}
192	}
193
194	// Now check that the hash maps are equal.
195	var xKeys []uint32
196	for k := range x.Hashes {
197		xKeys = append(xKeys, k)
198	}
199	var yKeys []uint32
200	for k := range y.Hashes {
201		yKeys = append(yKeys, k)
202	}
203
204	if len(xKeys) != len(yKeys) {
205		return fmt.Errorf("Lengths of hashes differ = %d vs %d", len(xKeys), len(yKeys))
206	}
207
208	sort.Sort(sortUInt32(xKeys))
209	sort.Sort(sortUInt32(yKeys))
210
211	for i := 0; i < len(xKeys); i++ {
212		if xKeys[i] != yKeys[i] {
213			return fmt.Errorf("Hash keys differ = %d vs %d", xKeys[i], yKeys[i])
214		}
215		if !reflect.DeepEqual(x.Hashes[xKeys[i]], y.Hashes[yKeys[i]]) {
216			return fmt.Errorf("Hash values differ = %v vs %v", x.Hashes[xKeys[i]], y.Hashes[yKeys[i]])
217		}
218	}
219
220	return nil
221}
222
223func normalize(s string) string {
224	for _, n := range licenseclassifier.Normalizers {
225		s = n(s)
226	}
227	return s
228}
229