xref: /aosp_15_r20/build/make/tools/generate-notice-files.py (revision 9e94795a3d4ef5c1d47486f9a02bb378756cea8a)
1*9e94795aSAndroid Build Coastguard Worker#!/usr/bin/env python3
2*9e94795aSAndroid Build Coastguard Worker#
3*9e94795aSAndroid Build Coastguard Worker# Copyright (C) 2012 The Android Open Source Project
4*9e94795aSAndroid Build Coastguard Worker#
5*9e94795aSAndroid Build Coastguard Worker# Licensed under the Apache License, Version 2.0 (the "License");
6*9e94795aSAndroid Build Coastguard Worker# you may not use this file except in compliance with the License.
7*9e94795aSAndroid Build Coastguard Worker# You may obtain a copy of the License at
8*9e94795aSAndroid Build Coastguard Worker#
9*9e94795aSAndroid Build Coastguard Worker#      http://www.apache.org/licenses/LICENSE-2.0
10*9e94795aSAndroid Build Coastguard Worker#
11*9e94795aSAndroid Build Coastguard Worker# Unless required by applicable law or agreed to in writing, software
12*9e94795aSAndroid Build Coastguard Worker# distributed under the License is distributed on an "AS IS" BASIS,
13*9e94795aSAndroid Build Coastguard Worker# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14*9e94795aSAndroid Build Coastguard Worker# See the License for the specific language governing permissions and
15*9e94795aSAndroid Build Coastguard Worker# limitations under the License.
16*9e94795aSAndroid Build Coastguard Worker"""
17*9e94795aSAndroid Build Coastguard WorkerUsage: generate-notice-files --text-output [plain text output file] \
18*9e94795aSAndroid Build Coastguard Worker               --html-output [html output file] \
19*9e94795aSAndroid Build Coastguard Worker               --xml-output [xml output file] \
20*9e94795aSAndroid Build Coastguard Worker               -t [file title] -s [directory of notices]
21*9e94795aSAndroid Build Coastguard Worker
22*9e94795aSAndroid Build Coastguard WorkerGenerate the Android notice files, including both text and html files.
23*9e94795aSAndroid Build Coastguard Worker
24*9e94795aSAndroid Build Coastguard Worker-h to display this usage message and exit.
25*9e94795aSAndroid Build Coastguard Worker"""
26*9e94795aSAndroid Build Coastguard Workerfrom collections import defaultdict
27*9e94795aSAndroid Build Coastguard Workerimport argparse
28*9e94795aSAndroid Build Coastguard Workerimport hashlib
29*9e94795aSAndroid Build Coastguard Workerimport itertools
30*9e94795aSAndroid Build Coastguard Workerimport os
31*9e94795aSAndroid Build Coastguard Workerimport os.path
32*9e94795aSAndroid Build Coastguard Workerimport re
33*9e94795aSAndroid Build Coastguard Workerimport struct
34*9e94795aSAndroid Build Coastguard Workerimport sys
35*9e94795aSAndroid Build Coastguard Worker
# Chunk size, in bytes, used by md5sum() for each read while hashing a file.
MD5_BLOCKSIZE = 1024 * 1024

# Maps each HTML-special character (as a length-1 bytes object) to the
# entity that replaces it. html_escape() looks every input byte up in this
# table, so both keys and values must be bytes.
HTML_ESCAPE_TABLE = {
    b"&": b"&amp;",
    b'"': b"&quot;",
    b"'": b"&apos;",
    b">": b"&gt;",
    b"<": b"&lt;",
    }
44*9e94795aSAndroid Build Coastguard Worker
def md5sum(filename):
    """Calculate an MD5 of the file given by FILENAME,
    and return hex digest as a string.
    Output should be compatible with md5sum command.

    The file is opened in binary mode and read MD5_BLOCKSIZE bytes at a
    time, so the whole file is never held in memory at once."""

    digest = hashlib.md5()
    # The context manager closes the file even when a read raises.
    with open(filename, "rb") as f:
        # Two-argument iter() keeps calling read() until it returns b"",
        # which is what a binary file returns at end of file.
        for block in iter(lambda: f.read(MD5_BLOCKSIZE), b""):
            digest.update(block)
    return digest.hexdigest()
59*9e94795aSAndroid Build Coastguard Worker
60*9e94795aSAndroid Build Coastguard Worker
def html_escape(text):
    """Return TEXT (a bytes object) with entities substituted.

    Every byte that has an entry in HTML_ESCAPE_TABLE is replaced by the
    mapped value; all other bytes are copied through unchanged."""
    # Iterating over bytes directly yields ints, which would never match the
    # bytes keys of the table. Unpacking with an "<len>c" format yields one
    # length-1 bytes object per input byte instead. The original author notes
    # this is about twice as fast as slicing by index.
    single_bytes = struct.unpack("%dc" % len(text), text)
    pieces = [HTML_ESCAPE_TABLE.get(byte, byte) for byte in single_bytes]
    return b"".join(pieces)
69*9e94795aSAndroid Build Coastguard Worker
# Inline stylesheet that combine_notice_files_html() writes into the <head>
# of the generated page. Spelled as adjacent literals, one per output line;
# the value starts with a newline and ends with a blank line.
HTML_OUTPUT_CSS = (
    b"\n"
    b'<style type="text/css">\n'
    b"body { padding: 0; font-family: sans-serif; }\n"
    b".same-license { background-color: #eeeeee; border-top: 20px solid white; padding: 10px; }\n"
    b".label { font-weight: bold; }\n"
    b".file-list { margin-left: 1em; color: blue; }\n"
    b"</style>\n"
    b"\n"
)
79*9e94795aSAndroid Build Coastguard Worker
def combine_notice_files_html(file_hash, input_dirs, output_filename):
    """Combine notice files in FILE_HASH and output a HTML version to OUTPUT_FILENAME.

    FILE_HASH is a sequence of filename lists. Each inner list becomes one
    table row, and only the first file of each list is read for the notice
    text. INPUT_DIRS are the directory prefixes that are stripped (together
    with the trailing ".txt") from filenames before they are displayed."""

    # The directories are literal path prefixes, not patterns, so escape them
    # before splicing them into the regex; the "." of ".txt" is literal too.
    src_dir_strip_re = re.compile(
        "(?:" + "|".join(re.escape(d) for d in input_dirs) + r")(/.*)\.txt")

    # Set up a filename to row id table (anchors inside tables don't work in
    # most browsers, but href's to table row ids do)
    id_table = {}
    for row_id, group in enumerate(file_hash):
        for filename in group:
            id_table[filename] = row_id

    # Flatten the list of lists into a single sorted list of filenames
    sorted_filenames = sorted(itertools.chain.from_iterable(file_hash))

    # The context manager closes the output even if a notice file cannot be
    # read part-way through.
    with open(output_filename, "wb") as output_file:
        # Header pieces
        output_file.write(b"<html><head>\n")
        output_file.write(HTML_OUTPUT_CSS)
        output_file.write(b'</head><body topmargin="0" leftmargin="0" rightmargin="0" bottommargin="0">\n')

        # Table of contents: one link per file, pointing at its table row
        output_file.write(b'<div class="toc">\n')
        output_file.write(b"<ul>\n")
        for filename in sorted_filenames:
            stripped_filename = src_dir_strip_re.sub(r"\1", filename)
            output_file.write(('<li><a href="#id%d">%s</a></li>\n' % (id_table.get(filename), stripped_filename)).encode())
        output_file.write(b"</ul>\n")
        output_file.write(b"</div><!-- table of contents -->\n")

        # One table row per group: the file list, then the shared notice text
        output_file.write(b'<table cellpadding="0" cellspacing="0" border="0">\n')
        for group in file_hash:
            output_file.write(b'<tr id="id%d"><td class="same-license">\n' % id_table.get(group[0]))
            output_file.write(b'<div class="label">Notices for file(s):</div>\n')
            output_file.write(b'<div class="file-list">\n')
            for filename in group:
                output_file.write(("%s <br/>\n" % src_dir_strip_re.sub(r"\1", filename)).encode())
            output_file.write(b"</div><!-- file-list -->\n")
            output_file.write(b"\n")
            output_file.write(b'<pre class="license-text">\n')
            with open(group[0], "rb") as notice_file:
                output_file.write(html_escape(notice_file.read()))
            output_file.write(b"\n</pre><!-- license-text -->\n")
            output_file.write(b"</td></tr><!-- same-license -->\n\n\n\n")

        # Finish off the file output
        output_file.write(b"</table>\n")
        output_file.write(b"</body></html>\n")
135*9e94795aSAndroid Build Coastguard Worker
def combine_notice_files_text(file_hash, input_dirs, output_filename, file_title):
    """Combine notice files in FILE_HASH and output a text version to OUTPUT_FILENAME.

    FILE_HASH is a sequence of filename lists. Each inner list produces one
    section listing all its filenames, followed by the content of the first
    file in the list. INPUT_DIRS are the directory prefixes that are stripped
    (together with the trailing ".txt") from the listed filenames.
    FILE_TITLE is written as the first line of the output."""

    # The directories are literal path prefixes, not patterns, so escape them
    # before splicing them into the regex; the "." of ".txt" is literal too.
    src_dir_strip_re = re.compile(
        "(?:" + "|".join(re.escape(d) for d in input_dirs) + r")(/.*)\.txt")

    # The context manager closes the output even if a notice file cannot be
    # read part-way through.
    with open(output_filename, "wb") as output_file:
        output_file.write(file_title.encode())
        output_file.write(b"\n")
        for group in file_hash:
            output_file.write(b"============================================================\n")
            output_file.write(b"Notices for file(s):\n")
            for filename in group:
                output_file.write(src_dir_strip_re.sub(r"\1", filename).encode())
                output_file.write(b"\n")
            output_file.write(b"------------------------------------------------------------\n")
            # Only the first file is read for the section's notice text.
            with open(group[0], "rb") as notice_file:
                output_file.write(notice_file.read())
                output_file.write(b"\n")
154*9e94795aSAndroid Build Coastguard Worker
def combine_notice_files_xml(files_with_same_hash, input_dirs, output_filename):
    """Combine notice files in FILES_WITH_SAME_HASH and output a XML version to OUTPUT_FILENAME.

    FILES_WITH_SAME_HASH maps a content key to the list of filenames sharing
    that key. The output has one <file-name> element per file, carrying its
    key as contentId, followed by one <file-content> element per distinct
    key. INPUT_DIRS are the directory prefixes that are stripped (together
    with the trailing ".txt") from the filenames in <file-name>."""

    # The directories are literal path prefixes, not patterns, so escape them
    # before splicing them into the regex; the "." of ".txt" is literal too.
    src_dir_strip_re = re.compile(
        "(?:" + "|".join(re.escape(d) for d in input_dirs) + r")(/.*)\.txt")

    # Invert the mapping: filename -> content key
    id_table = {}
    for file_key, files in files_with_same_hash.items():
        for filename in files:
            id_table[filename] = file_key

    # Sorted so the output order does not depend on dict or walk order
    sorted_filenames = sorted(id_table)

    # The context manager closes the output even if a notice file cannot be
    # read part-way through.
    with open(output_filename, "wb") as output_file:
        output_file.write(b'<?xml version="1.0" encoding="utf-8"?>\n')
        output_file.write(b"<licenses>\n")

        # Index section: every file with the key of its content
        for filename in sorted_filenames:
            stripped_filename = src_dir_strip_re.sub(r"\1", filename)
            output_file.write(('<file-name contentId="%s">%s</file-name>\n' % (id_table.get(filename), stripped_filename)).encode())
        output_file.write(b"\n\n")

        # Content section: emit each distinct key once, reading the first
        # file (in sorted order) that carries it. A set keeps the membership
        # test constant-time.
        processed_file_keys = set()
        for filename in sorted_filenames:
            file_key = id_table.get(filename)
            if file_key in processed_file_keys:
                continue
            processed_file_keys.add(file_key)

            output_file.write(('<file-content contentId="%s"><![CDATA[' % file_key).encode())
            with open(filename, "rb") as notice_file:
                output_file.write(html_escape(notice_file.read()))
            output_file.write(b"]]></file-content>\n\n")

        # Finish off the file output
        output_file.write(b"</licenses>\n")
198*9e94795aSAndroid Build Coastguard Worker
def get_args():
    """Parse the process command line and return the argparse namespace.

    --text-output, -t/--title and -s/--source-dir are required. The -s, -i
    and -e options may be repeated; their values are collected into lists."""
    # (flags, keyword arguments) for each option, in registration order.
    option_specs = (
        (('--text-output',),
         dict(required=True, help='The text output file path.')),
        (('--html-output',),
         dict(help='The html output file path.')),
        (('--xml-output',),
         dict(help='The xml output file path.')),
        (('-t', '--title'),
         dict(required=True, help='The file title.')),
        (('-s', '--source-dir'),
         dict(required=True, action='append',
              help='The directory containing notices.')),
        (('-i', '--included-subdirs'),
         dict(action='append',
              help='The sub directories which should be included.')),
        (('-e', '--excluded-subdirs'),
         dict(action='append',
              help='The sub directories which should be excluded.')),
    )

    parser = argparse.ArgumentParser()
    for flags, options in option_specs:
        parser.add_argument(*flags, **options)
    return parser.parse_args()
223*9e94795aSAndroid Build Coastguard Worker
def _in_any_subdir(root, input_dir, subdirs):
    """Return True if ROOT is INPUT_DIR/<subdir>, or lies below it, for any of SUBDIRS."""
    for subdir in subdirs:
        prefix = input_dir + '/' + subdir
        if root == prefix or root.startswith(prefix + '/'):
            return True
    return False


def main(argv):
    """Collect the notice files under the source dirs and write the combined outputs.

    ARGV is accepted for the conventional entry-point signature but is not
    read; get_args() parses the process command line itself.

    Every "*.txt" file below each source directory is hashed with md5sum(),
    and files with the same digest are grouped together. When included
    subdirs are given, only files under them are considered and the excluded
    list is ignored; otherwise files under excluded subdirs are skipped. The
    text output is always written; the HTML and XML outputs only when their
    paths were supplied."""
    args = get_args()

    included_subdirs = args.included_subdirs or []
    excluded_subdirs = args.excluded_subdirs or []

    input_dirs = [os.path.normpath(source_dir) for source_dir in args.source_dir]

    # Find all the notice files and md5 them
    files_with_same_hash = defaultdict(list)
    for input_dir in input_dirs:
        for root, _dirs, files in os.walk(input_dir):
            # The filter depends only on the directory, so evaluate it once
            # per directory rather than once per file.
            if included_subdirs:
                matched = _in_any_subdir(root, input_dir, included_subdirs)
            elif excluded_subdirs:
                matched = not _in_any_subdir(root, input_dir, excluded_subdirs)
            else:
                matched = True
            if not matched:
                continue
            for name in files:
                if name.endswith(".txt"):
                    filename = os.path.join(root, name)
                    files_with_same_hash[md5sum(filename)].append(filename)

    # Sort both the digests and the files within each group so the output
    # does not depend on directory walk order.
    filesets = [sorted(files_with_same_hash[md5]) for md5 in sorted(files_with_same_hash)]
    combine_notice_files_text(filesets, input_dirs, args.text_output, args.title)

    if args.html_output is not None:
        combine_notice_files_html(filesets, input_dirs, args.html_output)

    if args.xml_output is not None:
        combine_notice_files_xml(files_with_same_hash, input_dirs, args.xml_output)


if __name__ == "__main__":
    main(sys.argv)
274