#!/usr/bin/env python3
#
# Copyright (C) 2012 The Android Open Source Project
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
Usage: generate-notice-files --text-output [plain text output file] \
               --html-output [html output file] \
               --xml-output [xml output file] \
               -t [file title] -s [directory of notices]

Generate the Android notice files, including both text and html files.

Optional -i/--included-subdirs and -e/--excluded-subdirs restrict which
sub directories of each notice directory are scanned.

-h to display this usage message and exit.
"""
from collections import defaultdict
import argparse
import hashlib
import itertools
import os
import os.path
import re
import struct
import sys

MD5_BLOCKSIZE = 1024 * 1024
# Maps each single byte that is special in HTML/XML to its entity.
HTML_ESCAPE_TABLE = {
    b"&": b"&amp;",
    b'"': b"&quot;",
    b"'": b"&apos;",
    b">": b"&gt;",
    b"<": b"&lt;",
    }


def md5sum(filename):
    """Calculate an MD5 of the file given by FILENAME,
    and return hex digest as a string.
    Output should be compatible with md5sum command"""

    digest = hashlib.md5()
    with open(filename, "rb") as f:
        # Read in fixed-size blocks so large files are never held in memory
        # at once; read() returns b"" at end of file.
        for block in iter(lambda: f.read(MD5_BLOCKSIZE), b""):
            digest.update(block)
    return digest.hexdigest()


def html_escape(text):
    """Produce entities within text.

    TEXT is a bytes object; the return value is a new bytes object in which
    every byte listed in HTML_ESCAPE_TABLE is replaced by its entity.
    """
    # Using for i in text doesn't work since i will be an int, not a byte.
    # There are multiple ways to solve this, but the most performant way
    # to iterate over a byte array is to use unpack. Using the
    # for i in range(len(text)) and using that to get a byte using array
    # slices is twice as slow as this method.
    return b"".join(HTML_ESCAPE_TABLE.get(i, i) for i in struct.unpack(str(len(text)) + 'c', text))


HTML_OUTPUT_CSS = b"""
<style type="text/css">
body { padding: 0; font-family: sans-serif; }
.same-license { background-color: #eeeeee; border-top: 20px solid white; padding: 10px; }
.label { font-weight: bold; }
.file-list { margin-left: 1em; color: blue; }
</style>

"""


def _compile_src_dir_strip_re(input_dirs):
    """Return a regex matching '<input dir>/<path>.txt', capturing '/<path>'."""
    # The directory names are literal paths, not patterns, so escape them;
    # otherwise a directory such as "c++" breaks or mis-matches the regex.
    alternatives = "|".join(re.escape(input_dir) for input_dir in input_dirs)
    return re.compile("(?:" + alternatives + r")(/.*)\.txt")


def _is_under_any_subdir(root, input_dir, subdirs):
    """Return True if ROOT is INPUT_DIR/<subdir> or below it, for any of SUBDIRS."""
    for subdir in subdirs:
        prefix = input_dir + '/' + subdir
        if root == prefix or root.startswith(prefix + '/'):
            return True
    return False


def combine_notice_files_html(file_hash, input_dirs, output_filename):
    """Combine notice files in FILE_HASH and output a HTML version to OUTPUT_FILENAME.

    FILE_HASH is a list of lists of notice file paths; the files within one
    inner list have identical content, so only the first one is read.
    INPUT_DIRS are the notice directories stripped from the displayed paths.
    """

    src_dir_strip_re = _compile_src_dir_strip_re(input_dirs)

    # Set up a filename to row id table (anchors inside tables don't work in
    # most browsers, but href's to table row ids do)
    id_table = {}
    for id_count, value in enumerate(file_hash):
        for filename in value:
            id_table[filename] = id_count

    # Flatten the list of lists into a single list of filenames
    sorted_filenames = sorted(itertools.chain.from_iterable(file_hash))

    # Open the output file, and output the header pieces
    with open(output_filename, "wb") as output_file:
        output_file.write(b"<html><head>\n")
        output_file.write(HTML_OUTPUT_CSS)
        output_file.write(b'</head><body topmargin="0" leftmargin="0" rightmargin="0" bottommargin="0">\n')

        # Output our table of contents
        output_file.write(b'<div class="toc">\n')
        output_file.write(b"<ul>\n")

        # Print out a nice table of contents
        for filename in sorted_filenames:
            stripped_filename = src_dir_strip_re.sub(r"\1", filename)
            output_file.write(('<li><a href="#id%d">%s</a></li>\n' % (id_table[filename], stripped_filename)).encode())

        output_file.write(b"</ul>\n")
        output_file.write(b"</div><!-- table of contents -->\n")
        # Output the individual notice file lists
        output_file.write(b'<table cellpadding="0" cellspacing="0" border="0">\n')
        for value in file_hash:
            output_file.write(b'<tr id="id%d"><td class="same-license">\n' % id_table[value[0]])
            output_file.write(b'<div class="label">Notices for file(s):</div>\n')
            output_file.write(b'<div class="file-list">\n')
            for filename in value:
                output_file.write(("%s <br/>\n" % src_dir_strip_re.sub(r"\1", filename)).encode())
            output_file.write(b"</div><!-- file-list -->\n")
            output_file.write(b"\n")
            output_file.write(b'<pre class="license-text">\n')
            # All files in VALUE share the same content; read the first.
            with open(value[0], "rb") as notice_file:
                output_file.write(html_escape(notice_file.read()))
            output_file.write(b"\n</pre><!-- license-text -->\n")
            output_file.write(b"</td></tr><!-- same-license -->\n\n\n\n")

        # Finish off the file output
        output_file.write(b"</table>\n")
        output_file.write(b"</body></html>\n")


def combine_notice_files_text(file_hash, input_dirs, output_filename, file_title):
    """Combine notice files in FILE_HASH and output a text version to OUTPUT_FILENAME.

    FILE_HASH is a list of lists of notice file paths with identical content
    per inner list. FILE_TITLE is written as the first line of the output.
    """

    src_dir_strip_re = _compile_src_dir_strip_re(input_dirs)
    with open(output_filename, "wb") as output_file:
        output_file.write(file_title.encode())
        output_file.write(b"\n")
        for value in file_hash:
            output_file.write(b"============================================================\n")
            output_file.write(b"Notices for file(s):\n")
            for filename in value:
                output_file.write(src_dir_strip_re.sub(r"\1", filename).encode())
                output_file.write(b"\n")
            output_file.write(b"------------------------------------------------------------\n")
            # All files in VALUE share the same content; read the first.
            with open(value[0], "rb") as notice_file:
                output_file.write(notice_file.read())
            output_file.write(b"\n")


def combine_notice_files_xml(files_with_same_hash, input_dirs, output_filename):
    """Combine notice files in FILES_WITH_SAME_HASH and output a XML version to OUTPUT_FILENAME.

    FILES_WITH_SAME_HASH maps a content key (the MD5 hex digest computed in
    main) to the list of notice file paths having that content. The key is
    emitted as the contentId linking each file name to its license text.
    """

    src_dir_strip_re = _compile_src_dir_strip_re(input_dirs)

    # Set up a filename to content id table.
    id_table = {}
    for file_key, files in files_with_same_hash.items():
        for filename in files:
            id_table[filename] = file_key

    sorted_filenames = sorted(id_table)

    # Open the output file, and output the header pieces
    with open(output_filename, "wb") as output_file:
        output_file.write(b'<?xml version="1.0" encoding="utf-8"?>\n')
        output_file.write(b"<licenses>\n")

        # Print out a nice table of contents
        for filename in sorted_filenames:
            stripped_filename = src_dir_strip_re.sub(r"\1", filename)
            output_file.write(('<file-name contentId="%s">%s</file-name>\n' % (id_table[filename], stripped_filename)).encode())
        output_file.write(b"\n\n")

        # Output each distinct notice text once, taken from the first file
        # (in sorted order) that carries it.
        processed_file_keys = set()
        for filename in sorted_filenames:
            file_key = id_table[filename]
            if file_key in processed_file_keys:
                continue
            processed_file_keys.add(file_key)

            output_file.write(('<file-content contentId="%s"><![CDATA[' % file_key).encode())
            with open(filename, "rb") as notice_file:
                output_file.write(html_escape(notice_file.read()))
            output_file.write(b"]]></file-content>\n\n")

        # Finish off the file output
        output_file.write(b"</licenses>\n")


def get_args(argv=None):
    """Parse the command line and return the argparse namespace.

    ARGV is the list of arguments without the program name; when None,
    argparse reads sys.argv[1:].
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '--text-output', required=True,
        help='The text output file path.')
    parser.add_argument(
        '--html-output',
        help='The html output file path.')
    parser.add_argument(
        '--xml-output',
        help='The xml output file path.')
    parser.add_argument(
        '-t', '--title', required=True,
        help='The file title.')
    parser.add_argument(
        '-s', '--source-dir', required=True, action='append',
        help='The directory containing notices.')
    parser.add_argument(
        '-i', '--included-subdirs', action='append',
        help='The sub directories which should be included.')
    parser.add_argument(
        '-e', '--excluded-subdirs', action='append',
        help='The sub directories which should be excluded.')
    return parser.parse_args(argv)


def main(argv):
    """Collect the notice files and write the requested combined outputs.

    ARGV is the full argument vector including the program name (as in
    sys.argv). If it is empty or None, sys.argv is parsed instead.
    """
    args = get_args(argv[1:] if argv else None)

    txt_output_file = args.text_output
    html_output_file = args.html_output
    xml_output_file = args.xml_output
    file_title = args.title
    included_subdirs = args.included_subdirs or []
    excluded_subdirs = args.excluded_subdirs or []

    input_dirs = [os.path.normpath(source_dir) for source_dir in args.source_dir]
    # Find all the notice files and md5 them
    files_with_same_hash = defaultdict(list)
    for input_dir in input_dirs:
        for root, _dirs, files in os.walk(input_dir):
            # The include list takes precedence: when it is given, the
            # exclude list is not consulted at all.
            if included_subdirs:
                matched = _is_under_any_subdir(root, input_dir, included_subdirs)
            else:
                matched = not _is_under_any_subdir(root, input_dir, excluded_subdirs)
            if not matched:
                continue
            for file_name in files:
                if file_name.endswith(".txt"):
                    filename = os.path.join(root, file_name)
                    files_with_same_hash[md5sum(filename)].append(filename)

    # Sort by digest, then by path, so the output is deterministic.
    filesets = [sorted(files_with_same_hash[md5]) for md5 in sorted(files_with_same_hash)]
    combine_notice_files_text(filesets, input_dirs, txt_output_file, file_title)

    if html_output_file is not None:
        combine_notice_files_html(filesets, input_dirs, html_output_file)

    if xml_output_file is not None:
        combine_notice_files_xml(files_with_same_hash, input_dirs, xml_output_file)


if __name__ == "__main__":
    main(sys.argv)