1*01826a49SYabin Cui#!/usr/bin/env python3 2*01826a49SYabin Cui# ################################################################ 3*01826a49SYabin Cui# Copyright (c) Meta Platforms, Inc. and affiliates. 4*01826a49SYabin Cui# All rights reserved. 5*01826a49SYabin Cui# 6*01826a49SYabin Cui# This source code is licensed under both the BSD-style license (found in the 7*01826a49SYabin Cui# LICENSE file in the root directory of this source tree) and the GPLv2 (found 8*01826a49SYabin Cui# in the COPYING file in the root directory of this source tree). 9*01826a49SYabin Cui# You may select, at your option, one of the above-listed licenses. 10*01826a49SYabin Cui# ########################################################################## 11*01826a49SYabin Cui 12*01826a49SYabin Cuiimport argparse 13*01826a49SYabin Cuiimport contextlib 14*01826a49SYabin Cuiimport os 15*01826a49SYabin Cuiimport re 16*01826a49SYabin Cuiimport shutil 17*01826a49SYabin Cuiimport sys 18*01826a49SYabin Cuifrom typing import Optional 19*01826a49SYabin Cui 20*01826a49SYabin Cui 21*01826a49SYabin CuiINCLUDED_SUBDIRS = ["common", "compress", "decompress"] 22*01826a49SYabin Cui 23*01826a49SYabin CuiSKIPPED_FILES = [ 24*01826a49SYabin Cui "common/mem.h", 25*01826a49SYabin Cui "common/zstd_deps.h", 26*01826a49SYabin Cui "common/pool.c", 27*01826a49SYabin Cui "common/pool.h", 28*01826a49SYabin Cui "common/threading.c", 29*01826a49SYabin Cui "common/threading.h", 30*01826a49SYabin Cui "common/zstd_trace.h", 31*01826a49SYabin Cui "compress/zstdmt_compress.h", 32*01826a49SYabin Cui "compress/zstdmt_compress.c", 33*01826a49SYabin Cui] 34*01826a49SYabin Cui 35*01826a49SYabin CuiXXHASH_FILES = [ 36*01826a49SYabin Cui "common/xxhash.c", 37*01826a49SYabin Cui "common/xxhash.h", 38*01826a49SYabin Cui] 39*01826a49SYabin Cui 40*01826a49SYabin Cui 41*01826a49SYabin Cuiclass FileLines(object): 42*01826a49SYabin Cui def __init__(self, filename): 43*01826a49SYabin Cui self.filename = filename 44*01826a49SYabin Cui with open(self.filename, "r") as f: 45*01826a49SYabin Cui self.lines = f.readlines() 46*01826a49SYabin Cui 47*01826a49SYabin Cui def write(self): 48*01826a49SYabin Cui with open(self.filename, "w") as f: 49*01826a49SYabin Cui f.write("".join(self.lines)) 50*01826a49SYabin Cui 51*01826a49SYabin Cui 52*01826a49SYabin Cuiclass PartialPreprocessor(object): 53*01826a49SYabin Cui """ 54*01826a49SYabin Cui Looks for simple ifdefs and ifndefs and replaces them. 55*01826a49SYabin Cui Handles && and ||. 56*01826a49SYabin Cui Has fancy logic to handle translating elifs to ifs. 57*01826a49SYabin Cui Only looks for macros in the first part of the expression with no 58*01826a49SYabin Cui parens. 59*01826a49SYabin Cui Does not handle multi-line macros (only looks in first line). 60*01826a49SYabin Cui """ 61*01826a49SYabin Cui def __init__(self, defs: [(str, Optional[str])], replaces: [(str, str)], undefs: [str]): 62*01826a49SYabin Cui MACRO_GROUP = r"(?P<macro>[a-zA-Z_][a-zA-Z_0-9]*)" 63*01826a49SYabin Cui ELIF_GROUP = r"(?P<elif>el)?" 64*01826a49SYabin Cui OP_GROUP = r"(?P<op>&&|\|\|)?" 65*01826a49SYabin Cui 66*01826a49SYabin Cui self._defs = {macro:value for macro, value in defs} 67*01826a49SYabin Cui self._replaces = {macro:value for macro, value in replaces} 68*01826a49SYabin Cui self._defs.update(self._replaces) 69*01826a49SYabin Cui self._undefs = set(undefs) 70*01826a49SYabin Cui 71*01826a49SYabin Cui self._define = re.compile(r"\s*#\s*define") 72*01826a49SYabin Cui self._if = re.compile(r"\s*#\s*if") 73*01826a49SYabin Cui self._elif = re.compile(r"\s*#\s*(?P<elif>el)if") 74*01826a49SYabin Cui self._else = re.compile(r"\s*#\s*(?P<else>else)") 75*01826a49SYabin Cui self._endif = re.compile(r"\s*#\s*endif") 76*01826a49SYabin Cui 77*01826a49SYabin Cui self._ifdef = re.compile(fr"\s*#\s*if(?P<not>n)?def {MACRO_GROUP}\s*") 78*01826a49SYabin Cui self._if_defined = re.compile( 79*01826a49SYabin Cui fr"\s*#\s*{ELIF_GROUP}if\s+(?P<not>!)?\s*defined\s*\(\s*{MACRO_GROUP}\s*\)\s*{OP_GROUP}" 80*01826a49SYabin Cui ) 81*01826a49SYabin Cui self._if_defined_value = re.compile( 82*01826a49SYabin Cui fr"\s*#\s*{ELIF_GROUP}if\s+defined\s*\(\s*{MACRO_GROUP}\s*\)\s*" 83*01826a49SYabin Cui fr"(?P<op>&&)\s*" 84*01826a49SYabin Cui fr"(?P<openp>\()?\s*" 85*01826a49SYabin Cui fr"(?P<macro2>[a-zA-Z_][a-zA-Z_0-9]*)\s*" 86*01826a49SYabin Cui fr"(?P<cmp>[=><!]+)\s*" 87*01826a49SYabin Cui fr"(?P<value>[0-9]*)\s*" 88*01826a49SYabin Cui fr"(?P<closep>\))?\s*" 89*01826a49SYabin Cui ) 90*01826a49SYabin Cui self._if_true = re.compile( 91*01826a49SYabin Cui fr"\s*#\s*{ELIF_GROUP}if\s+{MACRO_GROUP}\s*{OP_GROUP}" 92*01826a49SYabin Cui ) 93*01826a49SYabin Cui 94*01826a49SYabin Cui self._c_comment = re.compile(r"/\*.*?\*/") 95*01826a49SYabin Cui self._cpp_comment = re.compile(r"//") 96*01826a49SYabin Cui 97*01826a49SYabin Cui def _log(self, *args, **kwargs): 98*01826a49SYabin Cui print(*args, **kwargs) 99*01826a49SYabin Cui 100*01826a49SYabin Cui def _strip_comments(self, line): 101*01826a49SYabin Cui # First strip c-style comments (may include //) 102*01826a49SYabin Cui while True: 103*01826a49SYabin Cui m = self._c_comment.search(line) 104*01826a49SYabin Cui if m is None: 105*01826a49SYabin Cui break 106*01826a49SYabin Cui line = line[:m.start()] + line[m.end():] 107*01826a49SYabin Cui 108*01826a49SYabin Cui # Then strip cpp-style comments 109*01826a49SYabin Cui m = self._cpp_comment.search(line) 110*01826a49SYabin Cui if m is not None: 111*01826a49SYabin Cui line = line[:m.start()] 112*01826a49SYabin Cui 113*01826a49SYabin Cui return line 114*01826a49SYabin Cui 115*01826a49SYabin Cui def _fixup_indentation(self, macro, replace: [str]): 116*01826a49SYabin Cui if len(replace) == 0: 117*01826a49SYabin Cui return replace 118*01826a49SYabin Cui if len(replace) == 1 and self._define.match(replace[0]) is None: 119*01826a49SYabin Cui # If there is only one line, only replace defines 120*01826a49SYabin Cui return replace 121*01826a49SYabin Cui 122*01826a49SYabin Cui 123*01826a49SYabin Cui all_pound = True 124*01826a49SYabin Cui for line in replace: 125*01826a49SYabin Cui if not line.startswith('#'): 126*01826a49SYabin Cui all_pound = False 127*01826a49SYabin Cui if all_pound: 128*01826a49SYabin Cui replace = [line[1:] for line in replace] 129*01826a49SYabin Cui 130*01826a49SYabin Cui min_spaces = len(replace[0]) 131*01826a49SYabin Cui for line in replace: 132*01826a49SYabin Cui spaces = 0 133*01826a49SYabin Cui for i, c in enumerate(line): 134*01826a49SYabin Cui if c != ' ': 135*01826a49SYabin Cui # Non-preprocessor line ==> skip the fixup 136*01826a49SYabin Cui if not all_pound and c != '#': 137*01826a49SYabin Cui return replace 138*01826a49SYabin Cui spaces = i 139*01826a49SYabin Cui break 140*01826a49SYabin Cui min_spaces = min(min_spaces, spaces) 141*01826a49SYabin Cui 142*01826a49SYabin Cui replace = [line[min_spaces:] for line in replace] 143*01826a49SYabin Cui 144*01826a49SYabin Cui if all_pound: 145*01826a49SYabin Cui replace = ["#" + line for line in replace] 146*01826a49SYabin Cui 147*01826a49SYabin Cui return replace 148*01826a49SYabin Cui 149*01826a49SYabin Cui def _handle_if_block(self, macro, idx, is_true, prepend): 150*01826a49SYabin Cui """ 151*01826a49SYabin Cui Remove the #if or #elif block starting on this line. 152*01826a49SYabin Cui """ 153*01826a49SYabin Cui REMOVE_ONE = 0 154*01826a49SYabin Cui KEEP_ONE = 1 155*01826a49SYabin Cui REMOVE_REST = 2 156*01826a49SYabin Cui 157*01826a49SYabin Cui if is_true: 158*01826a49SYabin Cui state = KEEP_ONE 159*01826a49SYabin Cui else: 160*01826a49SYabin Cui state = REMOVE_ONE 161*01826a49SYabin Cui 162*01826a49SYabin Cui line = self._inlines[idx] 163*01826a49SYabin Cui is_if = self._if.match(line) is not None 164*01826a49SYabin Cui assert is_if or self._elif.match(line) is not None 165*01826a49SYabin Cui depth = 0 166*01826a49SYabin Cui 167*01826a49SYabin Cui start_idx = idx 168*01826a49SYabin Cui 169*01826a49SYabin Cui idx += 1 170*01826a49SYabin Cui replace = prepend 171*01826a49SYabin Cui finished = False 172*01826a49SYabin Cui while idx < len(self._inlines): 173*01826a49SYabin Cui line = self._inlines[idx] 174*01826a49SYabin Cui # Nested if statement 175*01826a49SYabin Cui if self._if.match(line): 176*01826a49SYabin Cui depth += 1 177*01826a49SYabin Cui idx += 1 178*01826a49SYabin Cui continue 179*01826a49SYabin Cui # We're inside a nested statement 180*01826a49SYabin Cui if depth > 0: 181*01826a49SYabin Cui if self._endif.match(line): 182*01826a49SYabin Cui depth -= 1 183*01826a49SYabin Cui idx += 1 184*01826a49SYabin Cui continue 185*01826a49SYabin Cui 186*01826a49SYabin Cui # We're at the original depth 187*01826a49SYabin Cui 188*01826a49SYabin Cui # Looking only for an endif. 189*01826a49SYabin Cui # We've found a true statement, but haven't 190*01826a49SYabin Cui # completely elided the if block, so we just 191*01826a49SYabin Cui # remove the remainder. 192*01826a49SYabin Cui if state == REMOVE_REST: 193*01826a49SYabin Cui if self._endif.match(line): 194*01826a49SYabin Cui if is_if: 195*01826a49SYabin Cui # Remove the endif because we took the first if 196*01826a49SYabin Cui idx += 1 197*01826a49SYabin Cui finished = True 198*01826a49SYabin Cui break 199*01826a49SYabin Cui idx += 1 200*01826a49SYabin Cui continue 201*01826a49SYabin Cui 202*01826a49SYabin Cui if state == KEEP_ONE: 203*01826a49SYabin Cui m = self._elif.match(line) 204*01826a49SYabin Cui if self._endif.match(line): 205*01826a49SYabin Cui replace += self._inlines[start_idx + 1:idx] 206*01826a49SYabin Cui idx += 1 207*01826a49SYabin Cui finished = True 208*01826a49SYabin Cui break 209*01826a49SYabin Cui if self._elif.match(line) or self._else.match(line): 210*01826a49SYabin Cui replace += self._inlines[start_idx + 1:idx] 211*01826a49SYabin Cui state = REMOVE_REST 212*01826a49SYabin Cui idx += 1 213*01826a49SYabin Cui continue 214*01826a49SYabin Cui 215*01826a49SYabin Cui if state == REMOVE_ONE: 216*01826a49SYabin Cui m = self._elif.match(line) 217*01826a49SYabin Cui if m is not None: 218*01826a49SYabin Cui if is_if: 219*01826a49SYabin Cui idx += 1 220*01826a49SYabin Cui b = m.start('elif') 221*01826a49SYabin Cui e = m.end('elif') 222*01826a49SYabin Cui assert e - b == 2 223*01826a49SYabin Cui replace.append(line[:b] + line[e:]) 224*01826a49SYabin Cui finished = True 225*01826a49SYabin Cui break 226*01826a49SYabin Cui m = self._else.match(line) 227*01826a49SYabin Cui if m is not None: 228*01826a49SYabin Cui if is_if: 229*01826a49SYabin Cui idx += 1 230*01826a49SYabin Cui while self._endif.match(self._inlines[idx]) is None: 231*01826a49SYabin Cui replace.append(self._inlines[idx]) 232*01826a49SYabin Cui idx += 1 233*01826a49SYabin Cui idx += 1 234*01826a49SYabin Cui finished = True 235*01826a49SYabin Cui break 236*01826a49SYabin Cui if self._endif.match(line): 237*01826a49SYabin Cui if is_if: 238*01826a49SYabin Cui # Remove the endif because no other elifs 239*01826a49SYabin Cui idx += 1 240*01826a49SYabin Cui finished = True 241*01826a49SYabin Cui break 242*01826a49SYabin Cui idx += 1 243*01826a49SYabin Cui continue 244*01826a49SYabin Cui if not finished: 245*01826a49SYabin Cui raise RuntimeError("Unterminated if block!") 246*01826a49SYabin Cui 247*01826a49SYabin Cui replace = self._fixup_indentation(macro, replace) 248*01826a49SYabin Cui 249*01826a49SYabin Cui self._log(f"\tHardwiring {macro}") 250*01826a49SYabin Cui if start_idx > 0: 251*01826a49SYabin Cui self._log(f"\t\t {self._inlines[start_idx - 1][:-1]}") 252*01826a49SYabin Cui for x in range(start_idx, idx): 253*01826a49SYabin Cui self._log(f"\t\t- {self._inlines[x][:-1]}") 254*01826a49SYabin Cui for line in replace: 255*01826a49SYabin Cui self._log(f"\t\t+ {line[:-1]}") 256*01826a49SYabin Cui if idx < len(self._inlines): 257*01826a49SYabin Cui self._log(f"\t\t {self._inlines[idx][:-1]}") 258*01826a49SYabin Cui 259*01826a49SYabin Cui return idx, replace 260*01826a49SYabin Cui 261*01826a49SYabin Cui def _preprocess_once(self): 262*01826a49SYabin Cui outlines = [] 263*01826a49SYabin Cui idx = 0 264*01826a49SYabin Cui changed = False 265*01826a49SYabin Cui while idx < len(self._inlines): 266*01826a49SYabin Cui line = self._inlines[idx] 267*01826a49SYabin Cui sline = self._strip_comments(line) 268*01826a49SYabin Cui m = self._ifdef.fullmatch(sline) 269*01826a49SYabin Cui if_true = False 270*01826a49SYabin Cui if m is None: 271*01826a49SYabin Cui m = self._if_defined_value.fullmatch(sline) 272*01826a49SYabin Cui if m is None: 273*01826a49SYabin Cui m = self._if_defined.match(sline) 274*01826a49SYabin Cui if m is None: 275*01826a49SYabin Cui m = self._if_true.match(sline) 276*01826a49SYabin Cui if_true = (m is not None) 277*01826a49SYabin Cui if m is None: 278*01826a49SYabin Cui outlines.append(line) 279*01826a49SYabin Cui idx += 1 280*01826a49SYabin Cui continue 281*01826a49SYabin Cui 282*01826a49SYabin Cui groups = m.groupdict() 283*01826a49SYabin Cui macro = groups['macro'] 284*01826a49SYabin Cui op = groups.get('op') 285*01826a49SYabin Cui 286*01826a49SYabin Cui if not (macro in self._defs or macro in self._undefs): 287*01826a49SYabin Cui outlines.append(line) 288*01826a49SYabin Cui idx += 1 289*01826a49SYabin Cui continue 290*01826a49SYabin Cui 291*01826a49SYabin Cui defined = macro in self._defs 292*01826a49SYabin Cui 293*01826a49SYabin Cui # Needed variables set: 294*01826a49SYabin Cui # resolved: Is the statement fully resolved? 295*01826a49SYabin Cui # is_true: If resolved, is the statement true? 296*01826a49SYabin Cui ifdef = False 297*01826a49SYabin Cui if if_true: 298*01826a49SYabin Cui if not defined: 299*01826a49SYabin Cui outlines.append(line) 300*01826a49SYabin Cui idx += 1 301*01826a49SYabin Cui continue 302*01826a49SYabin Cui 303*01826a49SYabin Cui defined_value = self._defs[macro] 304*01826a49SYabin Cui is_int = True 305*01826a49SYabin Cui try: 306*01826a49SYabin Cui defined_value = int(defined_value) 307*01826a49SYabin Cui except TypeError: 308*01826a49SYabin Cui is_int = False 309*01826a49SYabin Cui except ValueError: 310*01826a49SYabin Cui is_int = False 311*01826a49SYabin Cui 312*01826a49SYabin Cui resolved = is_int 313*01826a49SYabin Cui is_true = (defined_value != 0) 314*01826a49SYabin Cui 315*01826a49SYabin Cui if resolved and op is not None: 316*01826a49SYabin Cui if op == '&&': 317*01826a49SYabin Cui resolved = not is_true 318*01826a49SYabin Cui else: 319*01826a49SYabin Cui assert op == '||' 320*01826a49SYabin Cui resolved = is_true 321*01826a49SYabin Cui 322*01826a49SYabin Cui else: 323*01826a49SYabin Cui ifdef = groups.get('not') is None 324*01826a49SYabin Cui elseif = groups.get('elif') is not None 325*01826a49SYabin Cui 326*01826a49SYabin Cui macro2 = groups.get('macro2') 327*01826a49SYabin Cui cmp = groups.get('cmp') 328*01826a49SYabin Cui value = groups.get('value') 329*01826a49SYabin Cui openp = groups.get('openp') 330*01826a49SYabin Cui closep = groups.get('closep') 331*01826a49SYabin Cui 332*01826a49SYabin Cui is_true = (ifdef == defined) 333*01826a49SYabin Cui resolved = True 334*01826a49SYabin Cui if op is not None: 335*01826a49SYabin Cui if op == '&&': 336*01826a49SYabin Cui resolved = not is_true 337*01826a49SYabin Cui else: 338*01826a49SYabin Cui assert op == '||' 339*01826a49SYabin Cui resolved = is_true 340*01826a49SYabin Cui 341*01826a49SYabin Cui if macro2 is not None and not resolved: 342*01826a49SYabin Cui assert ifdef and defined and op == '&&' and cmp is not None 343*01826a49SYabin Cui # If the statement is true, but we have a single value check, then 344*01826a49SYabin Cui # check the value. 345*01826a49SYabin Cui defined_value = self._defs[macro] 346*01826a49SYabin Cui are_ints = True 347*01826a49SYabin Cui try: 348*01826a49SYabin Cui defined_value = int(defined_value) 349*01826a49SYabin Cui value = int(value) 350*01826a49SYabin Cui except TypeError: 351*01826a49SYabin Cui are_ints = False 352*01826a49SYabin Cui except ValueError: 353*01826a49SYabin Cui are_ints = False 354*01826a49SYabin Cui if ( 355*01826a49SYabin Cui macro == macro2 and 356*01826a49SYabin Cui ((openp is None) == (closep is None)) and 357*01826a49SYabin Cui are_ints 358*01826a49SYabin Cui ): 359*01826a49SYabin Cui resolved = True 360*01826a49SYabin Cui if cmp == '<': 361*01826a49SYabin Cui is_true = defined_value < value 362*01826a49SYabin Cui elif cmp == '<=': 363*01826a49SYabin Cui is_true = defined_value <= value 364*01826a49SYabin Cui elif cmp == '==': 365*01826a49SYabin Cui is_true = defined_value == value 366*01826a49SYabin Cui elif cmp == '!=': 367*01826a49SYabin Cui is_true = defined_value != value 368*01826a49SYabin Cui elif cmp == '>=': 369*01826a49SYabin Cui is_true = defined_value >= value 370*01826a49SYabin Cui elif cmp == '>': 371*01826a49SYabin Cui is_true = defined_value > value 372*01826a49SYabin Cui else: 373*01826a49SYabin Cui resolved = False 374*01826a49SYabin Cui 375*01826a49SYabin Cui if op is not None and not resolved: 376*01826a49SYabin Cui # Remove the first op in the line + spaces 377*01826a49SYabin Cui if op == '&&': 378*01826a49SYabin Cui opre = op 379*01826a49SYabin Cui else: 380*01826a49SYabin Cui assert op == '||' 381*01826a49SYabin Cui opre = r'\|\|' 382*01826a49SYabin Cui needle = re.compile(fr"(?P<if>\s*#\s*(el)?if\s+).*?(?P<op>{opre}\s*)") 383*01826a49SYabin Cui match = needle.match(line) 384*01826a49SYabin Cui assert match is not None 385*01826a49SYabin Cui newline = line[:match.end('if')] + line[match.end('op'):] 386*01826a49SYabin Cui 387*01826a49SYabin Cui self._log(f"\tHardwiring partially resolved {macro}") 388*01826a49SYabin Cui self._log(f"\t\t- {line[:-1]}") 389*01826a49SYabin Cui self._log(f"\t\t+ {newline[:-1]}") 390*01826a49SYabin Cui 391*01826a49SYabin Cui outlines.append(newline) 392*01826a49SYabin Cui idx += 1 393*01826a49SYabin Cui continue 394*01826a49SYabin Cui 395*01826a49SYabin Cui # Skip any statements we cannot fully compute 396*01826a49SYabin Cui if not resolved: 397*01826a49SYabin Cui outlines.append(line) 398*01826a49SYabin Cui idx += 1 399*01826a49SYabin Cui continue 400*01826a49SYabin Cui 401*01826a49SYabin Cui prepend = [] 402*01826a49SYabin Cui if macro in self._replaces: 403*01826a49SYabin Cui assert not ifdef 404*01826a49SYabin Cui assert op is None 405*01826a49SYabin Cui value = self._replaces.pop(macro) 406*01826a49SYabin Cui prepend = [f"#define {macro} {value}\n"] 407*01826a49SYabin Cui 408*01826a49SYabin Cui idx, replace = self._handle_if_block(macro, idx, is_true, prepend) 409*01826a49SYabin Cui outlines += replace 410*01826a49SYabin Cui changed = True 411*01826a49SYabin Cui 412*01826a49SYabin Cui return changed, outlines 413*01826a49SYabin Cui 414*01826a49SYabin Cui def preprocess(self, filename): 415*01826a49SYabin Cui with open(filename, 'r') as f: 416*01826a49SYabin Cui self._inlines = f.readlines() 417*01826a49SYabin Cui changed = True 418*01826a49SYabin Cui iters = 0 419*01826a49SYabin Cui while changed: 420*01826a49SYabin Cui iters += 1 421*01826a49SYabin Cui changed, outlines = self._preprocess_once() 422*01826a49SYabin Cui self._inlines = outlines 423*01826a49SYabin Cui 424*01826a49SYabin Cui with open(filename, 'w') as f: 425*01826a49SYabin Cui f.write(''.join(self._inlines)) 426*01826a49SYabin Cui 427*01826a49SYabin Cui 428*01826a49SYabin Cuiclass Freestanding(object): 429*01826a49SYabin Cui def __init__( 430*01826a49SYabin Cui self, zstd_deps: str, mem: str, source_lib: str, output_lib: str, 431*01826a49SYabin Cui external_xxhash: bool, xxh64_state: Optional[str], 432*01826a49SYabin Cui xxh64_prefix: Optional[str], rewritten_includes: [(str, str)], 433*01826a49SYabin Cui defs: [(str, Optional[str])], replaces: [(str, str)], 434*01826a49SYabin Cui undefs: [str], excludes: [str], seds: [str], spdx: bool, 435*01826a49SYabin Cui ): 436*01826a49SYabin Cui self._zstd_deps = zstd_deps 437*01826a49SYabin Cui self._mem = mem 438*01826a49SYabin Cui self._src_lib = source_lib 439*01826a49SYabin Cui self._dst_lib = output_lib 440*01826a49SYabin Cui self._external_xxhash = external_xxhash 441*01826a49SYabin Cui self._xxh64_state = xxh64_state 442*01826a49SYabin Cui self._xxh64_prefix = xxh64_prefix 443*01826a49SYabin Cui self._rewritten_includes = rewritten_includes 444*01826a49SYabin Cui self._defs = defs 445*01826a49SYabin Cui self._replaces = replaces 446*01826a49SYabin Cui self._undefs = undefs 447*01826a49SYabin Cui self._excludes = excludes 448*01826a49SYabin Cui self._seds = seds 449*01826a49SYabin Cui self._spdx = spdx 450*01826a49SYabin Cui 451*01826a49SYabin Cui def _dst_lib_file_paths(self): 452*01826a49SYabin Cui """ 453*01826a49SYabin Cui Yields all the file paths in the dst_lib. 454*01826a49SYabin Cui """ 455*01826a49SYabin Cui for root, dirname, filenames in os.walk(self._dst_lib): 456*01826a49SYabin Cui for filename in filenames: 457*01826a49SYabin Cui filepath = os.path.join(root, filename) 458*01826a49SYabin Cui yield filepath 459*01826a49SYabin Cui 460*01826a49SYabin Cui def _log(self, *args, **kwargs): 461*01826a49SYabin Cui print(*args, **kwargs) 462*01826a49SYabin Cui 463*01826a49SYabin Cui def _copy_file(self, lib_path): 464*01826a49SYabin Cui suffixes = [".c", ".h", ".S"] 465*01826a49SYabin Cui if not any((lib_path.endswith(suffix) for suffix in suffixes)): 466*01826a49SYabin Cui return 467*01826a49SYabin Cui if lib_path in SKIPPED_FILES: 468*01826a49SYabin Cui self._log(f"\tSkipping file: {lib_path}") 469*01826a49SYabin Cui return 470*01826a49SYabin Cui if self._external_xxhash and lib_path in XXHASH_FILES: 471*01826a49SYabin Cui self._log(f"\tSkipping xxhash file: {lib_path}") 472*01826a49SYabin Cui return 473*01826a49SYabin Cui 474*01826a49SYabin Cui src_path = os.path.join(self._src_lib, lib_path) 475*01826a49SYabin Cui dst_path = os.path.join(self._dst_lib, lib_path) 476*01826a49SYabin Cui self._log(f"\tCopying: {src_path} -> {dst_path}") 477*01826a49SYabin Cui shutil.copyfile(src_path, dst_path) 478*01826a49SYabin Cui 479*01826a49SYabin Cui def _copy_source_lib(self): 480*01826a49SYabin Cui self._log("Copying source library into output library") 481*01826a49SYabin Cui 482*01826a49SYabin Cui assert os.path.exists(self._src_lib) 483*01826a49SYabin Cui os.makedirs(self._dst_lib, exist_ok=True) 484*01826a49SYabin Cui self._copy_file("zstd.h") 485*01826a49SYabin Cui self._copy_file("zstd_errors.h") 486*01826a49SYabin Cui for subdir in INCLUDED_SUBDIRS: 487*01826a49SYabin Cui src_dir = os.path.join(self._src_lib, subdir) 488*01826a49SYabin Cui dst_dir = os.path.join(self._dst_lib, subdir) 489*01826a49SYabin Cui 490*01826a49SYabin Cui assert os.path.exists(src_dir) 491*01826a49SYabin Cui os.makedirs(dst_dir, exist_ok=True) 492*01826a49SYabin Cui 493*01826a49SYabin Cui for filename in os.listdir(src_dir): 494*01826a49SYabin Cui lib_path = os.path.join(subdir, filename) 495*01826a49SYabin Cui self._copy_file(lib_path) 496*01826a49SYabin Cui 497*01826a49SYabin Cui def _copy_zstd_deps(self): 498*01826a49SYabin Cui dst_zstd_deps = os.path.join(self._dst_lib, "common", "zstd_deps.h") 499*01826a49SYabin Cui self._log(f"Copying zstd_deps: {self._zstd_deps} -> {dst_zstd_deps}") 500*01826a49SYabin Cui shutil.copyfile(self._zstd_deps, dst_zstd_deps) 501*01826a49SYabin Cui 502*01826a49SYabin Cui def _copy_mem(self): 503*01826a49SYabin Cui dst_mem = os.path.join(self._dst_lib, "common", "mem.h") 504*01826a49SYabin Cui self._log(f"Copying mem: {self._mem} -> {dst_mem}") 505*01826a49SYabin Cui shutil.copyfile(self._mem, dst_mem) 506*01826a49SYabin Cui 507*01826a49SYabin Cui def _hardwire_preprocessor(self, name: str, value: Optional[str] = None, undef=False): 508*01826a49SYabin Cui """ 509*01826a49SYabin Cui If value=None then hardwire that it is defined, but not what the value is. 510*01826a49SYabin Cui If undef=True then value must be None. 511*01826a49SYabin Cui If value='' then the macro is defined to '' exactly. 512*01826a49SYabin Cui """ 513*01826a49SYabin Cui assert not (undef and value is not None) 514*01826a49SYabin Cui for filepath in self._dst_lib_file_paths(): 515*01826a49SYabin Cui file = FileLines(filepath) 516*01826a49SYabin Cui 517*01826a49SYabin Cui def _hardwire_defines(self): 518*01826a49SYabin Cui self._log("Hardwiring macros") 519*01826a49SYabin Cui partial_preprocessor = PartialPreprocessor(self._defs, self._replaces, self._undefs) 520*01826a49SYabin Cui for filepath in self._dst_lib_file_paths(): 521*01826a49SYabin Cui partial_preprocessor.preprocess(filepath) 522*01826a49SYabin Cui 523*01826a49SYabin Cui def _remove_excludes(self): 524*01826a49SYabin Cui self._log("Removing excluded sections") 525*01826a49SYabin Cui for exclude in self._excludes: 526*01826a49SYabin Cui self._log(f"\tRemoving excluded sections for: {exclude}") 527*01826a49SYabin Cui begin_re = re.compile(f"BEGIN {exclude}") 528*01826a49SYabin Cui end_re = re.compile(f"END {exclude}") 529*01826a49SYabin Cui for filepath in self._dst_lib_file_paths(): 530*01826a49SYabin Cui file = FileLines(filepath) 531*01826a49SYabin Cui outlines = [] 532*01826a49SYabin Cui skipped = [] 533*01826a49SYabin Cui emit = True 534*01826a49SYabin Cui for line in file.lines: 535*01826a49SYabin Cui if emit and begin_re.search(line) is not None: 536*01826a49SYabin Cui assert end_re.search(line) is None 537*01826a49SYabin Cui emit = False 538*01826a49SYabin Cui if emit: 539*01826a49SYabin Cui outlines.append(line) 540*01826a49SYabin Cui else: 541*01826a49SYabin Cui skipped.append(line) 542*01826a49SYabin Cui if end_re.search(line) is not None: 543*01826a49SYabin Cui assert begin_re.search(line) is None 544*01826a49SYabin Cui self._log(f"\t\tRemoving excluded section: {exclude}") 545*01826a49SYabin Cui for s in skipped: 546*01826a49SYabin Cui self._log(f"\t\t\t- {s}") 547*01826a49SYabin Cui emit = True 548*01826a49SYabin Cui skipped = [] 549*01826a49SYabin Cui if not emit: 550*01826a49SYabin Cui raise RuntimeError("Excluded section unfinished!") 551*01826a49SYabin Cui file.lines = outlines 552*01826a49SYabin Cui file.write() 553*01826a49SYabin Cui 554*01826a49SYabin Cui def _rewrite_include(self, original, rewritten): 555*01826a49SYabin Cui self._log(f"\tRewriting include: {original} -> {rewritten}") 556*01826a49SYabin Cui regex = re.compile(f"\\s*#\\s*include\\s*(?P<include>{original})") 557*01826a49SYabin Cui for filepath in self._dst_lib_file_paths(): 558*01826a49SYabin Cui file = FileLines(filepath) 559*01826a49SYabin Cui for i, line in enumerate(file.lines): 560*01826a49SYabin Cui match = regex.match(line) 561*01826a49SYabin Cui if match is None: 562*01826a49SYabin Cui continue 563*01826a49SYabin Cui s = match.start('include') 564*01826a49SYabin Cui e = match.end('include') 565*01826a49SYabin Cui file.lines[i] = line[:s] + rewritten + line[e:] 566*01826a49SYabin Cui file.write() 567*01826a49SYabin Cui 568*01826a49SYabin Cui def _rewrite_includes(self): 569*01826a49SYabin Cui self._log("Rewriting includes") 570*01826a49SYabin Cui for original, rewritten in self._rewritten_includes: 571*01826a49SYabin Cui self._rewrite_include(original, rewritten) 572*01826a49SYabin Cui 573*01826a49SYabin Cui def _replace_xxh64_prefix(self): 574*01826a49SYabin Cui if self._xxh64_prefix is None: 575*01826a49SYabin Cui return 576*01826a49SYabin Cui self._log(f"Replacing XXH64 prefix with {self._xxh64_prefix}") 577*01826a49SYabin Cui replacements = [] 578*01826a49SYabin Cui if self._xxh64_state is not None: 579*01826a49SYabin Cui replacements.append( 580*01826a49SYabin Cui (re.compile(r"([^\w]|^)(?P<orig>XXH64_state_t)([^\w]|$)"), self._xxh64_state) 581*01826a49SYabin Cui ) 582*01826a49SYabin Cui if self._xxh64_prefix is not None: 583*01826a49SYabin Cui replacements.append( 584*01826a49SYabin Cui (re.compile(r"([^\w]|^)(?P<orig>XXH64)[\(_]"), self._xxh64_prefix) 585*01826a49SYabin Cui ) 586*01826a49SYabin Cui for filepath in self._dst_lib_file_paths(): 587*01826a49SYabin Cui file = FileLines(filepath) 588*01826a49SYabin Cui for i, line in enumerate(file.lines): 589*01826a49SYabin Cui modified = False 590*01826a49SYabin Cui for regex, replacement in replacements: 591*01826a49SYabin Cui match = regex.search(line) 592*01826a49SYabin Cui while match is not None: 593*01826a49SYabin Cui modified = True 594*01826a49SYabin Cui b = match.start('orig') 595*01826a49SYabin Cui e = match.end('orig') 596*01826a49SYabin Cui line = line[:b] + replacement + line[e:] 597*01826a49SYabin Cui match = regex.search(line) 598*01826a49SYabin Cui if modified: 599*01826a49SYabin Cui self._log(f"\t- {file.lines[i][:-1]}") 600*01826a49SYabin Cui self._log(f"\t+ {line[:-1]}") 601*01826a49SYabin Cui file.lines[i] = line 602*01826a49SYabin Cui file.write() 603*01826a49SYabin Cui 604*01826a49SYabin Cui def _parse_sed(self, sed): 605*01826a49SYabin Cui assert sed[0] == 's' 606*01826a49SYabin Cui delim = sed[1] 607*01826a49SYabin Cui match = re.fullmatch(f's{delim}(.+){delim}(.*){delim}(.*)', sed) 608*01826a49SYabin Cui assert match is not None 609*01826a49SYabin Cui regex = re.compile(match.group(1)) 610*01826a49SYabin Cui format_str = match.group(2) 611*01826a49SYabin Cui is_global = match.group(3) == 'g' 612*01826a49SYabin Cui return regex, format_str, is_global 613*01826a49SYabin Cui 614*01826a49SYabin Cui def _process_sed(self, sed): 615*01826a49SYabin Cui self._log(f"Processing sed: {sed}") 616*01826a49SYabin Cui regex, format_str, is_global = self._parse_sed(sed) 617*01826a49SYabin Cui 618*01826a49SYabin Cui for filepath in self._dst_lib_file_paths(): 619*01826a49SYabin Cui file = FileLines(filepath) 620*01826a49SYabin Cui for i, line in enumerate(file.lines): 621*01826a49SYabin Cui modified = False 622*01826a49SYabin Cui while True: 623*01826a49SYabin Cui match = regex.search(line) 624*01826a49SYabin Cui if match is None: 625*01826a49SYabin Cui break 626*01826a49SYabin Cui replacement = format_str.format(match.groups(''), match.groupdict('')) 627*01826a49SYabin Cui b = match.start() 628*01826a49SYabin Cui e = match.end() 629*01826a49SYabin Cui line = line[:b] + replacement + line[e:] 630*01826a49SYabin Cui modified = True 631*01826a49SYabin Cui if not is_global: 632*01826a49SYabin Cui break 633*01826a49SYabin Cui if modified: 634*01826a49SYabin Cui self._log(f"\t- {file.lines[i][:-1]}") 635*01826a49SYabin Cui self._log(f"\t+ {line[:-1]}") 636*01826a49SYabin Cui file.lines[i] = line 637*01826a49SYabin Cui file.write() 638*01826a49SYabin Cui 639*01826a49SYabin Cui def _process_seds(self): 640*01826a49SYabin Cui self._log("Processing seds") 641*01826a49SYabin Cui for sed in self._seds: 642*01826a49SYabin Cui self._process_sed(sed) 643*01826a49SYabin Cui 644*01826a49SYabin Cui def _process_spdx(self): 645*01826a49SYabin Cui if not self._spdx: 646*01826a49SYabin Cui return 647*01826a49SYabin Cui self._log("Processing spdx") 648*01826a49SYabin Cui SPDX_C = "// SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause\n" 649*01826a49SYabin Cui SPDX_H_S = "/* SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause */\n" 650*01826a49SYabin Cui for filepath in self._dst_lib_file_paths(): 651*01826a49SYabin Cui file = FileLines(filepath) 652*01826a49SYabin Cui if file.lines[0] == SPDX_C or file.lines[0] == SPDX_H_S: 653*01826a49SYabin Cui continue 654*01826a49SYabin Cui for line in file.lines: 655*01826a49SYabin Cui if "SPDX-License-Identifier" in line: 656*01826a49SYabin Cui raise RuntimeError(f"Unexpected SPDX license identifier: {file.filename} {repr(line)}") 657*01826a49SYabin Cui if file.filename.endswith(".c"): 658*01826a49SYabin Cui file.lines.insert(0, SPDX_C) 659*01826a49SYabin Cui elif file.filename.endswith(".h") or file.filename.endswith(".S"): 660*01826a49SYabin Cui file.lines.insert(0, SPDX_H_S) 661*01826a49SYabin Cui else: 662*01826a49SYabin Cui raise RuntimeError(f"Unexpected file extension: {file.filename}") 663*01826a49SYabin Cui file.write() 664*01826a49SYabin Cui 665*01826a49SYabin Cui 666*01826a49SYabin Cui 667*01826a49SYabin Cui def go(self): 668*01826a49SYabin Cui self._copy_source_lib() 669*01826a49SYabin Cui self._copy_zstd_deps() 670*01826a49SYabin Cui self._copy_mem() 671*01826a49SYabin Cui self._hardwire_defines() 672*01826a49SYabin Cui self._remove_excludes() 673*01826a49SYabin Cui self._rewrite_includes() 674*01826a49SYabin Cui self._replace_xxh64_prefix() 675*01826a49SYabin Cui self._process_seds() 676*01826a49SYabin Cui self._process_spdx() 677*01826a49SYabin Cui 678*01826a49SYabin Cui 679*01826a49SYabin Cuidef parse_optional_pair(defines: [str]) -> [(str, Optional[str])]: 680*01826a49SYabin Cui output = [] 681*01826a49SYabin Cui for define in defines: 682*01826a49SYabin Cui parsed = define.split('=') 683*01826a49SYabin Cui if len(parsed) == 1: 684*01826a49SYabin Cui output.append((parsed[0], None)) 685*01826a49SYabin Cui elif len(parsed) == 2: 686*01826a49SYabin Cui output.append((parsed[0], parsed[1])) 687*01826a49SYabin Cui else: 688*01826a49SYabin Cui raise RuntimeError(f"Bad define: {define}") 689*01826a49SYabin Cui return output 690*01826a49SYabin Cui 691*01826a49SYabin Cui 692*01826a49SYabin Cuidef parse_pair(rewritten_includes: [str]) -> [(str, str)]: 693*01826a49SYabin Cui output = [] 694*01826a49SYabin Cui for rewritten_include in rewritten_includes: 695*01826a49SYabin Cui parsed = rewritten_include.split('=') 696*01826a49SYabin Cui if len(parsed) == 2: 697*01826a49SYabin Cui output.append((parsed[0], parsed[1])) 698*01826a49SYabin Cui else: 699*01826a49SYabin Cui raise RuntimeError(f"Bad rewritten include: {rewritten_include}") 700*01826a49SYabin Cui return output 701*01826a49SYabin Cui 702*01826a49SYabin Cui 703*01826a49SYabin Cui 704*01826a49SYabin Cuidef main(name, args): 705*01826a49SYabin Cui parser = argparse.ArgumentParser(prog=name) 706*01826a49SYabin Cui parser.add_argument("--zstd-deps", default="zstd_deps.h", help="Zstd dependencies file") 707*01826a49SYabin Cui parser.add_argument("--mem", default="mem.h", help="Memory module") 708*01826a49SYabin Cui parser.add_argument("--source-lib", default="../../lib", help="Location of the zstd library") 709*01826a49SYabin Cui parser.add_argument("--output-lib", default="./freestanding_lib", help="Where to output the freestanding zstd library") 710*01826a49SYabin Cui parser.add_argument("--xxhash", default=None, help="Alternate external xxhash include e.g. --xxhash='<xxhash.h>'. If set xxhash is not included.") 711*01826a49SYabin Cui parser.add_argument("--xxh64-state", default=None, help="Alternate XXH64 state type (excluding _) e.g. --xxh64-state='struct xxh64_state'") 712*01826a49SYabin Cui parser.add_argument("--xxh64-prefix", default=None, help="Alternate XXH64 function prefix (excluding _) e.g. --xxh64-prefix=xxh64") 713*01826a49SYabin Cui parser.add_argument("--rewrite-include", default=[], dest="rewritten_includes", action="append", help="Rewrite an include REGEX=NEW (e.g. '<stddef\\.h>=<linux/types.h>')") 714*01826a49SYabin Cui parser.add_argument("--sed", default=[], dest="seds", action="append", help="Apply a sed replacement. Format: `s/REGEX/FORMAT/[g]`. REGEX is a Python regex. FORMAT is a Python format string formatted by the regex dict.") 715*01826a49SYabin Cui parser.add_argument("--spdx", action="store_true", help="Add SPDX License Identifiers") 716*01826a49SYabin Cui parser.add_argument("-D", "--define", default=[], dest="defs", action="append", help="Pre-define this macro (can be passed multiple times)") 717*01826a49SYabin Cui parser.add_argument("-U", "--undefine", default=[], dest="undefs", action="append", help="Pre-undefine this macro (can be passed multiple times)") 718*01826a49SYabin Cui parser.add_argument("-R", "--replace", default=[], dest="replaces", action="append", help="Pre-define this macro and replace the first ifndef block with its definition") 719*01826a49SYabin Cui parser.add_argument("-E", "--exclude", default=[], dest="excludes", action="append", help="Exclude all lines between 'BEGIN <EXCLUDE>' and 'END <EXCLUDE>'") 720*01826a49SYabin Cui args = parser.parse_args(args) 721*01826a49SYabin Cui 722*01826a49SYabin Cui # Always remove threading 723*01826a49SYabin Cui if "ZSTD_MULTITHREAD" not in args.undefs: 724*01826a49SYabin Cui args.undefs.append("ZSTD_MULTITHREAD") 725*01826a49SYabin Cui 726*01826a49SYabin Cui args.defs = parse_optional_pair(args.defs) 727*01826a49SYabin Cui for name, _ in args.defs: 728*01826a49SYabin Cui if name in args.undefs: 729*01826a49SYabin Cui raise RuntimeError(f"{name} is both defined and undefined!") 730*01826a49SYabin Cui 731*01826a49SYabin Cui # Always set tracing to 0 732*01826a49SYabin Cui if "ZSTD_NO_TRACE" not in (arg[0] for arg in args.defs): 733*01826a49SYabin Cui args.defs.append(("ZSTD_NO_TRACE", None)) 734*01826a49SYabin Cui args.defs.append(("ZSTD_TRACE", "0")) 735*01826a49SYabin Cui 736*01826a49SYabin Cui args.replaces = parse_pair(args.replaces) 737*01826a49SYabin Cui for name, _ in args.replaces: 738*01826a49SYabin Cui if name in args.undefs or name in args.defs: 739*01826a49SYabin Cui raise RuntimeError(f"{name} is both replaced and (un)defined!") 740*01826a49SYabin Cui 741*01826a49SYabin Cui args.rewritten_includes = parse_pair(args.rewritten_includes) 742*01826a49SYabin Cui 743*01826a49SYabin Cui external_xxhash = False 744*01826a49SYabin Cui if args.xxhash is not None: 745*01826a49SYabin Cui external_xxhash = True 746*01826a49SYabin Cui args.rewritten_includes.append(('"(\\.\\./common/)?xxhash.h"', args.xxhash)) 747*01826a49SYabin Cui 748*01826a49SYabin Cui if args.xxh64_prefix is not None: 749*01826a49SYabin Cui if not external_xxhash: 750*01826a49SYabin Cui raise RuntimeError("--xxh64-prefix may only be used with --xxhash provided") 751*01826a49SYabin Cui 752*01826a49SYabin Cui if args.xxh64_state is not None: 753*01826a49SYabin Cui if not external_xxhash: 754*01826a49SYabin Cui raise RuntimeError("--xxh64-state may only be used with --xxhash provided") 755*01826a49SYabin Cui 756*01826a49SYabin Cui Freestanding( 757*01826a49SYabin Cui args.zstd_deps, 758*01826a49SYabin Cui args.mem, 759*01826a49SYabin Cui args.source_lib, 760*01826a49SYabin Cui args.output_lib, 761*01826a49SYabin Cui external_xxhash, 762*01826a49SYabin Cui args.xxh64_state, 763*01826a49SYabin Cui args.xxh64_prefix, 764*01826a49SYabin Cui args.rewritten_includes, 765*01826a49SYabin Cui args.defs, 766*01826a49SYabin Cui args.replaces, 767*01826a49SYabin Cui args.undefs, 768*01826a49SYabin Cui args.excludes, 769*01826a49SYabin Cui args.seds, 770*01826a49SYabin Cui args.spdx, 771*01826a49SYabin Cui ).go() 772*01826a49SYabin Cui 773*01826a49SYabin Cuiif __name__ == "__main__": 774*01826a49SYabin Cui main(sys.argv[0], sys.argv[1:]) 775