#!/usr/bin/env python3
#
# Copyright The Mbed TLS Contributors
# SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later

"""
This script confirms that the naming of all symbols and identifiers in Mbed TLS
are consistent with the house style and are also self-consistent. It only runs
on Linux and macOS since it depends on nm.

It contains two major Python classes, CodeParser and NameChecker. They both have
a comprehensive "run-all" function (comprehensive_parse() and perform_checks())
but the individual functions can also be used for specific needs.

CodeParser makes heavy use of regular expressions to parse the code, and is
dependent on the current code formatting. Many Python C parser libraries require
preprocessed C code, which means no macro parsing. Compiler tools are also not
very helpful when we want the exact location in the original source (which
becomes impossible when e.g. comments are stripped).

NameChecker performs the following checks:

- All exported and available symbols in the library object files, are explicitly
  declared in the header files. This uses the nm command.
- All macros, constants, and identifiers (function names, struct names, etc)
  follow the required regex pattern.
- Typo checking: All words that begin with MBED|PSA exist as macros or constants.

The script returns 0 on success, 1 on test failure, and 2 if there is a script
error. It must be run from Mbed TLS root.
"""

import abc
import argparse
import fnmatch
import glob
import textwrap
import os
import sys
import traceback
import re
import enum
import shutil
import subprocess
import logging

import scripts_path # pylint: disable=unused-import
from mbedtls_dev import build_tree


# NOTE(review): this text was restored from a listing in which line breaks and
# runs of whitespace had been collapsed. String literals that start with or
# contain spaces (the TextWrapper indents in Problem.__init__ and the debug
# count lines in CodeParser.comprehensive_parse) are reproduced exactly as they
# appeared there -- confirm them against the canonical copy of this script.

# Naming patterns to check against. These are defined outside the NameCheck
# class for ease of modification.
PUBLIC_MACRO_PATTERN = r"^(MBEDTLS|PSA)_[0-9A-Z_]*[0-9A-Z]$"
INTERNAL_MACRO_PATTERN = r"^[0-9A-Za-z_]*[0-9A-Z]$"
CONSTANTS_PATTERN = PUBLIC_MACRO_PATTERN
IDENTIFIER_PATTERN = r"^(mbedtls|psa)_[0-9a-z_]*[0-9a-z]$"

class Match(): # pylint: disable=too-few-public-methods
    """
    A class representing a match, together with its found position.

    Fields:
    * filename: the file that the match was in.
    * line: the full line containing the match.
    * line_no: the line number. The CodeParser.parse_* methods in this file
      pass the index produced by enumerate(), which starts at 0.
      NOTE(review): reported numbers are therefore one less than the line
      numbers an editor shows -- confirm whether that is intended.
    * pos: a tuple of (start, end) positions on the line where the match is.
    * name: the match itself.
    """
    def __init__(self, filename, line, line_no, pos, name):
        # pylint: disable=too-many-arguments
        self.filename = filename
        self.line = line
        self.line_no = line_no
        self.pos = pos
        self.name = name

    def __str__(self):
        """
        Return a formatted code listing representation of the erroneous line.
        """
        gutter = format(self.line_no, "4d")
        blank_gutter = " " * len(gutter)
        # Carets sit under the columns [pos[0], pos[1]) of the quoted line.
        underline = self.pos[0] * " " + (self.pos[1] - self.pos[0]) * "^"

        # self.line is emitted as-is: the parsers store lines as read from the
        # file, so it normally already ends with a newline.
        return (
            " {0} |\n".format(blank_gutter) +
            " {0} | {1}".format(gutter, self.line) +
            " {0} | {1}\n".format(blank_gutter, underline)
        )

class Problem(abc.ABC): # pylint: disable=too-few-public-methods
    """
    An abstract parent class representing a form of static analysis error.
    It extends an Abstract Base Class, which means it is not instantiable, and
    it also mandates certain abstract methods to be implemented in subclasses.
    """
    # Class variable to control the quietness of all problems
    quiet = False
    def __init__(self):
        """Set up the text wrapper that subclasses use for verbose output."""
        self.textwrapper = textwrap.TextWrapper()
        self.textwrapper.width = 80
        # NOTE(review): these two indents look whitespace-collapsed (see the
        # note near the top of the file); confirm the intended widths.
        self.textwrapper.initial_indent = " > "
        self.textwrapper.subsequent_indent = " "

    def __str__(self):
        """
        Unified string representation method for all Problems.
        """
        # Look the flag up on the concrete class so that setting it on
        # Problem (or on one subclass) is honoured.
        if self.__class__.quiet:
            return self.quiet_output()
        return self.verbose_output()

    @abc.abstractmethod
    def quiet_output(self):
        """
        The output when --quiet is enabled.
        """
        pass

    @abc.abstractmethod
    def verbose_output(self):
        """
        The default output with explanation and code snippet if appropriate.
        """
        pass

class SymbolNotInHeader(Problem): # pylint: disable=too-few-public-methods
    """
    A problem that occurs when an exported/available symbol in the object file
    is not explicitly declared in header files. Created with
    NameCheck.check_symbols_declared_in_header()

    Fields:
    * symbol_name: the name of the symbol.
    """
    def __init__(self, symbol_name):
        self.symbol_name = symbol_name
        super().__init__()

    def quiet_output(self):
        """Return just the symbol name."""
        return "{0}".format(self.symbol_name)

    def verbose_output(self):
        """Return a wrapped sentence explaining the missing declaration."""
        return self.textwrapper.fill(
            "'{0}' was found as an available symbol in the output of nm, "
            "however it was not declared in any header files."
            .format(self.symbol_name))

class PatternMismatch(Problem): # pylint: disable=too-few-public-methods
    """
    A problem that occurs when something doesn't match the expected pattern.
    Created with NameCheck.check_match_pattern()

    Fields:
    * pattern: the expected regex pattern
    * match: the Match object in question
    """
    def __init__(self, pattern, match):
        self.pattern = pattern
        self.match = match
        super().__init__()

    def quiet_output(self):
        """Return "filename:line_no:name" for the offending match."""
        return (
            "{0}:{1}:{2}"
            .format(self.match.filename, self.match.line_no, self.match.name)
        )

    def verbose_output(self):
        """Return a wrapped explanation followed by the code listing."""
        return self.textwrapper.fill(
            "{0}:{1}: '{2}' does not match the required pattern '{3}'."
            .format(
                self.match.filename,
                self.match.line_no,
                self.match.name,
                self.pattern
            )
        ) + "\n" + str(self.match)

class Typo(Problem): # pylint: disable=too-few-public-methods
    """
    A problem that occurs when a word using MBED or PSA doesn't
    appear to be defined as constants nor enum values. Created with
    NameCheck.check_for_typos()

    Fields:
    * match: the Match object of the MBED|PSA name in question.
    """
    def __init__(self, match):
        self.match = match
        super().__init__()

    def quiet_output(self):
        """Return "filename:line_no:name" for the suspicious word."""
        return (
            "{0}:{1}:{2}"
            .format(self.match.filename, self.match.line_no, self.match.name)
        )

    def verbose_output(self):
        """Return a wrapped explanation followed by the code listing."""
        return self.textwrapper.fill(
            "{0}:{1}: '{2}' looks like a typo. It was not found in any "
            "macros or any enums. If this is not a typo, put "
            "//no-check-names after it."
            .format(self.match.filename, self.match.line_no, self.match.name)
        ) + "\n" + str(self.match)

class CodeParser():
    """
    Class for retrieving files and parsing the code. This can be used
    independently of the checks that NameChecker performs, for example for
    list_internal_identifiers.py.
    """
    def __init__(self, log):
        """
        Args:
        * log: the logger that the parsing methods report progress to
          (its info() and debug() methods are used).
        """
        self.log = log
        # Presumably verifies that we are running from the Mbed TLS root, as
        # the module docstring requires -- confirm in mbedtls_dev.build_tree.
        build_tree.check_repo_path()

        # Memo for storing "glob expression": set(filepaths)
        self.files = {}

        # Globally excluded filenames.
        # Note that "*" can match directory separators in exclude lists.
        self.excluded_files = ["*/bn_mul", "*/compat-2.x.h"]

    def comprehensive_parse(self):
        """
        Comprehensive ("default") function to call each parsing function and
        retrieve various elements of the code, together with the source location.

        Returns a dict of parsed item key to the corresponding List of Matches.
        """
        self.log.info("Parsing source code...")
        self.log.debug(
            "The following files are excluded from the search: {}"
            .format(str(self.excluded_files))
        )

        all_macros = {"public": [], "internal": [], "private":[]}
        all_macros["public"] = self.parse_macros([
            "include/mbedtls/*.h",
            "include/psa/*.h",
            "3rdparty/everest/include/everest/everest.h",
            "3rdparty/everest/include/everest/x25519.h"
        ])
        all_macros["internal"] = self.parse_macros([
            "library/*.h",
            "tests/include/test/drivers/*.h",
        ])
        all_macros["private"] = self.parse_macros([
            "library/*.c",
        ])
        enum_consts = self.parse_enum_consts([
            "include/mbedtls/*.h",
            "include/psa/*.h",
            "library/*.h",
            "library/*.c",
            "3rdparty/everest/include/everest/everest.h",
            "3rdparty/everest/include/everest/x25519.h"
        ])
        identifiers, excluded_identifiers = self.parse_identifiers([
            "include/mbedtls/*.h",
            "include/psa/*.h",
            "library/*.h",
            "3rdparty/everest/include/everest/everest.h",
            "3rdparty/everest/include/everest/x25519.h"
        ], ["3rdparty/p256-m/p256-m/p256-m.h"])
        mbed_psa_words = self.parse_mbed_psa_words([
            "include/mbedtls/*.h",
            "include/psa/*.h",
            "library/*.h",
            "3rdparty/everest/include/everest/everest.h",
            "3rdparty/everest/include/everest/x25519.h",
            "library/*.c",
            "3rdparty/everest/library/everest.c",
            "3rdparty/everest/library/x25519.c"
        ], ["library/psa_crypto_driver_wrappers.h"])
        symbols = self.parse_symbols()

        # Remove identifier macros like mbedtls_printf or mbedtls_calloc.
        # The names go in a set so that each membership test is a hash lookup
        # rather than a scan over every identifier. Private macros are not
        # filtered.
        identifier_names = {x.name for x in identifiers}
        actual_macros = {"public": [], "internal": []}
        for scope in actual_macros:
            actual_macros[scope] = [
                macro for macro in all_macros[scope]
                if macro.name not in identifier_names
            ]

        self.log.debug("Found:")
        # Aligns the counts on the assumption that none exceeds 4 digits
        for scope in actual_macros:
            self.log.debug(" {:4} Total {} Macros"
                           .format(len(all_macros[scope]), scope))
            self.log.debug(" {:4} {} Non-identifier Macros"
                           .format(len(actual_macros[scope]), scope))
        self.log.debug(" {:4} Enum Constants".format(len(enum_consts)))
        self.log.debug(" {:4} Identifiers".format(len(identifiers)))
        self.log.debug(" {:4} Exported Symbols".format(len(symbols)))
        return {
            "public_macros": actual_macros["public"],
            "internal_macros": actual_macros["internal"],
            "private_macros": all_macros["private"],
            "enum_consts": enum_consts,
            "identifiers": identifiers,
            "excluded_identifiers": excluded_identifiers,
            "symbols": symbols,
            "mbed_psa_words": mbed_psa_words
        }

    def is_file_excluded(self, path, exclude_wildcards):
        """Whether the given file path is excluded."""
        # exclude_wildcards may be None. Also, consider the global exclusions.
        exclude_wildcards = (exclude_wildcards or []) + self.excluded_files
        return any(fnmatch.fnmatch(path, pattern)
                   for pattern in exclude_wildcards)

    def get_all_files(self, include_wildcards, exclude_wildcards):
        """
        Get all files that match any of the included UNIX-style wildcards
        and filter them into included and excluded lists.
        While the check_names script is designed only for use on UNIX/macOS
        (due to nm), this function alone will work fine on Windows even with
        forward slashes in the wildcard.

        Args:
        * include_wildcards: a List of shell-style wildcards to match filepaths.
        * exclude_wildcards: a List of shell-style wildcards to exclude.

        Returns:
        * inc_files: A List of relative filepaths for included files.
        * exc_files: A List of relative filepaths for excluded files.
        """
        # The exclude wildcards are globbed too, so that files matched only by
        # them still show up in the excluded list.
        accumulator = set()
        all_wildcards = include_wildcards + (exclude_wildcards or [])
        for wildcard in all_wildcards:
            accumulator.update(glob.iglob(wildcard))

        inc_files = []
        exc_files = []
        for path in accumulator:
            if self.is_file_excluded(path, exclude_wildcards):
                exc_files.append(path)
            else:
                inc_files.append(path)
        return (inc_files, exc_files)

    def get_included_files(self, include_wildcards, exclude_wildcards):
        """
        Get all files that match any of the included UNIX-style wildcards.
        While the check_names script is designed only for use on UNIX/macOS
        (due to nm), this function alone will work fine on Windows even with
        forward slashes in the wildcard.

        Args:
        * include_wildcards: a List of shell-style wildcards to match filepaths.
        * exclude_wildcards: a List of shell-style wildcards to exclude.

        Returns a List of relative filepaths.
        """
        # A set removes files matched by more than one wildcard.
        accumulator = set()
        for include_wildcard in include_wildcards:
            accumulator.update(glob.iglob(include_wildcard))

        return [path for path in accumulator
                if not self.is_file_excluded(path, exclude_wildcards)]

    def parse_macros(self, include, exclude=None):
        """
        Parse all macros defined by #define preprocessor directives.

        Args:
        * include: A List of glob expressions to look for files through.
        * exclude: A List of glob expressions for excluding files.

        Returns a List of Match objects for the found macros. Each Match's
        line_no is the 0-based index of the line in its file.
        """
        macro_regex = re.compile(r"# *define +(?P<macro>\w+)")
        # Macro names starting with any of these prefixes are skipped.
        exclusions = (
            "asm", "inline", "EMIT", "_CRT_SECURE_NO_DEPRECATE", "MULADDC_"
        )

        files = self.get_included_files(include, exclude)
        self.log.debug("Looking for macros in {} files".format(len(files)))

        macros = []
        for header_file in files:
            with open(header_file, "r", encoding="utf-8") as header:
                for line_no, line in enumerate(header):
                    for macro in macro_regex.finditer(line):
                        if macro.group("macro").startswith(exclusions):
                            continue

                        macros.append(Match(
                            header_file,
                            line,
                            line_no,
                            macro.span("macro"),
                            macro.group("macro")))

        return macros

    def parse_mbed_psa_words(self, include, exclude=None):
        """
        Parse all words in the file that begin with MBED|PSA, in and out of
        macros, comments, anything.

        Args:
        * include: A List of glob expressions to look for files through.
        * exclude: A List of glob expressions for excluding files.

        Returns a List of Match objects for words beginning with MBED|PSA.
        Each Match's line_no is the 0-based index of the line in its file.
        """
        # Typos of TLS are common, hence the broader check below than MBEDTLS.
        mbed_regex = re.compile(r"\b(MBED.+?|PSA)_[A-Z0-9_]*")
        # Lines carrying the opt-out marker or an #error directive are skipped
        # entirely.
        exclusions = re.compile(r"// *no-check-names|#error")

        files = self.get_included_files(include, exclude)
        self.log.debug(
            "Looking for MBED|PSA words in {} files"
            .format(len(files))
        )

        mbed_psa_words = []
        for filename in files:
            with open(filename, "r", encoding="utf-8") as fp:
                for line_no, line in enumerate(fp):
                    if exclusions.search(line):
                        continue

                    for name in mbed_regex.finditer(line):
                        mbed_psa_words.append(Match(
                            filename,
                            line,
                            line_no,
                            name.span(0),
                            name.group(0)))

        return mbed_psa_words

    def parse_enum_consts(self, include, exclude=None):
        """
        Parse all enum value constants that are declared.

        Args:
        * include: A List of glob expressions to look for files through.
        * exclude: A List of glob expressions for excluding files.

        Returns a List of Match objects for the findings. Each Match's
        line_no is the 0-based index of the line in its file.
        """
        files = self.get_included_files(include, exclude)
        self.log.debug("Looking for enum consts in {} files".format(len(files)))

        # Emulate a finite state machine to parse enum declarations.
        # OUTSIDE_KEYWORD = outside the enum keyword
        # IN_BRACES = inside enum opening braces
        # IN_BETWEEN = between enum keyword and opening braces
        states = enum.Enum("FSM", ["OUTSIDE_KEYWORD", "IN_BRACES", "IN_BETWEEN"])

        # Match typedefs and brackets only when they are at the
        # beginning of the line -- if they are indented, they might
        # be sub-structures within structs, etc.
        # The patterns do not depend on the line, so compile them once.
        optional_c_identifier = r"([_a-zA-Z][_a-zA-Z0-9]*)?"
        enum_with_brace_regex = re.compile(
            r"^(typedef +)?enum " + optional_c_identifier + r" *{")
        enum_keyword_regex = re.compile(r"^(typedef +)?enum")
        opening_brace_regex = re.compile(r"^{")
        closing_brace_regex = re.compile(r"^}")
        preprocessor_regex = re.compile(r"^ *#")
        enum_const_regex = re.compile(r"^ *(?P<enum_const>\w+)")

        enum_consts = []
        for header_file in files:
            state = states.OUTSIDE_KEYWORD
            with open(header_file, "r", encoding="utf-8") as header:
                for line_no, line in enumerate(header):
                    if state == states.OUTSIDE_KEYWORD:
                        # The brace may be on the same line as the keyword or
                        # on a later one.
                        if enum_with_brace_regex.search(line):
                            state = states.IN_BRACES
                        elif enum_keyword_regex.search(line):
                            state = states.IN_BETWEEN
                    elif state == states.IN_BETWEEN:
                        if opening_brace_regex.search(line):
                            state = states.IN_BRACES
                    elif state == states.IN_BRACES:
                        if closing_brace_regex.search(line):
                            state = states.OUTSIDE_KEYWORD
                        elif not preprocessor_regex.search(line):
                            # The first word on the line is taken as the
                            # constant; lines with no leading word are skipped.
                            enum_const = enum_const_regex.search(line)
                            if not enum_const:
                                continue

                            enum_consts.append(Match(
                                header_file,
                                line,
                                line_no,
                                enum_const.span("enum_const"),
                                enum_const.group("enum_const")))

        return enum_consts

    IGNORED_CHUNK_REGEX = re.compile('|'.join([
        r'/\*.*?\*/', # block comment entirely on one line
        r'//.*', # line comment
        r'(?P<string>")(?:[^\\\"]|\\.)*"', # string literal
    ]))

    def strip_comments_and_literals(self, line, in_block_comment):
        """Strip comments and string literals from line.

        Continuation lines are not supported.

        If in_block_comment is true, assume that the line starts inside a
        block comment.

        Return updated values of (line, in_block_comment) where:
        * Comments in line have been replaced by a space (or nothing at the
          start or end of the line).
        * String contents have been removed.
        * in_block_comment indicates whether the line ends inside a block
          comment that continues on the next line.
        """

        # Terminate current multiline comment?
        if in_block_comment:
            m = re.search(r"\*/", line)
            if m:
                in_block_comment = False
                line = line[m.end(0):]
            else:
                # The whole line is inside the comment.
                return '', True

        # Remove full comments and string literals.
        # Do it all together to handle cases like "/*" correctly.
        # Note that continuation lines are not supported.
        # A string literal is reduced to an empty pair of quotes; a comment
        # becomes a single space.
        line = self.IGNORED_CHUNK_REGEX.sub(
            lambda s: '""' if s.group('string') else ' ',
            line)

        # Start an unfinished comment?
        # (If `/*` was part of a complete comment, it's already been removed.)
        m = re.search(r"/\*", line)
        if m:
            in_block_comment = True
            line = line[:m.start(0)]

        return line, in_block_comment

    IDENTIFIER_REGEX = re.compile('|'.join([
        # Match " something(a" or " *something(a". Functions.
        # Assumptions:
        # - function definition from return type to one of its arguments is
        #   all on one line
        # - function definition line only contains alphanumeric, asterisk,
        #   underscore, and open bracket
        r".* \**(\w+) *\( *\w",
        # Match "(*something)(".
        r".*\( *\* *(\w+) *\) *\(",
        # Match names of named data structures.
        r"(?:typedef +)?(?:struct|union|enum) +(\w+)(?: *{)?$",
        # Match names of typedef instances, after closing bracket.
        r"}? *(\w+)[;[].*",
    ]))
    # The regex below is indented for clarity.
559*62c56f98SSadaf Ebrahimi EXCLUSION_LINES = re.compile("|".join([ 560*62c56f98SSadaf Ebrahimi r"extern +\"C\"", 561*62c56f98SSadaf Ebrahimi r"(typedef +)?(struct|union|enum)( *{)?$", 562*62c56f98SSadaf Ebrahimi r"} *;?$", 563*62c56f98SSadaf Ebrahimi r"$", 564*62c56f98SSadaf Ebrahimi r"//", 565*62c56f98SSadaf Ebrahimi r"#", 566*62c56f98SSadaf Ebrahimi ])) 567*62c56f98SSadaf Ebrahimi 568*62c56f98SSadaf Ebrahimi def parse_identifiers_in_file(self, header_file, identifiers): 569*62c56f98SSadaf Ebrahimi """ 570*62c56f98SSadaf Ebrahimi Parse all lines of a header where a function/enum/struct/union/typedef 571*62c56f98SSadaf Ebrahimi identifier is declared, based on some regex and heuristics. Highly 572*62c56f98SSadaf Ebrahimi dependent on formatting style. 573*62c56f98SSadaf Ebrahimi 574*62c56f98SSadaf Ebrahimi Append found matches to the list ``identifiers``. 575*62c56f98SSadaf Ebrahimi """ 576*62c56f98SSadaf Ebrahimi 577*62c56f98SSadaf Ebrahimi with open(header_file, "r", encoding="utf-8") as header: 578*62c56f98SSadaf Ebrahimi in_block_comment = False 579*62c56f98SSadaf Ebrahimi # The previous line variable is used for concatenating lines 580*62c56f98SSadaf Ebrahimi # when identifiers are formatted and spread across multiple 581*62c56f98SSadaf Ebrahimi # lines. 
582*62c56f98SSadaf Ebrahimi previous_line = "" 583*62c56f98SSadaf Ebrahimi 584*62c56f98SSadaf Ebrahimi for line_no, line in enumerate(header): 585*62c56f98SSadaf Ebrahimi line, in_block_comment = \ 586*62c56f98SSadaf Ebrahimi self.strip_comments_and_literals(line, in_block_comment) 587*62c56f98SSadaf Ebrahimi 588*62c56f98SSadaf Ebrahimi if self.EXCLUSION_LINES.match(line): 589*62c56f98SSadaf Ebrahimi previous_line = "" 590*62c56f98SSadaf Ebrahimi continue 591*62c56f98SSadaf Ebrahimi 592*62c56f98SSadaf Ebrahimi # If the line contains only space-separated alphanumeric 593*62c56f98SSadaf Ebrahimi # characters (or underscore, asterisk, or open parenthesis), 594*62c56f98SSadaf Ebrahimi # and nothing else, high chance it's a declaration that 595*62c56f98SSadaf Ebrahimi # continues on the next line 596*62c56f98SSadaf Ebrahimi if re.search(r"^([\w\*\(]+\s+)+$", line): 597*62c56f98SSadaf Ebrahimi previous_line += line 598*62c56f98SSadaf Ebrahimi continue 599*62c56f98SSadaf Ebrahimi 600*62c56f98SSadaf Ebrahimi # If previous line seemed to start an unfinished declaration 601*62c56f98SSadaf Ebrahimi # (as above), concat and treat them as one. 
602*62c56f98SSadaf Ebrahimi if previous_line: 603*62c56f98SSadaf Ebrahimi line = previous_line.strip() + " " + line.strip() + "\n" 604*62c56f98SSadaf Ebrahimi previous_line = "" 605*62c56f98SSadaf Ebrahimi 606*62c56f98SSadaf Ebrahimi # Skip parsing if line has a space in front = heuristic to 607*62c56f98SSadaf Ebrahimi # skip function argument lines (highly subject to formatting 608*62c56f98SSadaf Ebrahimi # changes) 609*62c56f98SSadaf Ebrahimi if line[0] == " ": 610*62c56f98SSadaf Ebrahimi continue 611*62c56f98SSadaf Ebrahimi 612*62c56f98SSadaf Ebrahimi identifier = self.IDENTIFIER_REGEX.search(line) 613*62c56f98SSadaf Ebrahimi 614*62c56f98SSadaf Ebrahimi if not identifier: 615*62c56f98SSadaf Ebrahimi continue 616*62c56f98SSadaf Ebrahimi 617*62c56f98SSadaf Ebrahimi # Find the group that matched, and append it 618*62c56f98SSadaf Ebrahimi for group in identifier.groups(): 619*62c56f98SSadaf Ebrahimi if not group: 620*62c56f98SSadaf Ebrahimi continue 621*62c56f98SSadaf Ebrahimi 622*62c56f98SSadaf Ebrahimi identifiers.append(Match( 623*62c56f98SSadaf Ebrahimi header_file, 624*62c56f98SSadaf Ebrahimi line, 625*62c56f98SSadaf Ebrahimi line_no, 626*62c56f98SSadaf Ebrahimi identifier.span(), 627*62c56f98SSadaf Ebrahimi group)) 628*62c56f98SSadaf Ebrahimi 629*62c56f98SSadaf Ebrahimi def parse_identifiers(self, include, exclude=None): 630*62c56f98SSadaf Ebrahimi """ 631*62c56f98SSadaf Ebrahimi Parse all lines of a header where a function/enum/struct/union/typedef 632*62c56f98SSadaf Ebrahimi identifier is declared, based on some regex and heuristics. Highly 633*62c56f98SSadaf Ebrahimi dependent on formatting style. Identifiers in excluded files are still 634*62c56f98SSadaf Ebrahimi parsed 635*62c56f98SSadaf Ebrahimi 636*62c56f98SSadaf Ebrahimi Args: 637*62c56f98SSadaf Ebrahimi * include: A List of glob expressions to look for files through. 638*62c56f98SSadaf Ebrahimi * exclude: A List of glob expressions for excluding files. 
639*62c56f98SSadaf Ebrahimi 640*62c56f98SSadaf Ebrahimi Returns: a Tuple of two Lists of Match objects with identifiers. 641*62c56f98SSadaf Ebrahimi * included_identifiers: A List of Match objects with identifiers from 642*62c56f98SSadaf Ebrahimi included files. 643*62c56f98SSadaf Ebrahimi * excluded_identifiers: A List of Match objects with identifiers from 644*62c56f98SSadaf Ebrahimi excluded files. 645*62c56f98SSadaf Ebrahimi """ 646*62c56f98SSadaf Ebrahimi 647*62c56f98SSadaf Ebrahimi included_files, excluded_files = \ 648*62c56f98SSadaf Ebrahimi self.get_all_files(include, exclude) 649*62c56f98SSadaf Ebrahimi 650*62c56f98SSadaf Ebrahimi self.log.debug("Looking for included identifiers in {} files".format \ 651*62c56f98SSadaf Ebrahimi (len(included_files))) 652*62c56f98SSadaf Ebrahimi 653*62c56f98SSadaf Ebrahimi included_identifiers = [] 654*62c56f98SSadaf Ebrahimi excluded_identifiers = [] 655*62c56f98SSadaf Ebrahimi for header_file in included_files: 656*62c56f98SSadaf Ebrahimi self.parse_identifiers_in_file(header_file, included_identifiers) 657*62c56f98SSadaf Ebrahimi for header_file in excluded_files: 658*62c56f98SSadaf Ebrahimi self.parse_identifiers_in_file(header_file, excluded_identifiers) 659*62c56f98SSadaf Ebrahimi 660*62c56f98SSadaf Ebrahimi return (included_identifiers, excluded_identifiers) 661*62c56f98SSadaf Ebrahimi 662*62c56f98SSadaf Ebrahimi def parse_symbols(self): 663*62c56f98SSadaf Ebrahimi """ 664*62c56f98SSadaf Ebrahimi Compile the Mbed TLS libraries, and parse the TLS, Crypto, and x509 665*62c56f98SSadaf Ebrahimi object files using nm to retrieve the list of referenced symbols. 666*62c56f98SSadaf Ebrahimi Exceptions thrown here are rethrown because they would be critical 667*62c56f98SSadaf Ebrahimi errors that void several tests, and thus needs to halt the program. This 668*62c56f98SSadaf Ebrahimi is explicitly done for clarity. 
669*62c56f98SSadaf Ebrahimi 670*62c56f98SSadaf Ebrahimi Returns a List of unique symbols defined and used in the libraries. 671*62c56f98SSadaf Ebrahimi """ 672*62c56f98SSadaf Ebrahimi self.log.info("Compiling...") 673*62c56f98SSadaf Ebrahimi symbols = [] 674*62c56f98SSadaf Ebrahimi 675*62c56f98SSadaf Ebrahimi # Back up the config and atomically compile with the full configuration. 676*62c56f98SSadaf Ebrahimi shutil.copy( 677*62c56f98SSadaf Ebrahimi "include/mbedtls/mbedtls_config.h", 678*62c56f98SSadaf Ebrahimi "include/mbedtls/mbedtls_config.h.bak" 679*62c56f98SSadaf Ebrahimi ) 680*62c56f98SSadaf Ebrahimi try: 681*62c56f98SSadaf Ebrahimi # Use check=True in all subprocess calls so that failures are raised 682*62c56f98SSadaf Ebrahimi # as exceptions and logged. 683*62c56f98SSadaf Ebrahimi subprocess.run( 684*62c56f98SSadaf Ebrahimi ["python3", "scripts/config.py", "full"], 685*62c56f98SSadaf Ebrahimi universal_newlines=True, 686*62c56f98SSadaf Ebrahimi check=True 687*62c56f98SSadaf Ebrahimi ) 688*62c56f98SSadaf Ebrahimi my_environment = os.environ.copy() 689*62c56f98SSadaf Ebrahimi my_environment["CFLAGS"] = "-fno-asynchronous-unwind-tables" 690*62c56f98SSadaf Ebrahimi # Run make clean separately to lib to prevent unwanted behavior when 691*62c56f98SSadaf Ebrahimi # make is invoked with parallelism. 
692*62c56f98SSadaf Ebrahimi subprocess.run( 693*62c56f98SSadaf Ebrahimi ["make", "clean"], 694*62c56f98SSadaf Ebrahimi universal_newlines=True, 695*62c56f98SSadaf Ebrahimi check=True 696*62c56f98SSadaf Ebrahimi ) 697*62c56f98SSadaf Ebrahimi subprocess.run( 698*62c56f98SSadaf Ebrahimi ["make", "lib"], 699*62c56f98SSadaf Ebrahimi env=my_environment, 700*62c56f98SSadaf Ebrahimi universal_newlines=True, 701*62c56f98SSadaf Ebrahimi stdout=subprocess.PIPE, 702*62c56f98SSadaf Ebrahimi stderr=subprocess.STDOUT, 703*62c56f98SSadaf Ebrahimi check=True 704*62c56f98SSadaf Ebrahimi ) 705*62c56f98SSadaf Ebrahimi 706*62c56f98SSadaf Ebrahimi # Perform object file analysis using nm 707*62c56f98SSadaf Ebrahimi symbols = self.parse_symbols_from_nm([ 708*62c56f98SSadaf Ebrahimi "library/libmbedcrypto.a", 709*62c56f98SSadaf Ebrahimi "library/libmbedtls.a", 710*62c56f98SSadaf Ebrahimi "library/libmbedx509.a" 711*62c56f98SSadaf Ebrahimi ]) 712*62c56f98SSadaf Ebrahimi 713*62c56f98SSadaf Ebrahimi subprocess.run( 714*62c56f98SSadaf Ebrahimi ["make", "clean"], 715*62c56f98SSadaf Ebrahimi universal_newlines=True, 716*62c56f98SSadaf Ebrahimi check=True 717*62c56f98SSadaf Ebrahimi ) 718*62c56f98SSadaf Ebrahimi except subprocess.CalledProcessError as error: 719*62c56f98SSadaf Ebrahimi self.log.debug(error.output) 720*62c56f98SSadaf Ebrahimi raise error 721*62c56f98SSadaf Ebrahimi finally: 722*62c56f98SSadaf Ebrahimi # Put back the original config regardless of there being errors. 723*62c56f98SSadaf Ebrahimi # Works also for keyboard interrupts. 
724*62c56f98SSadaf Ebrahimi shutil.move( 725*62c56f98SSadaf Ebrahimi "include/mbedtls/mbedtls_config.h.bak", 726*62c56f98SSadaf Ebrahimi "include/mbedtls/mbedtls_config.h" 727*62c56f98SSadaf Ebrahimi ) 728*62c56f98SSadaf Ebrahimi 729*62c56f98SSadaf Ebrahimi return symbols 730*62c56f98SSadaf Ebrahimi 731*62c56f98SSadaf Ebrahimi def parse_symbols_from_nm(self, object_files): 732*62c56f98SSadaf Ebrahimi """ 733*62c56f98SSadaf Ebrahimi Run nm to retrieve the list of referenced symbols in each object file. 734*62c56f98SSadaf Ebrahimi Does not return the position data since it is of no use. 735*62c56f98SSadaf Ebrahimi 736*62c56f98SSadaf Ebrahimi Args: 737*62c56f98SSadaf Ebrahimi * object_files: a List of compiled object filepaths to search through. 738*62c56f98SSadaf Ebrahimi 739*62c56f98SSadaf Ebrahimi Returns a List of unique symbols defined and used in any of the object 740*62c56f98SSadaf Ebrahimi files. 741*62c56f98SSadaf Ebrahimi """ 742*62c56f98SSadaf Ebrahimi nm_undefined_regex = re.compile(r"^\S+: +U |^$|^\S+:$") 743*62c56f98SSadaf Ebrahimi nm_valid_regex = re.compile(r"^\S+( [0-9A-Fa-f]+)* . 
_*(?P<symbol>\w+)") 744*62c56f98SSadaf Ebrahimi exclusions = ("FStar", "Hacl") 745*62c56f98SSadaf Ebrahimi 746*62c56f98SSadaf Ebrahimi symbols = [] 747*62c56f98SSadaf Ebrahimi 748*62c56f98SSadaf Ebrahimi # Gather all outputs of nm 749*62c56f98SSadaf Ebrahimi nm_output = "" 750*62c56f98SSadaf Ebrahimi for lib in object_files: 751*62c56f98SSadaf Ebrahimi nm_output += subprocess.run( 752*62c56f98SSadaf Ebrahimi ["nm", "-og", lib], 753*62c56f98SSadaf Ebrahimi universal_newlines=True, 754*62c56f98SSadaf Ebrahimi stdout=subprocess.PIPE, 755*62c56f98SSadaf Ebrahimi stderr=subprocess.STDOUT, 756*62c56f98SSadaf Ebrahimi check=True 757*62c56f98SSadaf Ebrahimi ).stdout 758*62c56f98SSadaf Ebrahimi 759*62c56f98SSadaf Ebrahimi for line in nm_output.splitlines(): 760*62c56f98SSadaf Ebrahimi if not nm_undefined_regex.search(line): 761*62c56f98SSadaf Ebrahimi symbol = nm_valid_regex.search(line) 762*62c56f98SSadaf Ebrahimi if (symbol and not symbol.group("symbol").startswith(exclusions)): 763*62c56f98SSadaf Ebrahimi symbols.append(symbol.group("symbol")) 764*62c56f98SSadaf Ebrahimi else: 765*62c56f98SSadaf Ebrahimi self.log.error(line) 766*62c56f98SSadaf Ebrahimi 767*62c56f98SSadaf Ebrahimi return symbols 768*62c56f98SSadaf Ebrahimi 769*62c56f98SSadaf Ebrahimiclass NameChecker(): 770*62c56f98SSadaf Ebrahimi """ 771*62c56f98SSadaf Ebrahimi Representation of the core name checking operation performed by this script. 772*62c56f98SSadaf Ebrahimi """ 773*62c56f98SSadaf Ebrahimi def __init__(self, parse_result, log): 774*62c56f98SSadaf Ebrahimi self.parse_result = parse_result 775*62c56f98SSadaf Ebrahimi self.log = log 776*62c56f98SSadaf Ebrahimi 777*62c56f98SSadaf Ebrahimi def perform_checks(self, quiet=False): 778*62c56f98SSadaf Ebrahimi """ 779*62c56f98SSadaf Ebrahimi A comprehensive checker that performs each check in order, and outputs 780*62c56f98SSadaf Ebrahimi a final verdict. 
781*62c56f98SSadaf Ebrahimi 782*62c56f98SSadaf Ebrahimi Args: 783*62c56f98SSadaf Ebrahimi * quiet: whether to hide detailed problem explanation. 784*62c56f98SSadaf Ebrahimi """ 785*62c56f98SSadaf Ebrahimi self.log.info("=============") 786*62c56f98SSadaf Ebrahimi Problem.quiet = quiet 787*62c56f98SSadaf Ebrahimi problems = 0 788*62c56f98SSadaf Ebrahimi problems += self.check_symbols_declared_in_header() 789*62c56f98SSadaf Ebrahimi 790*62c56f98SSadaf Ebrahimi pattern_checks = [ 791*62c56f98SSadaf Ebrahimi ("public_macros", PUBLIC_MACRO_PATTERN), 792*62c56f98SSadaf Ebrahimi ("internal_macros", INTERNAL_MACRO_PATTERN), 793*62c56f98SSadaf Ebrahimi ("enum_consts", CONSTANTS_PATTERN), 794*62c56f98SSadaf Ebrahimi ("identifiers", IDENTIFIER_PATTERN) 795*62c56f98SSadaf Ebrahimi ] 796*62c56f98SSadaf Ebrahimi for group, check_pattern in pattern_checks: 797*62c56f98SSadaf Ebrahimi problems += self.check_match_pattern(group, check_pattern) 798*62c56f98SSadaf Ebrahimi 799*62c56f98SSadaf Ebrahimi problems += self.check_for_typos() 800*62c56f98SSadaf Ebrahimi 801*62c56f98SSadaf Ebrahimi self.log.info("=============") 802*62c56f98SSadaf Ebrahimi if problems > 0: 803*62c56f98SSadaf Ebrahimi self.log.info("FAIL: {0} problem(s) to fix".format(str(problems))) 804*62c56f98SSadaf Ebrahimi if quiet: 805*62c56f98SSadaf Ebrahimi self.log.info("Remove --quiet to see explanations.") 806*62c56f98SSadaf Ebrahimi else: 807*62c56f98SSadaf Ebrahimi self.log.info("Use --quiet for minimal output.") 808*62c56f98SSadaf Ebrahimi return 1 809*62c56f98SSadaf Ebrahimi else: 810*62c56f98SSadaf Ebrahimi self.log.info("PASS") 811*62c56f98SSadaf Ebrahimi return 0 812*62c56f98SSadaf Ebrahimi 813*62c56f98SSadaf Ebrahimi def check_symbols_declared_in_header(self): 814*62c56f98SSadaf Ebrahimi """ 815*62c56f98SSadaf Ebrahimi Perform a check that all detected symbols in the library object files 816*62c56f98SSadaf Ebrahimi are properly declared in headers. 
817*62c56f98SSadaf Ebrahimi Assumes parse_names_in_source() was called before this. 818*62c56f98SSadaf Ebrahimi 819*62c56f98SSadaf Ebrahimi Returns the number of problems that need fixing. 820*62c56f98SSadaf Ebrahimi """ 821*62c56f98SSadaf Ebrahimi problems = [] 822*62c56f98SSadaf Ebrahimi all_identifiers = self.parse_result["identifiers"] + \ 823*62c56f98SSadaf Ebrahimi self.parse_result["excluded_identifiers"] 824*62c56f98SSadaf Ebrahimi 825*62c56f98SSadaf Ebrahimi for symbol in self.parse_result["symbols"]: 826*62c56f98SSadaf Ebrahimi found_symbol_declared = False 827*62c56f98SSadaf Ebrahimi for identifier_match in all_identifiers: 828*62c56f98SSadaf Ebrahimi if symbol == identifier_match.name: 829*62c56f98SSadaf Ebrahimi found_symbol_declared = True 830*62c56f98SSadaf Ebrahimi break 831*62c56f98SSadaf Ebrahimi 832*62c56f98SSadaf Ebrahimi if not found_symbol_declared: 833*62c56f98SSadaf Ebrahimi problems.append(SymbolNotInHeader(symbol)) 834*62c56f98SSadaf Ebrahimi 835*62c56f98SSadaf Ebrahimi self.output_check_result("All symbols in header", problems) 836*62c56f98SSadaf Ebrahimi return len(problems) 837*62c56f98SSadaf Ebrahimi 838*62c56f98SSadaf Ebrahimi def check_match_pattern(self, group_to_check, check_pattern): 839*62c56f98SSadaf Ebrahimi """ 840*62c56f98SSadaf Ebrahimi Perform a check that all items of a group conform to a regex pattern. 841*62c56f98SSadaf Ebrahimi Assumes parse_names_in_source() was called before this. 842*62c56f98SSadaf Ebrahimi 843*62c56f98SSadaf Ebrahimi Args: 844*62c56f98SSadaf Ebrahimi * group_to_check: string key to index into self.parse_result. 845*62c56f98SSadaf Ebrahimi * check_pattern: the regex to check against. 846*62c56f98SSadaf Ebrahimi 847*62c56f98SSadaf Ebrahimi Returns the number of problems that need fixing. 
848*62c56f98SSadaf Ebrahimi """ 849*62c56f98SSadaf Ebrahimi problems = [] 850*62c56f98SSadaf Ebrahimi 851*62c56f98SSadaf Ebrahimi for item_match in self.parse_result[group_to_check]: 852*62c56f98SSadaf Ebrahimi if not re.search(check_pattern, item_match.name): 853*62c56f98SSadaf Ebrahimi problems.append(PatternMismatch(check_pattern, item_match)) 854*62c56f98SSadaf Ebrahimi # Double underscore should not be used for names 855*62c56f98SSadaf Ebrahimi if re.search(r".*__.*", item_match.name): 856*62c56f98SSadaf Ebrahimi problems.append( 857*62c56f98SSadaf Ebrahimi PatternMismatch("no double underscore allowed", item_match)) 858*62c56f98SSadaf Ebrahimi 859*62c56f98SSadaf Ebrahimi self.output_check_result( 860*62c56f98SSadaf Ebrahimi "Naming patterns of {}".format(group_to_check), 861*62c56f98SSadaf Ebrahimi problems) 862*62c56f98SSadaf Ebrahimi return len(problems) 863*62c56f98SSadaf Ebrahimi 864*62c56f98SSadaf Ebrahimi def check_for_typos(self): 865*62c56f98SSadaf Ebrahimi """ 866*62c56f98SSadaf Ebrahimi Perform a check that all words in the source code beginning with MBED are 867*62c56f98SSadaf Ebrahimi either defined as macros, or as enum constants. 868*62c56f98SSadaf Ebrahimi Assumes parse_names_in_source() was called before this. 869*62c56f98SSadaf Ebrahimi 870*62c56f98SSadaf Ebrahimi Returns the number of problems that need fixing. 
871*62c56f98SSadaf Ebrahimi """ 872*62c56f98SSadaf Ebrahimi problems = [] 873*62c56f98SSadaf Ebrahimi 874*62c56f98SSadaf Ebrahimi # Set comprehension, equivalent to a list comprehension wrapped by set() 875*62c56f98SSadaf Ebrahimi all_caps_names = { 876*62c56f98SSadaf Ebrahimi match.name 877*62c56f98SSadaf Ebrahimi for match 878*62c56f98SSadaf Ebrahimi in self.parse_result["public_macros"] + 879*62c56f98SSadaf Ebrahimi self.parse_result["internal_macros"] + 880*62c56f98SSadaf Ebrahimi self.parse_result["private_macros"] + 881*62c56f98SSadaf Ebrahimi self.parse_result["enum_consts"] 882*62c56f98SSadaf Ebrahimi } 883*62c56f98SSadaf Ebrahimi typo_exclusion = re.compile(r"XXX|__|_$|^MBEDTLS_.*CONFIG_FILE$|" 884*62c56f98SSadaf Ebrahimi r"MBEDTLS_TEST_LIBTESTDRIVER*|" 885*62c56f98SSadaf Ebrahimi r"PSA_CRYPTO_DRIVER_TEST") 886*62c56f98SSadaf Ebrahimi 887*62c56f98SSadaf Ebrahimi for name_match in self.parse_result["mbed_psa_words"]: 888*62c56f98SSadaf Ebrahimi found = name_match.name in all_caps_names 889*62c56f98SSadaf Ebrahimi 890*62c56f98SSadaf Ebrahimi # Since MBEDTLS_PSA_ACCEL_XXX defines are defined by the 891*62c56f98SSadaf Ebrahimi # PSA driver, they will not exist as macros. However, they 892*62c56f98SSadaf Ebrahimi # should still be checked for typos using the equivalent 893*62c56f98SSadaf Ebrahimi # BUILTINs that exist. 
894*62c56f98SSadaf Ebrahimi if "MBEDTLS_PSA_ACCEL_" in name_match.name: 895*62c56f98SSadaf Ebrahimi found = name_match.name.replace( 896*62c56f98SSadaf Ebrahimi "MBEDTLS_PSA_ACCEL_", 897*62c56f98SSadaf Ebrahimi "MBEDTLS_PSA_BUILTIN_") in all_caps_names 898*62c56f98SSadaf Ebrahimi 899*62c56f98SSadaf Ebrahimi if not found and not typo_exclusion.search(name_match.name): 900*62c56f98SSadaf Ebrahimi problems.append(Typo(name_match)) 901*62c56f98SSadaf Ebrahimi 902*62c56f98SSadaf Ebrahimi self.output_check_result("Likely typos", problems) 903*62c56f98SSadaf Ebrahimi return len(problems) 904*62c56f98SSadaf Ebrahimi 905*62c56f98SSadaf Ebrahimi def output_check_result(self, name, problems): 906*62c56f98SSadaf Ebrahimi """ 907*62c56f98SSadaf Ebrahimi Write out the PASS/FAIL status of a performed check depending on whether 908*62c56f98SSadaf Ebrahimi there were problems. 909*62c56f98SSadaf Ebrahimi 910*62c56f98SSadaf Ebrahimi Args: 911*62c56f98SSadaf Ebrahimi * name: the name of the test 912*62c56f98SSadaf Ebrahimi * problems: a List of encountered Problems 913*62c56f98SSadaf Ebrahimi """ 914*62c56f98SSadaf Ebrahimi if problems: 915*62c56f98SSadaf Ebrahimi self.log.info("{}: FAIL\n".format(name)) 916*62c56f98SSadaf Ebrahimi for problem in problems: 917*62c56f98SSadaf Ebrahimi self.log.warning(str(problem)) 918*62c56f98SSadaf Ebrahimi else: 919*62c56f98SSadaf Ebrahimi self.log.info("{}: PASS".format(name)) 920*62c56f98SSadaf Ebrahimi 921*62c56f98SSadaf Ebrahimidef main(): 922*62c56f98SSadaf Ebrahimi """ 923*62c56f98SSadaf Ebrahimi Perform argument parsing, and create an instance of CodeParser and 924*62c56f98SSadaf Ebrahimi NameChecker to begin the core operation. 
925*62c56f98SSadaf Ebrahimi """ 926*62c56f98SSadaf Ebrahimi parser = argparse.ArgumentParser( 927*62c56f98SSadaf Ebrahimi formatter_class=argparse.RawDescriptionHelpFormatter, 928*62c56f98SSadaf Ebrahimi description=( 929*62c56f98SSadaf Ebrahimi "This script confirms that the naming of all symbols and identifiers " 930*62c56f98SSadaf Ebrahimi "in Mbed TLS are consistent with the house style and are also " 931*62c56f98SSadaf Ebrahimi "self-consistent.\n\n" 932*62c56f98SSadaf Ebrahimi "Expected to be run from the MbedTLS root directory.") 933*62c56f98SSadaf Ebrahimi ) 934*62c56f98SSadaf Ebrahimi parser.add_argument( 935*62c56f98SSadaf Ebrahimi "-v", "--verbose", 936*62c56f98SSadaf Ebrahimi action="store_true", 937*62c56f98SSadaf Ebrahimi help="show parse results" 938*62c56f98SSadaf Ebrahimi ) 939*62c56f98SSadaf Ebrahimi parser.add_argument( 940*62c56f98SSadaf Ebrahimi "-q", "--quiet", 941*62c56f98SSadaf Ebrahimi action="store_true", 942*62c56f98SSadaf Ebrahimi help="hide unnecessary text, explanations, and highlights" 943*62c56f98SSadaf Ebrahimi ) 944*62c56f98SSadaf Ebrahimi 945*62c56f98SSadaf Ebrahimi args = parser.parse_args() 946*62c56f98SSadaf Ebrahimi 947*62c56f98SSadaf Ebrahimi # Configure the global logger, which is then passed to the classes below 948*62c56f98SSadaf Ebrahimi log = logging.getLogger() 949*62c56f98SSadaf Ebrahimi log.setLevel(logging.DEBUG if args.verbose else logging.INFO) 950*62c56f98SSadaf Ebrahimi log.addHandler(logging.StreamHandler()) 951*62c56f98SSadaf Ebrahimi 952*62c56f98SSadaf Ebrahimi try: 953*62c56f98SSadaf Ebrahimi code_parser = CodeParser(log) 954*62c56f98SSadaf Ebrahimi parse_result = code_parser.comprehensive_parse() 955*62c56f98SSadaf Ebrahimi except Exception: # pylint: disable=broad-except 956*62c56f98SSadaf Ebrahimi traceback.print_exc() 957*62c56f98SSadaf Ebrahimi sys.exit(2) 958*62c56f98SSadaf Ebrahimi 959*62c56f98SSadaf Ebrahimi name_checker = NameChecker(parse_result, log) 960*62c56f98SSadaf Ebrahimi return_code = 
name_checker.perform_checks(quiet=args.quiet) 961*62c56f98SSadaf Ebrahimi 962*62c56f98SSadaf Ebrahimi sys.exit(return_code) 963*62c56f98SSadaf Ebrahimi 964*62c56f98SSadaf Ebrahimiif __name__ == "__main__": 965*62c56f98SSadaf Ebrahimi main() 966