1#!/usr/bin/env python3 2 3# Tool to bundle multiple C/C++ source files, inlining any includes. 4# 5# Note: there are two types of exclusion options: the '-x' flag, which besides 6# excluding a file also adds an #error directive in place of the #include, and 7# the '-k' flag, which keeps the #include and doesn't inline the file. The 8# intended use cases are: '-x' for files that would normally be #if'd out, so 9# features that 100% won't be used in the amalgamated file, for which every 10# occurrence adds the error, and '-k' for headers that we wish to manually 11# include, such as a project's public API, for which occurrences after the first 12# are removed. 13# 14# Todo: the error handling could be better, which currently throws and halts 15# (which is functional just not very friendly). 16# 17# Author: Carl Woffenden, Numfum GmbH (this script is released under a CC0 license/Public Domain) 18 19import argparse, re, sys 20 21from pathlib import Path 22from typing import Any, List, Optional, Pattern, Set, TextIO 23 24# Set of file roots when searching (equivalent to -I paths for the compiler). 25roots: Set[Path] = set() 26 27# Set of (canonical) file Path objects to exclude from inlining (and not only 28# exclude but to add a compiler error directive when they're encountered). 29excludes: Set[Path] = set() 30 31# Set of (canonical) file Path objects to keep as include directives. 32keeps: Set[Path] = set() 33 34# Whether to keep the #pragma once directives (unlikely, since this will result 35# in a warning, but the option is there). 36keep_pragma: bool = False 37 38# Destination file object (or stdout if no output file was supplied). 39destn: TextIO = sys.stdout 40 41# Set of file Path objects previously inlined (and to ignore if reencountering). 
found: Set[Path] = set()

# Compiled regex Pattern to handle "#pragma once" in various formats:
#
#   #pragma once
#     #pragma once
#   #  pragma once
#   #pragma   once
#   #pragma once // comment
#
# Ignoring commented versions, same as include_regex.
#
# At least one whitespace character is required between 'pragma' and 'once',
# and 'once' must end on a word boundary, so that neither '#pragmaonce' nor
# '#pragma once_more' is mistaken for the directive (and silently dropped).
#
pragma_regex: Pattern = re.compile(r'^\s*#\s*pragma\s+once\b')

# Compiled regex Pattern to handle the following type of file includes:
#
#   #include "file"
#     #include "file"
#   #  include "file"
#   #include   "file"
#   #include "file" // comment
#   #include "file" // comment with quote "
#
# And all combinations of, as well as ignoring the following:
#
#   #include <file>
#   //#include "file"
#   /*#include "file"*/
#
# We don't try to catch errors since the compiler will do this (and the code is
# expected to be valid before processing) and we don't care what follows the
# file (whether it's a valid comment or not, since anything after the quoted
# string is ignored). Group 1 of a match is the quoted file name.
#
include_regex: Pattern = re.compile(r'^\s*#\s*include\s*"(.+?)"')

# Simple tests to prove include_regex's cases. Prints a confirmation and
# returns True when every case behaves as expected, otherwise returns False.
#
def test_match_include() -> bool:
    # Each accepted case differs in where (and which kind of) whitespace sits
    accepted = (
        '#include "file"',
        '  #include "file"',
        '#  include "file"',
        '#include   "file"',
        '#\tinclude\t"file"',
        '#include "file" // comment',
    )
    # Angle-bracket includes and commented-out directives must be left alone
    rejected = (
        '#include <file>',
        '//#include "file"',
        '/*#include "file"*/',
    )
    if not all(include_regex.match(case) for case in accepted):
        return False
    if any(include_regex.match(case) for case in rejected):
        return False
    # The non-greedy group must stop at the first closing quote
    quoted = include_regex.match('#include "file" // "')
    if quoted and quoted.group(1) == 'file':
        print('#include match valid')
        return True
    return False

# This is a self-check helper that returns a bool, not a pytest test, so opt it
# out of pytest collection should this module ever be imported by a test run.
test_match_include.__test__ = False

# Simple tests to prove pragma_regex's cases.
# Prints a confirmation and returns True when every case behaves as expected,
# otherwise returns False.
#
def test_match_pragma() -> bool:
    # Each accepted case differs in where (and which kind of) whitespace sits
    accepted = (
        '#pragma once',
        '  #pragma once',
        '#  pragma once',
        '#pragma\tonce',
        '#pragma once // comment',
    )
    # Commented-out directives must be left alone
    rejected = (
        '//#pragma once',
        '/*#pragma once*/',
    )
    if not all(pragma_regex.match(case) for case in accepted):
        return False
    if any(pragma_regex.match(case) for case in rejected):
        return False
    print('#pragma once match valid')
    return True

# This is a self-check helper that returns a bool, not a pytest test, so opt it
# out of pytest collection should this module ever be imported by a test run.
test_match_pragma.__test__ = False

# Finds 'file'. First the 'roots' search paths are tried, followed by the
# currently processing file's 'parent' path (or, when no parent is given,
# 'file' relative to the current working directory), returning a valid Path in
# canonical form. If no match is found None is returned.
#
# Every candidate is resolved before being returned, including the no-parent
# case, so that results can be compared reliably against the (canonical)
# entries held in the 'found', 'excludes' and 'keeps' sets.
#
def resolve_include(file: str, parent: Optional[Path] = None) -> Optional[Path]:
    candidates = [root.joinpath(file) for root in roots]
    candidates.append(parent.joinpath(file) if parent is not None else Path(file))
    for candidate in candidates:
        resolved = candidate.resolve()
        if resolved.is_file():
            return resolved
    return None

# Helper to resolve lists of files. 'file_list' is passed in from the arguments
# and each entry resolved to its canonical path (like any include entry, either
# from the list of root paths or the owning file's 'parent', which in this
# case is the input file's directory). The results are added to 'resolved'; an
# entry that can't be found is reported as a warning and otherwise ignored.
# 'file_list' may be None or empty, in which case nothing happens.
#
def resolve_excluded_files(file_list: Optional[List[str]], resolved: Set[Path], parent: Optional[Path] = None) -> None:
    for filename in file_list or ():
        located = resolve_include(filename, parent)
        if located:
            resolved.add(located)
        else:
            error_line(f'Warning: excluded file not found: {filename}')

# Writes 'line' to the open 'destn' (or stdout), followed by a newline.
#
def write_line(line: str) -> None:
    print(line, file=destn)

# Logs 'line' to stderr. This is also used for general notifications that we
# don't want to go to stdout (so the source can be piped).
# 'line' may be any object; print() converts it to text.
#
def error_line(line: Any) -> None:
    print(line, file=sys.stderr)

# Handles a single quoted include directive found while inlining a file.
# 'line' is the original source line, 'inc_name' the quoted file name taken
# from it and 'parent' the directory of the file containing the directive.
#
def _add_include(line: str, inc_name: str, parent: Path) -> None:
    resolved = resolve_include(inc_name, parent)
    if not resolved:
        # The include file didn't resolve to a file
        write_line(f'#error Unable to find: {inc_name}')
        error_line(f'Error: Unable to find: {inc_name}')
    elif resolved in excludes:
        # The file was excluded so error if the compiler uses it
        write_line(f'#error Using excluded file: {inc_name} (re-amalgamate source to fix)')
        error_line(f'Excluding: {inc_name}')
    elif resolved in found:
        # The file was previously encountered (inlined or kept) so drop it
        write_line(f'/**** skipping file: {inc_name} ****/')
    else:
        # Record the file before descending so that cycles terminate
        found.add(resolved)
        if resolved in keeps:
            # The include was flagged to keep as included
            write_line(f'/**** *NOT* inlining {inc_name} ****/')
            write_line(line)
            error_line(f'Not inlining: {inc_name}')
        else:
            # The file was neither excluded nor seen before so inline it
            write_line(f'/**** start inlining {inc_name} ****/')
            add_file(resolved, inc_name)
            write_line(f'/**** ended inlining {inc_name} ****/')

# Inline the contents of 'file' (with any of its includes also inlined, etc.).
# 'file_name' is the name used when reporting progress; when omitted (or empty)
# the file's own name is used. If 'file' isn't an existing file an error is
# logged and nothing is written.
#
# Note: text encoding errors are ignored and replaced with a replacement marker
# when reading the input files. This isn't ideal, but it's more likely to be in
# the comments than the code and a) the text editor has probably also failed to
# read the same content, and b) the compiler probably did too.
#
def add_file(file: Path, file_name: Optional[str] = None) -> None:
    if not file.is_file():
        error_line(f'Error: Invalid file: {file}')
        return
    error_line(f'Processing: {file_name or file.name}')
    with file.open('r', errors='replace') as opened:
        for raw in opened:
            # write_line() appends its own newline so drop the one read
            line = raw.rstrip('\n')
            match_include = include_regex.match(line)
            if match_include:
                # We have a quoted include directive so grab the file
                _add_include(line, match_include.group(1), file.parent)
            elif keep_pragma or not pragma_regex.match(line):
                # Skip any 'pragma once' directives, otherwise write the source line
                write_line(line)

# Start here
parser = argparse.ArgumentParser(description='Amalgamate Tool', epilog=f'example: {sys.argv[0]} -r ../my/path -r ../other/path -o out.c in.c')
parser.add_argument('-r', '--root', action='append', type=Path, help='file root search path')
parser.add_argument('-x', '--exclude', action='append', help='file to completely exclude from inlining')
parser.add_argument('-k', '--keep', action='append', help='file to exclude from inlining but keep the include directive')
parser.add_argument('-p', '--pragma', action='store_true', default=False, help='keep any "#pragma once" directives (removed by default)')
parser.add_argument('-o', '--output', type=argparse.FileType('w'), help='output file (otherwise stdout)')
parser.add_argument('input', type=Path, help='input file')
args = parser.parse_args()

# Fail early on an invalid input (and store it so we don't recurse)
args.input = args.input.resolve(strict=True)
found.add(args.input)

# Resolve all of the root paths upfront (we'll halt here on invalid roots)
roots.update(path.resolve(strict=True) for path in args.root or ())

# The remaining params: so resolve the excluded files and #pragma once directive
resolve_excluded_files(args.exclude, excludes, args.input.parent)
resolve_excluded_files(args.keep, keeps, args.input.parent)
keep_pragma = args.pragma

# Then recursively process the input file
if args.output:
    destn = args.output
try:
    add_file(args.input)
finally:
    if args.output:
        # Only close the stream that was opened for us by argparse...
        args.output.close()
    else:
        # ...stdout isn't ours to close, so just push out what was written
        destn.flush()