xref: /aosp_15_r20/external/zstd/build/single_file_libs/combine.sh (revision 01826a4963a0d8a59bc3812d29bdf0fb76416722)
1#!/bin/sh -e
2
3# Tool to bundle multiple C/C++ source files, inlining any includes.
4#
5# TODO: ROOTS, FOUND, etc., as arrays (since they fail on paths with spaces)
6#
7# Author: Carl Woffenden, Numfum GmbH (this script is released under a CC0 license/Public Domain)
8
# Space-separated search roots used to resolve includes (-r appends to this)
ROOTS=.

# Space-separated includes that must not be inlined at all (-x appends)
XINCS=

# Space-separated includes to leave as #include directives (-k appends)
KINCS=

# Space-separated resolved paths of the files visited so far
FOUND=

# Output file set by -o (when left empty the output goes to stdout)
DESTN=

# Non-zero (set by -p) keeps "#pragma once" directives in the output
PONCE=0
26
# Prints the script usage to stderr then exits with status 1.
#
# The text goes to stderr so that it never ends up in the amalgamated output
# when stdout has been redirected to a file.
usage() {
  {
    echo "Usage: $0 [-r <path>] [-x <header>] [-k <header>] [-p] [-o <outfile>] infile"
    echo "  -r file root search path"
    echo "  -x file to completely exclude from inlining"
    echo "  -k file to exclude from inlining but keep the include directive"
    echo "  -p keep any '#pragma once' directives (removed by default)"
    echo "  -o output file (otherwise stdout)"
    echo "Example: $0 -r ../my/path -r ../other/path -o out.c in.c"
  } >&2
  exit 1
}
38
# Tests that the grep and sed implementations work as the script expects
# (older OSX grep fails). On failure a message is printed to stderr, so it
# cannot be mistaken for amalgamated output on stdout, and the script exits
# with status 1.
test_deps() {
  if ! printf '%s\n' '#include "foo"' | grep -Eq '^\s*#\s*include\s*".+"'; then
    echo "Aborting: the grep implementation fails to parse include lines" >&2
    exit 1
  fi
  if ! printf '%s\n' '"foo.h"' | sed -E 's/"([^"]+)"/\1/' | grep -Eq '^foo\.h$'; then
    echo "Aborting: sed is unavailable or non-functional" >&2
    exit 1
  fi
}
50
# Returns zero when subject $2 matches glob pattern $1 and one otherwise
# (the pattern is expanded unquoted so it acts as a pattern, see fnmatch(3))
fnmatch() {
  case "$2" in
  $1) return 0 ;;
  *) return 1 ;;
  esac
}
60
# Returns zero when line $1 is an include directive naming a quoted file
is_include_line() {
  # Glob check first so that grep is only spawned for candidate lines
  if ! fnmatch "*#*include*" "$1"; then
    return 1
  fi
  printf "%s\n" "$1" | grep -Eq '^\s*#\s*include\s*".+"'
}
66
# Returns zero when line $1 is a 'pragma once' directive
is_pragma_once_line() {
  # Glob check first so that grep is only spawned for candidate lines
  if ! fnmatch "*#*pragma*once*" "$1"; then
    return 1
  fi
  printf "%s\n" "$1" | grep -Eq '^\s*#\s*pragma\s*once\s*'
}
72
# Tests if list $1 has item $2 (returning zero on a match)
# (originally used grep -Eq "(^|\s*)$2(\$|\s*))
#
# The list is a string of items separated by the characters that $IFS held
# when the script started (captured in list_FS). An empty item is never
# considered to be in a list.
readonly list_FS="$IFS"
list_has_item() {
  # With an empty item the patterns built below degenerate to "*[sep]",
  # "[sep]*" and "", which match any list that is empty or that starts or
  # ends with a separator, so reject the empty item up front.
  [ -n "$2" ] || return 1
  # Re: escaping glob pattern special characters in item string:
  #
  # bash (tested 3.2.57, 5.1.4), dash (tested 0.5.10.2), NetBSD /bin/sh
  # (tested 8.2), and Solaris /bin/sh (tested 11.4) require escaping
  # backslashes in a bracket expression despite POSIX specifying that
  # backslash loses significance in a bracket expression.
  #
  # Conversely, neither FreeBSD /bin/sh (tested 12.2) nor OpenBSD /bin/sh
  # (tested 7.1) obey backslash-escaping in case statement patterns even
  # outside bracket expressions, so escape special characters using bracket
  # expressions.
  #
  # Solaris /bin/sh (tested 11.4) requires vertical bar (|) to be escaped.
  #
  # All accommodations should behave as expected under strict POSIX semantics.
  if fnmatch "*[\\*?[|]*" "$2"; then
    set -- "$1" "$(printf '%s\n' "$2" | sed -e 's/[*?[|]/[&]/g; s/[\]/[\\&]/g')"
  fi
  # The item may sit in the middle, at the end, at the start, or be the
  # whole list
  for item_P in "*[$list_FS]$2[$list_FS]*" "*[$list_FS]$2" "$2[$list_FS]*" "$2"; do
    fnmatch "${item_P}" "$1" && return 0
  done
  return 1
}
100
# Writes each argument as its own line, appending to $DESTN when that is
# set and printing to stdout otherwise
write_line() {
  if [ -z "$DESTN" ]; then
    printf '%s\n' "$@"
    return
  fi
  printf '%s\n' "$@" >> "$DESTN"
}
109
# Writes the arguments as a single line on stderr, joined by the first
# character of $IFS (a space by default).
#
# printf with a quoted "$*" is used rather than an unquoted echo so that the
# message is emitted verbatim: no globbing, no whitespace collapsing, and no
# treatment of a leading "-n" as an option.
log_line() {
  printf '%s\n' "$*" >&2
}
113
# Finds include $2, looking first in directory $1 (the directory of the
# including file) and then in each entry of $ROOTS.
#
# Prints the path of the first match on stdout and returns zero, or prints
# nothing and returns non-zero if no candidate is an existing file. The path
# is reduced to a canonical form when a tool to do so is available, so that
# includes of the same file expressed differently resolve to the same string.
resolve_include() {
  local srcdir="$1"
  local inc="$2"
  local root=
  local relpath=
  local py=
  # $srcdir is quoted so that a directory containing spaces stays one word
  # (the ${..:+..} form still contributes nothing when it is empty); $ROOTS
  # is deliberately unquoted because it is a space-separated list.
  for root in ${srcdir:+"$srcdir"} $ROOTS; do
    [ -f "$root/$inc" ] || continue
    # Each attempt is guarded with "|| relpath=" so that a missing or failing
    # tool simply falls through to the next one, even under "sh -e".
    relpath=$(realpath --relative-to . "$root/$inc" 2>/dev/null) || relpath=
    if [ -z "$relpath" ]; then
      # Not all realpaths support --relative-to
      relpath=$(realpath "$root/$inc" 2>/dev/null) || relpath=
    fi
    if [ -z "$relpath" ]; then
      # Not all distros have realpath, so fall back on Python; print() with
      # a single argument is valid in both Python 2 and Python 3.
      for py in python3 python; do
        relpath=$("$py" -c 'import os, sys; print(os.path.relpath(sys.argv[1]))' "$root/$inc" 2>/dev/null) || relpath=
        [ -z "$relpath" ] || break
      done
    fi
    # Worst case, fall back to just the root + relative include path. The
    # same file may then resolve to different strings depending on how it
    # was included, so the caller's list of visited paths can miss a repeat
    # and the file may be inlined more than once.
    printf '%s\n' "${relpath:-$root/$inc}"
    return 0
  done
  return 1
}
153
# Adds the contents of file $1 to the output with its local includes inlined.
#
# Every line is copied through write_line except:
#  - '#pragma once' lines, which are dropped unless $PONCE is non-zero;
#  - quoted #include directives, which are handled in this order:
#      1. named in $XINCS: replaced by an #error directive;
#      2. resolved to a path already in $FOUND: replaced by a comment;
#      3. named in $KINCS: the directive is kept as-is;
#      4. not resolvable to a file: replaced by an #error directive;
#      5. otherwise: the file is recorded in $FOUND and inlined recursively.
#
# An include that cannot be resolved is never treated as previously seen, so
# it is reported rather than silently dropped. If $1 is empty an #error
# directive is written instead of any content.
add_file() {
  local file="$1"
  if [ -z "$file" ]; then
    write_line "#error Unable to find \"$1\""
    log_line "Error: Unable to find: \"$1\""
    return
  fi
  log_line "Processing: $file"
  # Get directory of the current so we can resolve relative includes
  local srcdir="$(dirname "$file")"
  local line=
  local inc=
  local res_inc=
  # The "|| [ -n ... ]" keeps a final line that has no trailing newline
  while IFS= read -r line || [ -n "$line" ]; do
    if ! is_include_line "$line"; then
      # Skip any 'pragma once' directives, otherwise write the source line
      if [ "$PONCE" -ne 0 ] || ! is_pragma_once_line "$line"; then
        write_line "$line"
      fi
      continue
    fi
    # Take the first quoted name only, ignoring anything after it (such as a
    # trailing comment that itself contains quotes)
    inc=$(printf '%s\n' "$line" | sed -n -E 's/^[^"]*"([^"]+)".*$/\1/p') || inc=
    # Empty when the include cannot be found; the guard stops "sh -e" from
    # aborting on the non-zero status
    res_inc=$(resolve_include "$srcdir" "$inc") || res_inc=
    if list_has_item "$XINCS" "$inc"; then
      # The file was excluded so error if the source attempts to use it
      write_line "#error Using excluded file: $inc (re-amalgamate source to fix)"
      log_line "Excluding: $inc"
    elif [ -n "$res_inc" ] && list_has_item "$FOUND" "$res_inc"; then
      write_line "/**** skipping file: $inc ****/"
    elif list_has_item "$KINCS" "$inc"; then
      # The include was flagged to keep as included
      if [ -n "$res_inc" ]; then
        FOUND="$FOUND $res_inc"
      fi
      write_line "/**** *NOT* inlining $inc ****/"
      write_line "$line"
      log_line "Not Inlining: $inc"
    elif [ -z "$res_inc" ]; then
      # Nothing to inline, so make the failure visible in the output
      write_line "#error Unable to find \"$inc\""
      log_line "Error: Unable to find: \"$inc\""
    else
      # The file was neither excluded nor seen before so inline it
      FOUND="$FOUND $res_inc"
      write_line "/**** start inlining $inc ****/"
      add_file "$res_inc"
      write_line "/**** ended inlining $inc ****/"
    fi
  done < "$file"
}
209
# Parse the command-line options; anything getopts does not accept (an
# unknown option or a missing option argument) falls through to the usage
while getopts ":r:x:k:po:" opts; do
  case $opts in
  r) ROOTS="$ROOTS $OPTARG" ;;
  x) XINCS="$XINCS $OPTARG" ;;
  k) KINCS="$KINCS $OPTARG" ;;
  p) PONCE=1 ;;
  o) DESTN="$OPTARG" ;;
  *) usage ;;
  esac
done
# Drop the parsed options so that $1 is the input file
shift $((OPTIND - 1))
233
# Entry point: amalgamate the input file named by the first remaining
# argument, or print the usage when none was given.
if [ -z "$1" ]; then
  usage
elif [ ! -f "$1" ]; then
  # Diagnostics go to stderr so they never end up in redirected output
  echo "Input file not found: \"$1\"" >&2
  exit 1
else
  if [ -n "$DESTN" ]; then
    # Create or truncate the destination, since write_line only appends
    : > "$DESTN"
  fi
  test_deps
  log_line "Processing using the slower shell script; this might take a while"
  add_file "$1"
fi
exit 0
250