xref: /aosp_15_r20/external/zstd/build/single_file_libs/combine.sh (revision 01826a4963a0d8a59bc3812d29bdf0fb76416722)
#!/bin/sh

# Tool to bundle multiple C/C++ source files, inlining any includes.
#
# TODO: ROOTS, FOUND, etc., as arrays (since they fail on paths with spaces)
#
# Author: Carl Woffenden, Numfum GmbH (this script is released under a CC0 license/Public Domain)

# Abort on the first unhandled command failure. This is set here rather than
# on the shebang line so that it also applies when the script is started as
# 'sh combine.sh' (in which case the shebang options are never seen).
set -e

# Common file roots (whitespace-separated list, extended by each -r option)
ROOTS="."

# -x option excluded includes (whitespace-separated list)
XINCS=""

# -k option includes to keep as include directives (whitespace-separated list)
KINCS=""

# Files previously visited (whitespace-separated list of resolved paths)
FOUND=""

# Optional destination file (empty string to write to stdout)
DESTN=""

# Whether the "#pragma once" directives should be written to the output
PONCE=0
26*01826a49SYabin Cui
# Prints the script usage to stdout then exits the script with status 1
usage() {
  cat <<EOF
Usage: $0 [-r <path>] [-x <header>] [-k <header>] [-p] [-o <outfile>] infile
  -r file root search path
  -x file to completely exclude from inlining
  -k file to exclude from inlining but keep the include directive
  -p keep any '#pragma once' directives (removed by default)
  -o output file (otherwise stdout)
Example: $0 -r ../my/path -r ../other/path -o out.c in.c
EOF
  exit 1
}
38*01826a49SYabin Cui
# Tests that the grep and sed implementations work as expected (older OSX grep
# fails). On failure a diagnostic is written to stderr (never to stdout, which
# may be the generated source) and the script exits with status 1.
test_deps() {
  # The pattern deliberately uses '\s', exactly as the include matcher does
  if ! echo '#include "foo"' | grep -Eq '^\s*#\s*include\s*".+"'; then
    echo "Aborting: the grep implementation fails to parse include lines" >&2
    exit 1
  fi
  if ! echo '"foo.h"' | sed -E 's/"([^"]+)"/\1/' | grep -Eq '^foo\.h$'; then
    echo "Aborting: sed is unavailable or non-functional" >&2
    exit 1
  fi
}
50*01826a49SYabin Cui
# Glob matcher: returns 0 when subject $2 matches shell pattern $1, otherwise
# returns 1 (see fnmatch(3))
fnmatch() {
  # $1 is left unquoted on purpose so that it is interpreted as a pattern
  case "$2" in
  $1) return 0 ;;
  *) return 1 ;;
  esac
}
60*01826a49SYabin Cui
# Returns 0 when line $1 is a local (double-quoted) include directive
is_include_line() {
  # Cheap shell-only pre-filter so that most lines never spawn a grep process
  case "$1" in
  *#*include*) ;;
  *) return 1 ;;
  esac
  printf "%s\n" "$1" | grep -Eq '^\s*#\s*include\s*".+"'
}
66*01826a49SYabin Cui
# Returns 0 when line $1 is a '#pragma once' directive
is_pragma_once_line() {
  # Cheap shell-only pre-filter so that most lines never spawn a grep process
  case "$1" in
  *#*pragma*once*) ;;
  *) return 1 ;;
  esac
  printf "%s\n" "$1" | grep -Eq '^\s*#\s*pragma\s*once\s*'
}
72*01826a49SYabin Cui
# Tests if whitespace-separated list $1 has item $2 (returning zero on a match)
# (originally used grep -Eq "(^|\s*)$2(\$|\s*))
# The item is compared literally: glob special characters in it are escaped.
readonly list_FS="$IFS"
list_has_item() {
  # Re: escaping glob pattern special characters in item string:
  #
  # bash (tested 3.2.57, 5.1.4), dash (tested 0.5.10.2), NetBSD /bin/sh
  # (tested 8.2), and Solaris /bin/sh (tested 11.4) require escaping
  # backslashes in a bracket expression despite POSIX specifying that
  # backslash loses significance in a bracket expression.
  #
  # Conversely, neither FreeBSD /bin/sh (tested 12.2) nor OpenBSD /bin/sh
  # (tested 7.1) obey backslash-escaping in case statement patterns even
  # outside bracket expressions, so escape special characters using bracket
  # expressions.
  #
  # Solaris /bin/sh (tested 11.4) requires vertical bar (|) to be escaped.
  #
  # All accommodations should behave as expected under strict POSIX semantics.
  #
  # For the same reason the pre-check below spells backslash as a doubled pair
  # inside its bracket expression: shells that honour backslash escapes read
  # the pair as one literal backslash, while strict POSIX shells read it as
  # two (equivalent) set members. A single backslash would only escape the
  # following '*' in the former group, leaving items whose sole special
  # character is a backslash unescaped.
  if fnmatch '*[\\*?[|]*' "$2"; then
    set -- "$1" "$(printf '%s\n' "$2" | sed -e 's/[*?[|]/[&]/g; s/[\]/[\\&]/g')"
  fi
  # Item in the middle, at the end, at the start, or as the entire list
  for item_P in "*[$list_FS]$2[$list_FS]*" "*[$list_FS]$2" "$2[$list_FS]*" "$2"; do
    fnmatch "${item_P}" "$1" && return 0
  done
  return 1
}
100*01826a49SYabin Cui
# Emits each supplied argument as its own line, appending to the file named by
# $DESTN when that is set and printing to stdout otherwise
write_line() {
  if [ -z "$DESTN" ]; then
    printf '%s\n' "$@"
    return
  fi
  printf '%s\n' "$@" >> "$DESTN"
}
109*01826a49SYabin Cui
# Writes the supplied arguments as a single line to stderr. The arguments are
# joined by the first character of IFS (a space by default) and are otherwise
# emitted verbatim: no word splitting, no pathname expansion, and no
# interpretation of a leading '-n' or of backslashes.
log_line() {
  printf '%s\n' "$*" >&2
}
113*01826a49SYabin Cui
# Find this file!
#
# Locates include $2, looking first in the including file's directory $1 and
# then in each whitespace-separated root listed in $ROOTS. On success the
# resolved path is printed to stdout and 0 is returned; if no candidate is a
# regular file nothing is printed and 1 is returned.
resolve_include() {
  local srcdir=$1
  local inc=$2
  local root relpath
  # $srcdir is quoted so that a source directory containing spaces stays one
  # word; $ROOTS is deliberately unquoted as it is a whitespace-separated list.
  for root in "$srcdir" $ROOTS; do
    [ -f "$root/$inc" ] || continue
    # Try to reduce the file path into a canonical form (so that multiple
    # includes of the same file are successfully deduplicated, even if they
    # are expressed differently). Each attempt is guarded with '||' so that a
    # missing or failing tool does not abort the script under 'set -e'.
    relpath=$(realpath --relative-to . "$root/$inc" 2>/dev/null) || relpath=""
    if [ -z "$relpath" ]; then # not all realpaths support --relative-to
      relpath=$(realpath "$root/$inc" 2>/dev/null) || relpath=""
    fi
    if [ -z "$relpath" ]; then # not all distros have realpath...
      # Fallback on Python to reduce the path if the above fails; print() with
      # parentheses is valid in both Python 2 and Python 3.
      relpath=$(python -c "import os,sys; print(os.path.relpath(sys.argv[1]))" "$root/$inc" 2>/dev/null) || relpath=""
    fi
    if [ -z "$relpath" ]; then # ...and not all distros have python
      # Worst case, fall back to just the root + relative include path. The
      # problem with this is that it is possible to emit multiple different
      # resolved paths to the same file, depending on exactly how it's
      # included. Since the caller keeps a list of the resolved paths it has
      # already included, in order to avoid repeated includes, this failure to
      # produce a canonical/reduced path can lead to multiple inclusions of the
      # same file. But it seems like the resulting single file library still
      # works (hurray include guards!), so I guess it's ok.
      relpath="$root/$inc"
    fi
    printf '%s\n' "$relpath"
    return 0
  done
  return 1
}
153*01826a49SYabin Cui
# Adds the contents of $1 with any of its includes inlined
#
# Each line of file $1 is copied to the output via write_line, except that:
#  - a local include directive is replaced by the (recursively processed)
#    contents of the file it resolves to, the first time that file is seen;
#  - repeated includes are replaced by a "skipping file" marker;
#  - includes listed in $XINCS become '#error' lines;
#  - includes listed in $KINCS are kept as directives;
#  - includes that cannot be resolved become '#error Unable to find' lines;
#  - '#pragma once' lines are dropped unless $PONCE is non-zero.
# Resolved paths are appended to the global $FOUND. Progress and errors are
# reported through log_line.
add_file() {
  local file=$1
  if [ -z "$file" ]; then
    write_line "#error Unable to find \"$1\""
    log_line "Error: Unable to find: \"$1\""
    return 0
  fi
  log_line "Processing: $file"
  # Get directory of the current file so we can resolve relative includes
  local srcdir
  srcdir=$(dirname "$file")
  local line=
  local inc=
  local res_inc=
  # Read the file; '|| [ -n "$line" ]' keeps a final line that has no
  # terminating newline (read fails on it but still fills in $line)
  while IFS= read -r line || [ -n "$line" ]; do
    if ! is_include_line "$line"; then
      # Skip any 'pragma once' directives, otherwise write the source line
      if [ "$PONCE" -ne 0 ] || ! is_pragma_once_line "$line"; then
        write_line "$line"
      fi
      continue
    fi
    # We have an include directive so take the first double-quoted name on
    # the line: strip everything up to and including the first quote, then
    # everything from the next quote onwards
    inc=${line#*\"}
    inc=${inc%%\"*}
    # A failed lookup yields an empty path rather than aborting under 'set -e'
    res_inc=$(resolve_include "$srcdir" "$inc") || res_inc=""
    if list_has_item "$XINCS" "$inc"; then
      # The file was excluded so error if the source attempts to use it
      write_line "#error Using excluded file: $inc (re-amalgamate source to fix)"
      log_line "Excluding: $inc"
    elif [ -z "$res_inc" ] && ! list_has_item "$KINCS" "$inc"; then
      # The file could not be found under the source directory or any root
      write_line "#error Unable to find \"$inc\""
      log_line "Error: Unable to find: \"$inc\""
    else
      # Only a kept include can get here unresolved (it does not need to exist
      # on disk); track such an include by its literal name instead
      [ -n "$res_inc" ] || res_inc=$inc
      if list_has_item "$FOUND" "$res_inc"; then
        write_line "/**** skipping file: $inc ****/"
      else
        # The file was not previously encountered
        FOUND="$FOUND $res_inc"
        if list_has_item "$KINCS" "$inc"; then
          # But the include was flagged to keep as included
          write_line "/**** *NOT* inlining $inc ****/"
          write_line "$line"
          log_line "Not Inlining: $inc"
        else
          # The file was neither excluded nor seen before so inline it
          write_line "/**** start inlining $inc ****/"
          add_file "$res_inc"
          write_line "/**** ended inlining $inc ****/"
        fi
      fi
    fi
  done < "$file"
}
209*01826a49SYabin Cui
# Parse the command line. The leading ':' in the option string selects silent
# getopts error reporting, so both unknown options and options missing their
# argument fall through to the default branch and print the usage.
while getopts ":r:x:k:po:" opts; do
  case $opts in
  r)
    ROOTS="$ROOTS $OPTARG"
    ;;
  x)
    XINCS="$XINCS $OPTARG"
    ;;
  k)
    KINCS="$KINCS $OPTARG"
    ;;
  p)
    PONCE=1
    ;;
  o)
    DESTN="$OPTARG"
    ;;
  *)
    usage
    ;;
  esac
done
shift $((OPTIND-1))

# The first remaining argument is the input file; anything else is an error
if [ -z "$1" ]; then
  usage
fi
if [ ! -f "$1" ]; then
  # Diagnostics go to stderr so they never end up in redirected output
  echo "Input file not found: \"$1\"" >&2
  exit 1
fi
if [ -n "$DESTN" ]; then
  # Create or truncate the destination up front (lines are appended to it)
  printf "" > "$DESTN"
fi
test_deps
log_line "Processing using the slower shell script; this might take a while"
add_file "$1"
exit 0
250