1# -*- coding: utf-8 -*-
2# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
3# See https://llvm.org/LICENSE.txt for license information.
4# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
5""" This module implements the 'scan-build' command API.
6
Running the static analyzer against a build is done in multiple steps:
8
9 -- Intercept: capture the compilation command during the build,
10 -- Analyze:   run the analyzer against the captured commands,
11 -- Report:    create a cover report from the analyzer outputs.  """
12
import contextlib
import datetime
import functools
import glob
import json
import logging
import multiprocessing
import os
import os.path
import re
import shutil
import subprocess
import sys
import tempfile
from collections import defaultdict
27
28from libscanbuild import (
29    command_entry_point,
30    compiler_wrapper,
31    wrapper_environment,
32    run_build,
33    run_command,
34    CtuConfig,
35)
36from libscanbuild.arguments import (
37    parse_args_for_scan_build,
38    parse_args_for_analyze_build,
39)
40from libscanbuild.intercept import capture
41from libscanbuild.report import document
42from libscanbuild.compilation import split_command, classify_source, compiler_language
43from libscanbuild.clang import (
44    get_version,
45    get_arguments,
46    get_triple_arch,
47    ClangErrorException,
48)
49from libscanbuild.shell import decode
50
51__all__ = ["scan_build", "analyze_build", "analyze_compiler_wrapper"]
52
53scanbuild_dir = os.path.dirname(os.path.realpath(__import__("sys").argv[0]))
54
55COMPILER_WRAPPER_CC = os.path.join(scanbuild_dir, "..", "libexec", "analyze-cc")
56COMPILER_WRAPPER_CXX = os.path.join(scanbuild_dir, "..", "libexec", "analyze-c++")
57
58CTU_EXTDEF_MAP_FILENAME = "externalDefMap.txt"
59CTU_TEMP_DEFMAP_FOLDER = "tmpExternalDefMaps"
60
61
@command_entry_point
def scan_build():
    """Entry point for the scan-build command.

    Captures (or wraps) the build, runs the analyzer when needed, then
    generates the cover report and computes the exit status."""

    args = parse_args_for_scan_build()
    # 'report_directory' yields the final output directory; bind it back
    # onto args so the rest of the pipeline picks it up transparently.
    with report_directory(
        args.output, args.keep_empty, args.output_format
    ) as args.output:
        # There are cases when an analyzer run is not required, but the
        # wrappers must still be set up, because 'configure' needs to
        # capture the CC/CXX values for the Makefile.
        if args.intercept_first:
            # Capture the compilation commands with the intercept module,
            # then run the analyzer against the captured commands (unless
            # this looks like a configure step).
            exit_code = capture(args)
            if need_analyzer(args.build):
                govern_analyzer_runs(args)
        else:
            # Interpose compiler wrappers that run the analyzer on the fly,
            # configured through environment variables.
            exit_code = run_build(args.build, env=setup_environment(args))
        # Generate the cover report and count the bugs found.
        number_of_bugs = document(args)
        # Honor --status-bugs: report the bug count instead of build status.
        return number_of_bugs if args.status_bugs else exit_code
89
90
@command_entry_point
def analyze_build():
    """Entry point for the analyze-build command.

    Runs the analyzer over an existing compilation database and generates
    the cover report."""

    args = parse_args_for_analyze_build()
    # 'report_directory' yields the final output directory; bind it back
    # onto args so the rest of the pipeline picks it up transparently.
    with report_directory(
        args.output, args.keep_empty, args.output_format
    ) as args.output:
        # Run the analyzer against the compilation database.
        govern_analyzer_runs(args)
        # Generate the cover report and count the bugs found.
        number_of_bugs = document(args)
        # Honor --status-bugs: report the bug count instead of plain success.
        return number_of_bugs if args.status_bugs else 0
106
107
def need_analyzer(args):
    """Check the intent of the build command.

    When the static analyzer runs against a project's configure step, it
    should be silent: there is no need to run the analyzer or generate a
    report.

    Running `scan-build` against the configure step might still be
    necessary when compiler wrappers are used. That's the moment when the
    build setup checks the compiler and captures its location for the
    build process.

    :param args: the build command as a list of strings.
    :returns: True when the analyzer should run over this build.

    Fix: return a proper bool — the original expression evaluated to the
    int 0 for an empty command list (falsy, but a surprising return type)."""

    return bool(args) and not re.search(r"configure|autogen", args[0])
119
120
def prefix_with(constant, pieces):
    """Interleave a constant before every element of a sequence.

    eg.: prefix_with(0, [1,2,3]) creates [0, 1, 0, 2, 0, 3]"""

    result = []
    for piece in pieces:
        result.append(constant)
        result.append(piece)
    return result
128
129
def get_ctu_config_from_args(args):
    """Build the CTU configuration from parsed command line arguments."""

    # Only scan-build parses CTU related flags; fall back to a disabled
    # configuration when the attributes are absent (analyze-build case).
    if hasattr(args, "ctu_phases") and hasattr(args.ctu_phases, "dir"):
        return CtuConfig(
            collect=args.ctu_phases.collect,
            analyze=args.ctu_phases.analyze,
            dir=args.ctu_dir,
            extdef_map_cmd=args.extdef_map_cmd,
        )
    return CtuConfig(collect=False, analyze=False, dir="", extdef_map_cmd="")
143
144
def get_ctu_config_from_json(ctu_conf_json):
    """Recover the CtuConfig namedtuple from its JSON representation.

    The wrappers (analyze-cc / analyze-c++) receive the configuration as a
    JSON encoded list through the environment."""

    collect, analyze, ctu_dir, extdef_map_cmd = json.loads(ctu_conf_json)
    return CtuConfig(
        collect=collect,
        analyze=analyze,
        dir=ctu_dir,
        extdef_map_cmd=extdef_map_cmd,
    )
156
157
def create_global_ctu_extdef_map(extdef_map_lines):
    """Takes iterator of individual external definition maps and creates a
    global map keeping only unique names. We leave conflicting names out of
    CTU.

    :param extdef_map_lines: Contains the id of a definition (mangled name) and
    the originating source (the corresponding AST file) name.
    :type extdef_map_lines: Iterator of str.
    :returns: Mangled name - AST file pairs.
    :rtype: List of (str, str) tuples.
    """

    mangled_to_asts = defaultdict(set)
    for line in extdef_map_lines:
        mangled_name, ast_file = line.strip().split(" ", 1)
        mangled_to_asts[mangled_name].add(ast_file)

    # Keep only unambiguous names: a name defined in more than one AST file
    # is dropped from CTU entirely.
    return [
        (mangled_name, next(iter(ast_files)))
        for mangled_name, ast_files in mangled_to_asts.items()
        if len(ast_files) == 1
    ]
183
184
def merge_ctu_extdef_maps(ctudir):
    """Merge individual external definition maps into a global one.

    As the collect phase runs parallel on multiple threads, all compilation
    units are separately mapped into a temporary file in CTU_TEMP_DEFMAP_FOLDER.
    These definition maps contain the mangled names and the source
    (AST generated from the source) which had their definition.
    These files should be merged at the end into a global map file:
    CTU_EXTDEF_MAP_FILENAME."""

    def iter_extdef_map_lines(defmap_dir):
        """Yield every line of every map file, in a deterministic order."""

        for filename in sorted(glob.glob(os.path.join(defmap_dir, "*"))):
            with open(filename, "r") as in_file:
                for line in in_file:
                    yield line

    def write_global_map(arch, mangled_ast_pairs):
        """Write (mangled name, ast file) pairs into the final map file."""

        global_map_path = os.path.join(ctudir, arch, CTU_EXTDEF_MAP_FILENAME)
        with open(global_map_path, "w") as out_file:
            for mangled_name, ast_file in mangled_ast_pairs:
                out_file.write("%s %s\n" % (mangled_name, ast_file))

    for triple_path in glob.glob(os.path.join(ctudir, "*")):
        if not os.path.isdir(triple_path):
            continue
        triple_arch = os.path.basename(triple_path)
        defmap_dir = os.path.join(ctudir, triple_arch, CTU_TEMP_DEFMAP_FOLDER)

        pairs = create_global_ctu_extdef_map(iter_extdef_map_lines(defmap_dir))
        write_global_map(triple_arch, pairs)

        # The per-TU maps are no longer needed once merged.
        shutil.rmtree(defmap_dir, ignore_errors=True)
225
226
def run_analyzer_parallel(args):
    """Runs the analyzer against the given compilation database.

    :param args: parsed command line arguments; 'args.cdb' names the
        compilation database (JSON file), the remaining attributes carry
        the analyzer configuration.
    :returns: None. Analyzer failures are logged, never raised."""

    def exclude(filename, directory):
        """Return true when any excluded directory prefix the filename."""
        if not os.path.isabs(filename):
            # filename is either absolute or relative to directory. Need to turn
            # it to absolute since 'args.excludes' are absolute paths.
            filename = os.path.normpath(os.path.join(directory, filename))
        return any(
            re.match(r"^" + exclude_directory, filename)
            for exclude_directory in args.excludes
        )

    # Parameters identical for every compilation database entry; merged
    # into each entry before it is handed over to 'run'.
    consts = {
        "clang": args.clang,
        "output_dir": args.output,
        "output_format": args.output_format,
        "output_failures": args.output_failures,
        "direct_args": analyzer_params(args),
        "force_debug": args.force_debug,
        "ctu": get_ctu_config_from_args(args),
    }

    logging.debug("run analyzer against compilation database")
    with open(args.cdb, "r") as handle:
        # Lazily merge 'consts' into every non-excluded entry. ('run' must
        # stay a module-level function so worker processes can pickle it.)
        generator = (
            dict(cmd, **consts)
            for cmd in json.load(handle)
            if not exclude(cmd["file"], cmd["directory"])
        )
        # when verbose output requested execute sequentially
        pool = multiprocessing.Pool(1 if args.verbose > 2 else None)
        for current in pool.imap_unordered(run, generator):
            if current is not None:
                # display error message from the static analyzer
                for line in current["error_output"]:
                    logging.info(line.rstrip())
        pool.close()
        pool.join()
267
268
def govern_analyzer_runs(args):
    """Governs multiple runs in CTU mode or runs once in normal mode."""

    ctu_config = get_ctu_config_from_args(args)
    # A CTU collect (1st phase) always starts from a clean slate: remove
    # all previously collected data first.
    if ctu_config.collect:
        shutil.rmtree(ctu_config.dir, ignore_errors=True)

    if not (ctu_config.collect and ctu_config.analyze):
        # Single run (collect only, analyze only, or plain analysis).
        # Collected data is left on disk so that several analyze runs can
        # reuse the result of one collection run.
        run_analyzer_parallel(args)
        if ctu_config.collect:
            merge_ctu_extdef_maps(ctu_config.dir)
        return

    # All-in-one run requested (collect + analyze): run the collect phase,
    # then the analyze phase, and deliberately remove the collection data
    # afterwards. The CTU strings keep coming from args.ctu_dir and
    # args.extdef_map_cmd, so the phase tuples can leave them empty.
    args.ctu_phases = CtuConfig(collect=True, analyze=False, dir="", extdef_map_cmd="")
    run_analyzer_parallel(args)
    merge_ctu_extdef_maps(ctu_config.dir)
    args.ctu_phases = CtuConfig(collect=False, analyze=True, dir="", extdef_map_cmd="")
    run_analyzer_parallel(args)
    shutil.rmtree(ctu_config.dir, ignore_errors=True)
301
302
def setup_environment(args):
    """Set up environment for build command to interpose compiler wrapper."""

    environment = dict(os.environ)
    environment.update(wrapper_environment(args))
    environment["CC"] = COMPILER_WRAPPER_CC
    environment["CXX"] = COMPILER_WRAPPER_CXX
    # The wrappers only run the analyzer when a clang binary is advertised
    # here; the configure-step heuristic can disable it.
    environment["ANALYZE_BUILD_CLANG"] = (
        args.clang if need_analyzer(args.build) else ""
    )
    environment["ANALYZE_BUILD_REPORT_DIR"] = args.output
    environment["ANALYZE_BUILD_REPORT_FORMAT"] = args.output_format
    environment["ANALYZE_BUILD_REPORT_FAILURES"] = (
        "yes" if args.output_failures else ""
    )
    environment["ANALYZE_BUILD_PARAMETERS"] = " ".join(analyzer_params(args))
    environment["ANALYZE_BUILD_FORCE_DEBUG"] = "yes" if args.force_debug else ""
    environment["ANALYZE_BUILD_CTU"] = json.dumps(get_ctu_config_from_args(args))
    return environment
322
323
@command_entry_point
def analyze_compiler_wrapper():
    """Entry point for `analyze-cc` and `analyze-c++` compiler wrappers.

    The actual analysis logic lives in 'analyze_compiler_wrapper_impl';
    it is handed to 'compiler_wrapper' as a callback."""

    return compiler_wrapper(analyze_compiler_wrapper_impl)
329
330
def analyze_compiler_wrapper_impl(result, execution):
    """Implements analyzer compiler wrapper functionality.

    :param result: exit code of the real compiler invocation.
    :param execution: the command executed and its working directory."""

    # Skip the analysis when the compilation failed, or when it was not
    # requested at all.
    if result or not os.getenv("ANALYZE_BUILD_CLANG"):
        return

    # Only compilation commands are analyzed (links etc. are skipped).
    compilation = split_command(execution.cmd)
    if compilation is None:
        return
    # collect the needed parameters from environment, crash when missing
    parameters = {
        "clang": os.getenv("ANALYZE_BUILD_CLANG"),
        "output_dir": os.getenv("ANALYZE_BUILD_REPORT_DIR"),
        "output_format": os.getenv("ANALYZE_BUILD_REPORT_FORMAT"),
        "output_failures": os.getenv("ANALYZE_BUILD_REPORT_FAILURES"),
        "direct_args": os.getenv("ANALYZE_BUILD_PARAMETERS", "").split(" "),
        "force_debug": os.getenv("ANALYZE_BUILD_FORCE_DEBUG"),
        "directory": execution.cwd,
        "command": [execution.cmd[0], "-c"] + compilation.flags,
        "ctu": get_ctu_config_from_json(os.getenv("ANALYZE_BUILD_CTU")),
    }
    # call static analyzer against every source file of the compilation
    for source in compilation.files:
        parameters["file"] = source
        logging.debug("analyzer parameters %s", parameters)
        current = run(parameters)
        if current is None:
            continue
        # display error message from the static analyzer
        for line in current["error_output"]:
            logging.info(line.rstrip())
363
364
@contextlib.contextmanager
def report_directory(hint, keep, output_format):
    """Responsible for the report directory.

    hint -- could specify the parent directory of the output directory.
    keep -- a boolean value to keep or delete the empty report directory."""

    # The output directory is a fresh, time-stamped subdirectory of 'hint'.
    stamp = datetime.datetime.now().strftime("scan-build-%Y-%m-%d-%H-%M-%S-%f-")
    parent_dir = os.path.abspath(hint)
    if not os.path.exists(parent_dir):
        os.makedirs(parent_dir)
    name = tempfile.mkdtemp(prefix=stamp, dir=parent_dir)

    logging.info("Report directory created: %s", name)

    try:
        yield name
    finally:
        args = (name,)
        if os.listdir(name):
            # A directory that holds reports is always kept.
            keep = True
            if output_format == "sarif-html":
                msg = (
                    "Run 'scan-view %s' to examine bug reports or see "
                    "merged sarif results at %s/results-merged.sarif."
                )
                args = (name, name)
            elif output_format == "sarif":
                msg = "View merged sarif results at %s/results-merged.sarif."
            else:
                # FIXME: 'scan-view' currently does not support sarif format.
                msg = "Run 'scan-view %s' to examine bug reports."
        elif keep:
            msg = "Report directory '%s' contains no report, but kept."
        else:
            msg = "Removing directory '%s' because it contains no report."
        logging.warning(msg, *args)

        if not keep:
            os.rmdir(name)
407
408
def analyzer_params(args):
    """Map the relevant command line arguments of this tool onto command
    line arguments of the analyzer, and return the latter as a list."""

    result = []

    if args.constraints_model:
        result += ["-analyzer-constraints={0}".format(args.constraints_model)]
    if args.internal_stats:
        result += ["-analyzer-stats"]
    if args.analyze_headers:
        result += ["-analyzer-opt-analyze-headers"]
    if args.stats:
        result += ["-analyzer-checker=debug.Stats"]
    if args.maxloop:
        result += ["-analyzer-max-loop", str(args.maxloop)]
    if args.output_format:
        result += ["-analyzer-output={0}".format(args.output_format)]
    if args.analyzer_config:
        result += ["-analyzer-config", args.analyzer_config]
    if args.verbose >= 4:
        result += ["-analyzer-display-progress"]
    if args.plugins:
        result += prefix_with("-load", args.plugins)
    if args.enable_checker:
        result += ["-analyzer-checker", ",".join(args.enable_checker)]
    if args.disable_checker:
        result += ["-analyzer-disable-checker", ",".join(args.disable_checker)]

    # every analyzer option needs to be forwarded through the clang driver
    return prefix_with("-Xclang", result)
441
442
def require(required):
    """Decorator for checking the required values in state.

    It checks the required attributes in the passed state and stop when
    any of those is missing."""

    def decorator(function):
        @functools.wraps(function)
        def wrapper(*args, **kwargs):
            state = args[0]
            # Report the first missing key, in declaration order.
            missing = [key for key in required if key not in state]
            if missing:
                raise KeyError(
                    "{0} not passed to {1}".format(missing[0], function.__name__)
                )
            return function(*args, **kwargs)

        return wrapper

    return decorator
463
464
@require(
    [
        "command",  # entry from compilation database
        "directory",  # entry from compilation database
        "file",  # entry from compilation database
        "clang",  # clang executable name (and path)
        "direct_args",  # arguments from command line
        "force_debug",  # kill non debug macros
        "output_dir",  # where generated report files shall go
        "output_format",  # it's 'plist', 'html', 'plist-html', 'plist-multi-file', 'sarif', or 'sarif-html'
        "output_failures",  # generate crash reports or not
        "ctu",  # ctu control options
    ]
)
def run(opts):
    """Entry point to run (or not) static analyzer against a single entry
    of the compilation database.

    This complex task is decomposed into smaller methods which are calling
    each other in chain. If the analysis is not possible the given method
    just return and break the chain.

    The passed parameter is a python dictionary. Each method first check
    that the needed parameters received. (This is done by the 'require'
    decorator. It's like an 'assert' to check the contract between the
    caller and the called method.)

    :returns: the analyzer result dict, or None when the analysis was
        skipped or failed."""

    try:
        command = opts.pop("command")
        command = command if isinstance(command, list) else decode(command)
        logging.debug("Run analyzer against '%s'", command)
        opts.update(classify_parameters(command))

        return arch_check(opts)
    except Exception:
        # This is the error boundary for the worker processes: never let an
        # exception escape, log it with a traceback and skip the entry.
        # (Fix: exc_info takes a boolean, not the int 1.)
        logging.error("Problem occurred during analysis.", exc_info=True)
        return None
502
503
@require(
    [
        "clang",
        "directory",
        "flags",
        "file",
        "output_dir",
        "language",
        "error_output",
        "exit_code",
    ]
)
def report_failure(opts):
    """Create report when analyzer failed.

    The major report is the preprocessor output. The output filename generated
    randomly. The compiler output also captured into '.stderr.txt' file.
    And some more execution context also saved into '.info.txt' file."""

    def extension():
        """Generate preprocessor file extension."""

        mapping = {"objective-c++": ".mii", "objective-c": ".mi", "c++": ".ii"}
        return mapping.get(opts["language"], ".i")

    def destination():
        """Creates failures directory if not exists yet."""

        failures_dir = os.path.join(opts["output_dir"], "failures")
        if not os.path.isdir(failures_dir):
            os.makedirs(failures_dir)
        return failures_dir

    # Classify error type: when Clang terminated by a signal it's a 'Crash'.
    # (python subprocess Popen.returncode is negative when child terminated
    # by signal.) Everything else is 'Other Error'.
    error = "crash" if opts["exit_code"] < 0 else "other_error"
    # Create preprocessor output file name. (This is blindly following the
    # Perl implementation.)
    (handle, name) = tempfile.mkstemp(
        suffix=extension(), prefix="clang_" + error + "_", dir=destination()
    )
    os.close(handle)
    # Execute Clang again, but run the syntax check only.
    cwd = opts["directory"]
    cmd = (
        [opts["clang"], "-fsyntax-only", "-E"]
        + opts["flags"]
        + [opts["file"], "-o", name]
    )
    try:
        cmd = get_arguments(cmd, cwd)
        run_command(cmd, cwd=cwd)
    except (subprocess.CalledProcessError, ClangErrorException):
        # Best effort: the crash report is written even when the re-run of
        # the failing command fails again.
        pass
    # write general information about the crash
    # (Fix: dropped the redundant handle.close() calls inside the 'with'
    # blocks below -- the context manager already closes the files.)
    with open(name + ".info.txt", "w") as handle:
        handle.write(opts["file"] + os.linesep)
        handle.write(error.title().replace("_", " ") + os.linesep)
        handle.write(" ".join(cmd) + os.linesep)
        handle.write(" ".join(os.uname()) + os.linesep)
        handle.write(get_version(opts["clang"]))
    # write the captured output too
    with open(name + ".stderr.txt", "w") as handle:
        handle.writelines(opts["error_output"])
573
574
@require(
    [
        "clang",
        "directory",
        "flags",
        "direct_args",
        "file",
        "output_dir",
        "output_format",
    ]
)
def run_analyzer(opts, continuation=report_failure):
    """It assembles the analysis command line and executes it. Capture the
    output of the analysis and returns with it. If failure reports are
    requested, it calls the continuation to generate it."""

    def target():
        """Creates output file name for reports."""

        output_format = opts["output_format"]
        if output_format in {"plist", "plist-html", "plist-multi-file"}:
            prefix, suffix = "report-", ".plist"
        elif output_format in {"sarif", "sarif-html"}:
            prefix, suffix = "result-", ".sarif"
        else:
            # html reports go directly into the output directory
            return opts["output_dir"]
        handle, name = tempfile.mkstemp(
            prefix=prefix, suffix=suffix, dir=opts["output_dir"]
        )
        os.close(handle)
        return name

    def failure(error_output, exit_code):
        """Record the failure and optionally generate a crash report."""

        result = {"error_output": error_output, "exit_code": exit_code}
        if opts.get("output_failures", False):
            opts.update(result)
            continuation(opts)
        return result

    try:
        cwd = opts["directory"]
        cmd = get_arguments(
            [opts["clang"], "--analyze"]
            + opts["direct_args"]
            + opts["flags"]
            + [opts["file"], "-o", target()],
            cwd,
        )
        return {"error_output": run_command(cmd, cwd=cwd), "exit_code": 0}
    except subprocess.CalledProcessError as ex:
        return failure(ex.output, ex.returncode)
    except ClangErrorException as ex:
        return failure(ex.error, 0)
630
631
def extdef_map_list_src_to_ast(extdef_src_list):
    """Turns textual external definition map list with source files into an
    external definition map list with ast files.

    Each entry is a '<mangled name> <path>' string; the path part is
    rewritten to point at the matching AST dump under the 'ast' folder."""

    rewritten = []
    for entry in extdef_src_list:
        name, path = entry.split(" ", 1)
        path = os.path.splitdrive(path)[1]  # normalize path on windows as well
        if path[0] == os.sep:  # make relative path out of absolute
            path = path[1:]
        rewritten.append("%s %s" % (name, os.path.join("ast", path + ".ast")))
    return rewritten
646
647
@require(["clang", "directory", "flags", "direct_args", "file", "ctu"])
def ctu_collect_phase(opts):
    """Preprocess source by generating all data needed by CTU analysis.

    Two artifacts are produced per compilation: the AST dump of the source
    file and its external definition map, both stored under the CTU dir
    (in a per-architecture subtree)."""

    def generate_ast(triple_arch):
        """Generates ASTs for the current compilation command."""

        args = opts["direct_args"] + opts["flags"]
        # Re-root the absolute source path under '<ctu-dir>/<arch>/ast/'.
        # The '[1:]' drops the leading path separator of the real path.
        # NOTE(review): this assumes a POSIX-style absolute path; on
        # Windows it would clip the drive letter instead -- confirm.
        ast_joined_path = os.path.join(
            opts["ctu"].dir,
            triple_arch,
            "ast",
            os.path.realpath(opts["file"])[1:] + ".ast",
        )
        ast_path = os.path.abspath(ast_joined_path)
        ast_dir = os.path.dirname(ast_path)
        if not os.path.isdir(ast_dir):
            try:
                os.makedirs(ast_dir)
            except OSError:
                # In case an other process already created it.
                pass
        # '-w' silences warnings: they were already shown in the real build.
        ast_command = [opts["clang"], "-emit-ast"]
        ast_command.extend(args)
        ast_command.append("-w")
        ast_command.append(opts["file"])
        ast_command.append("-o")
        ast_command.append(ast_path)
        logging.debug("Generating AST using '%s'", ast_command)
        run_command(ast_command, cwd=opts["directory"])

    def map_extdefs(triple_arch):
        """Generate external definition map file for the current source."""

        args = opts["direct_args"] + opts["flags"]
        # The mapping tool takes '<file> -- <compile args>' on its command
        # line and prints '<mangled name> <source>' lines.
        extdefmap_command = [opts["ctu"].extdef_map_cmd]
        extdefmap_command.append(opts["file"])
        extdefmap_command.append("--")
        extdefmap_command.extend(args)
        logging.debug(
            "Generating external definition map using '%s'", extdefmap_command
        )
        extdef_src_list = run_command(extdefmap_command, cwd=opts["directory"])
        extdef_ast_list = extdef_map_list_src_to_ast(extdef_src_list)
        extern_defs_map_folder = os.path.join(
            opts["ctu"].dir, triple_arch, CTU_TEMP_DEFMAP_FOLDER
        )
        if not os.path.isdir(extern_defs_map_folder):
            try:
                os.makedirs(extern_defs_map_folder)
            except OSError:
                # In case an other process already created it.
                pass
        # Each TU writes its own uniquely named map file; they are merged
        # later by 'merge_ctu_extdef_maps'.
        if extdef_ast_list:
            with tempfile.NamedTemporaryFile(
                mode="w", dir=extern_defs_map_folder, delete=False
            ) as out_file:
                out_file.write("\n".join(extdef_ast_list) + "\n")

    cwd = opts["directory"]
    cmd = (
        [opts["clang"], "--analyze"]
        + opts["direct_args"]
        + opts["flags"]
        + [opts["file"]]
    )
    # The target triple determines which per-architecture subtree is used.
    triple_arch = get_triple_arch(cmd, cwd)
    generate_ast(triple_arch)
    map_extdefs(triple_arch)
717
718
@require(["ctu"])
def dispatch_ctu(opts, continuation=run_analyzer):
    """Execute only one phase of 2 phases of CTU if needed."""

    ctu_config = opts["ctu"]

    if ctu_config.collect or ctu_config.analyze:
        # A single invocation handles exactly one phase.
        assert ctu_config.collect != ctu_config.analyze
        if ctu_config.collect:
            return ctu_collect_phase(opts)
        # Analyze phase: forward the CTU specific configuration through the
        # driver (-Xanalyzer) to the analyzer (-analyzer-config).
        cmd = (
            [opts["clang"], "--analyze"]
            + opts["direct_args"]
            + opts["flags"]
            + [opts["file"]]
        )
        triarch = get_triple_arch(cmd, opts["directory"])
        ctu_options = [
            "ctu-dir=" + os.path.join(ctu_config.dir, triarch),
            "experimental-enable-naive-ctu-analysis=true",
        ]
        opts["direct_args"].extend(
            prefix_with("-Xanalyzer", prefix_with("-analyzer-config", ctu_options))
        )

    return continuation(opts)
747
748
@require(["flags", "force_debug"])
def filter_debug_flags(opts, continuation=dispatch_ctu):
    """Filter out nondebug macros when requested."""

    force_debug = opts.pop("force_debug")
    if force_debug:
        # Cheap implementation: undefine NDEBUG at the end of the command
        # line instead of filtering the existing macro definitions.
        opts["flags"] = opts["flags"] + ["-UNDEBUG"]

    return continuation(opts)
758
759
@require(["language", "compiler", "file", "flags"])
def language_check(opts, continuation=filter_debug_flags):
    """Find out the language from command line parameters or file name
    extension. The decision also influenced by the compiler invocation."""

    accepted = frozenset(
        {
            "c",
            "c++",
            "objective-c",
            "objective-c++",
            "c-cpp-output",
            "c++-cpp-output",
            "objective-c-cpp-output",
        }
    )

    # language can be given as a parameter...
    language = opts.pop("language")
    compiler = opts.pop("compiler")
    # ... or find out from source file extension
    if language is None and compiler is not None:
        language = classify_source(opts["file"], compiler == "c")

    if language is None:
        logging.debug("skip analysis, language not known")
        return None
    if language not in accepted:
        logging.debug("skip analysis, language not supported")
        return None

    logging.debug("analysis, language: %s", language)
    # Make the language explicit for the analyzer invocation.
    opts.update({"language": language, "flags": ["-x", language] + opts["flags"]})
    return continuation(opts)
794
795
@require(["arch_list", "flags"])
def arch_check(opts, continuation=language_check):
    """Do run analyzer through one of the given architectures."""

    disabled = frozenset({"ppc", "ppc64"})

    received_list = opts.pop("arch_list")
    if not received_list:
        logging.debug("analysis, on default arch")
        return continuation(opts)

    # filter out disabled architectures
    supported = [arch for arch in received_list if arch not in disabled]
    if not supported:
        logging.debug("skip analysis, found not supported arch")
        return None

    # There should be only one arch given (or the same multiple times).
    # If there are multiple different archs given, those should not change
    # the pre-processing step. But that's the only pass we have before
    # running the analyzer.
    current = supported.pop()
    logging.debug("analysis, on arch: %s", current)
    opts["flags"] = ["-arch", current] + opts["flags"]
    return continuation(opts)
822
823
# To have good results from static analyzer certain compiler options shall be
# omitted. The compiler flag filtering only affects the static analyzer run.
#
# Keys are the option name, value is the number of option arguments to skip
# as well. (Consumed by 'classify_parameters' below.)
IGNORED_FLAGS = {
    "-c": 0,  # compile option will be overwritten
    "-fsyntax-only": 0,  # static analyzer option will be overwritten
    "-o": 1,  # will set up own output file
    # flags below are inherited from the perl implementation.
    "-g": 0,
    "-save-temps": 0,
    "-install_name": 1,
    "-exported_symbols_list": 1,
    "-current_version": 1,
    "-compatibility_version": 1,
    "-init": 1,
    "-e": 1,
    "-seg1addr": 1,
    "-bundle_loader": 1,
    "-multiply_defined": 1,
    "-sectorder": 3,
    "--param": 1,
    "--serialize-diagnostics": 1,
}
848
849
def classify_parameters(command):
    """Prepare compiler flags (filters some and add others) and take out
    language (-x) and architecture (-arch) flags for future processing."""

    result = {
        "flags": [],  # the filtered compiler flags
        "arch_list": [],  # list of architecture flags
        "language": None,  # compilation language, None, if not specified
        "compiler": compiler_language(command),  # 'c' or 'c++'
    }

    # iterate on the compile options (skipping the compiler itself)
    argument_stream = iter(command[1:])
    for argument in argument_stream:
        if argument == "-arch":
            # architecture flags go into a separate basket
            result["arch_list"].append(next(argument_stream))
        elif argument == "-x":
            # explicit language selection
            result["language"] = next(argument_stream)
        elif re.match(r"^[^-].+", argument) and classify_source(argument):
            # parameters which look like source files are not flags
            pass
        elif argument in IGNORED_FLAGS:
            # drop the ignored flag together with its arguments
            for _ in range(IGNORED_FLAGS[argument]):
                next(argument_stream)
        elif re.match(r"^-W.+", argument) and not re.match(r"^-Wno-.+", argument):
            # extra warnings don't matter for the analysis, but warning
            # suppressions (-Wno-...) must be kept
            pass
        else:
            # everything else is considered a compilation flag
            result["flags"].append(argument)

    return result
887