xref: /aosp_15_r20/external/pigweed/pw_ide/py/pw_ide/cpp.py (revision 61c4878ac05f98d0ceed94b57d316916de578985)
1# Copyright 2022 The Pigweed Authors
2#
3# Licensed under the Apache License, Version 2.0 (the "License"); you may not
4# use this file except in compliance with the License. You may obtain a copy of
5# the License at
6#
7#     https://www.apache.org/licenses/LICENSE-2.0
8#
9# Unless required by applicable law or agreed to in writing, software
10# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
11# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
12# License for the specific language governing permissions and limitations under
13# the License.
14"""Configure C/C++ IDE support for Pigweed projects.
15
16We support C/C++ code analysis via ``clangd``, or other language servers that
17are compatible with the ``clangd`` compilation database format.
18
19While clangd can work well out of the box for typical C++ codebases, some work
20is required to coax it to work for embedded projects. In particular, Pigweed
21projects use multiple toolchains within a distinct environment, and almost
always define multiple targets. This means compilation units are likely to have
23multiple compile commands and the toolchain executables are unlikely to be in
24your path. ``clangd`` is not equipped to deal with this out of the box. We
25handle this by:
26
27- Processing the compilation database produced by the build system into
28  multiple internally-consistent compilation databases, one for each target
29  (where a "target" is a particular build for a particular system using a
30  particular toolchain).
31
32- Creating unambiguous paths to toolchain drivers to ensure the right toolchain
33  is used and that clangd knows where to find that toolchain's system headers.
34
35- Providing tools for working with several compilation databases that are
36  spiritually similar to tools like ``pyenv``, ``rbenv``, etc.
37
38In short, we take the probably-broken compilation database that the build system
39generates, process it into several not-broken compilation databases in the
40``pw_ide`` working directory, and provide a stable symlink that points to the
selected active target's compilation database. If ``clangd`` is configured to
42point at the symlink and is set up with the right paths, you'll get code
43intelligence.
44"""
45
46from __future__ import annotations
47
48from contextlib import contextmanager
49from dataclasses import asdict, dataclass, field
50import functools
51import glob
52from hashlib import sha1
53from io import TextIOBase
54import json
55import logging
56from pathlib import Path
57import platform
58import random
59import re
60import sys
61from typing import (
62    Any,
63    cast,
64    Generator,
65    Iterator,
66    TypedDict,
67)
68
69from pw_cli.env import pigweed_environment
70
71from pw_ide.exceptions import (
72    BadCompDbException,
73    InvalidTargetException,
74    MissingCompDbException,
75    UnresolvablePathException,
76)
77
78from pw_ide.settings import PigweedIdeSettings
79from pw_ide.symlinks import set_symlink
80
# Module logger, named after the containing package.
_LOG = logging.getLogger(__package__)
# Pigweed environment, looked up once when this module is imported.
env = pigweed_environment()

# File name of a compilation database.
# NOTE(review): not referenced in the visible part of this file -- presumably
# used by code further down or by other modules; confirm before renaming.
COMPDB_FILE_NAME = 'compile_commands.json'
# Name of the entry inside the pw_ide working directory that is linked to the
# directory holding the current target's compilation database.
STABLE_CLANGD_DIR_NAME = '.stable'
# Name of the JSON state file kept inside the pw_ide working directory.
_CPP_IDE_FEATURES_DATA_FILE = 'pw_ide_state.json'
# An executable whose file name contains any of these substrings is rejected
# as a compiler driver.
_UNSUPPORTED_TOOLCHAIN_EXECUTABLES = ('_pw_invalid', 'python')
# Wrapper executables that may precede the real compiler in a compile command.
_SUPPORTED_WRAPPER_EXECUTABLES = ('ccache',)
89
90
@dataclass(frozen=True)
class CppIdeFeaturesTarget:
    """An immutable record describing one C++ code analysis target.

    It holds the target's name, the path to its compilation database file,
    the number of compile commands, and an enabled flag (on by default).
    """

    name: str
    compdb_file_path: Path
    num_commands: int
    is_enabled: bool = True

    def __str__(self) -> str:
        return self.name

    def serialized(self) -> dict[str, Any]:
        """Return the fields as a dict, with the path rendered as a string."""
        fields = asdict(self)
        fields['compdb_file_path'] = str(self.compdb_file_path)
        return fields

    @classmethod
    def deserialize(cls, **data) -> CppIdeFeaturesTarget:
        """Build a target from keyword data shaped like ``serialized()``.

        ``compdb_file_path`` is required and is converted back to a ``Path``;
        every other keyword is passed to the constructor unchanged.
        """
        fields = dict(data)
        fields['compdb_file_path'] = Path(data['compdb_file_path'])
        return cls(**fields)
121
122
# Maps a compilation database file path to a hash string for that file.
CppCompilationDatabaseFileHashes = dict[Path, str]
# Maps a compilation database file path to the list of targets associated
# with that file.
CppCompilationDatabaseFileTargets = dict[Path, list[CppIdeFeaturesTarget]]
125
126
@dataclass
class CppIdeFeaturesData:
    """State data about C++ code analysis features.

    Holds the known targets by name, the currently selected target (if any),
    and two maps keyed by compilation database path: file hashes and the
    targets associated with each file.
    """

    targets: dict[str, CppIdeFeaturesTarget] = field(default_factory=dict)
    current_target: CppIdeFeaturesTarget | None = None
    compdb_hashes: CppCompilationDatabaseFileHashes = field(
        default_factory=dict
    )
    compdb_targets: CppCompilationDatabaseFileTargets = field(
        default_factory=dict
    )

    def serialized(self) -> dict[str, Any]:
        """Return the state as plain dicts, lists and strings."""
        current = None
        if self.current_target is not None:
            current = self.current_target.serialized()

        targets = {
            name: target.serialized() for name, target in self.targets.items()
        }
        hashes = {
            str(path): digest for path, digest in self.compdb_hashes.items()
        }
        targets_by_file = {
            str(path): [target.serialized() for target in file_targets]
            for path, file_targets in self.compdb_targets.items()
        }

        return {
            'current_target': current,
            'targets': targets,
            'compdb_hashes': hashes,
            'compdb_targets': targets_by_file,
        }

    @classmethod
    def deserialize(cls, **data) -> CppIdeFeaturesData:
        """Rebuild the state from keyword data shaped like ``serialized()``.

        All four keys must be present; a missing key raises ``KeyError``.
        """
        raw_current = data['current_target']
        current = None
        if raw_current is not None:
            current = CppIdeFeaturesTarget.deserialize(**raw_current)

        targets = {
            name: CppIdeFeaturesTarget.deserialize(**raw_target)
            for name, raw_target in data['targets'].items()
        }
        hashes = {
            Path(path_str): digest
            for path_str, digest in data['compdb_hashes'].items()
        }
        targets_by_file = {
            Path(path_str): [
                CppIdeFeaturesTarget.deserialize(**raw_target)
                for raw_target in raw_targets
            ]
            for path_str, raw_targets in data['compdb_targets'].items()
        }

        return cls(
            current_target=current,
            targets=targets,
            compdb_hashes=hashes,
            compdb_targets=targets_by_file,
        )
185
186
class CppIdeFeaturesState:
    """Container for IDE features state data.

    The state is persisted as JSON in the ``pw_ide`` working directory. Each
    property opens the state file, reads or updates one field, and then
    writes the whole state back, so this object caches nothing except the
    settings it was constructed with.
    """

    def __init__(self, pw_ide_settings: PigweedIdeSettings) -> None:
        self.settings = pw_ide_settings

    def __len__(self) -> int:
        """Return the number of targets left after filtering."""
        return len(self.targets)

    def __getitem__(self, index: str) -> CppIdeFeaturesTarget:
        """Return the filtered target with the given name."""
        return self.targets[index]

    def __iter__(self) -> Generator[CppIdeFeaturesTarget, None, None]:
        """Iterate over the filtered targets."""
        return (target for target in self.targets.values())

    @property
    def stable_target_link(self) -> Path:
        """Path of the stable link inside the working directory."""
        return self.settings.working_dir / STABLE_CLANGD_DIR_NAME

    @contextmanager
    def _file(self) -> Generator[CppIdeFeaturesData, None, None]:
        """A simple key-value store for state data.

        Loads the state file (starting from empty state if the file is
        missing or is not valid JSON), yields the state for the caller to
        read or mutate, and writes it back once the ``with`` body finishes.
        If the body raises, the exception propagates from the ``yield`` and
        nothing is written.
        """
        file_path = self.settings.working_dir / _CPP_IDE_FEATURES_DATA_FILE

        try:
            with open(file_path) as file:
                data = CppIdeFeaturesData.deserialize(**json.load(file))
        except (FileNotFoundError, json.decoder.JSONDecodeError):
            data = CppIdeFeaturesData()

        yield data

        with open(file_path, 'w') as file:
            json.dump(data.serialized(), file, indent=2)

    @property
    def targets(self) -> dict[str, CppIdeFeaturesTarget]:
        """Stored targets, filtered by the include/exclude settings.

        A target is kept when it is not named in ``targets_exclude`` and,
        if ``targets_include`` is non-empty, it is named there. An empty
        list disables the corresponding filter.
        """

        # Written as a plain function on purpose: a conditional expression
        # placed after ``lambda x:`` becomes part of the lambda's body, which
        # makes "pick one of two lambdas" silently mean something else.
        def is_wanted(name: str) -> bool:
            excluded = self.settings.targets_exclude
            if len(excluded) > 0 and name in excluded:
                return False

            included = self.settings.targets_include
            return len(included) == 0 or name in included

        with self._file() as state:
            return {
                name: target
                for name, target in state.targets.items()
                if is_wanted(name)
            }

    @targets.setter
    def targets(self, new_targets: dict[str, CppIdeFeaturesTarget]) -> None:
        with self._file() as state:
            state.targets = new_targets

    @property
    def current_target(self) -> CppIdeFeaturesTarget | None:
        """The currently selected target, or ``None`` if none is selected."""
        with self._file() as state:
            return state.current_target

    @current_target.setter
    def current_target(
        self, new_current_target: str | CppIdeFeaturesTarget | None
    ) -> None:
        """Select a target by name or instance, or clear it with ``None``.

        Raises:
            InvalidTargetException: a name was given that is not among the
                stored (unfiltered) targets.
            MissingCompDbException: the target's compilation database file
                does not exist.
        """
        with self._file() as state:
            if new_current_target is None:
                state.current_target = None
                return

            if isinstance(new_current_target, CppIdeFeaturesTarget):
                name = new_current_target.name
                new_current_target_inst = new_current_target
            else:
                name = new_current_target

                try:
                    new_current_target_inst = state.targets[name]
                except KeyError:
                    raise InvalidTargetException

            if not new_current_target_inst.compdb_file_path.exists():
                raise MissingCompDbException

            # Point the stable link at the directory that holds the target's
            # compilation database.
            set_symlink(
                new_current_target_inst.compdb_file_path.parent,
                self.stable_target_link,
            )

            # The stored instance registered under this name is what gets
            # recorded. If an instance was passed whose name is not in the
            # stored targets, this lookup raises KeyError.
            state.current_target = state.targets[name]

    @property
    def max_commands_target(self) -> CppIdeFeaturesTarget | None:
        """The stored target with the most compile commands, if any.

        When several targets tie, the first one in stored order is returned.
        The include/exclude filters are not applied here.
        """
        with self._file() as state:
            if not state.targets:
                return None

            return max(
                state.targets.values(),
                key=lambda target: target.num_commands,
            )

    @property
    def compdb_hashes(self) -> CppCompilationDatabaseFileHashes:
        with self._file() as state:
            return state.compdb_hashes

    @compdb_hashes.setter
    def compdb_hashes(
        self, new_compdb_hashes: CppCompilationDatabaseFileHashes
    ) -> None:
        with self._file() as state:
            state.compdb_hashes = new_compdb_hashes

    @property
    def compdb_targets(self) -> CppCompilationDatabaseFileTargets:
        with self._file() as state:
            return state.compdb_targets

    @compdb_targets.setter
    def compdb_targets(
        self, new_compdb_targets: CppCompilationDatabaseFileTargets
    ) -> None:
        with self._file() as state:
            state.compdb_targets = new_compdb_targets
321
322
def path_to_executable(
    exe: str,
    *,
    default_path: Path | None = None,
    path_globs: list[str] | None = None,
    strict: bool = False,
) -> Path | None:
    """Return the path to a compiler executable.

    The executable in a ``clang`` compile command may or may not come with a
    path:

    .. code-block:: none

       /usr/bin/clang      <- includes a path
       ../path/to/my/clang <- includes a path
       clang               <- doesn't include a path

    An executable that already has a path is returned unchanged, since
    ``clangd`` can find it. A bare executable name would make ``clangd``
    search ``$PATH``, which may not yield the intended driver, so there are
    two ways to give it a path:

    - Pass ``default_path``: the bare name is placed inside that directory.
    - Pass ``path_globs`` (normally the clangd query driver globs): the
      globs are searched for a file with the same name, and the absolute
      path of the first match is returned.

    ``default_path`` takes precedence over ``path_globs``. If neither yields
    a path, the bare name is returned as a ``Path`` and ``clangd`` falls back
    to ``$PATH`` -- unless ``strict=True``, in which case an exception is
    raised.

    Things to keep in mind:

    - Except for the glob search, nothing here checks that the returned path
      exists or that an executable lives there.
    - A driver at the returned path still has to be covered by clangd's
      ``--query-driver`` argument for its bundled headers to resolve.
    - A name like ``./exe`` has a single path component, so it is treated as
      a bare executable name.

    Unsupported drivers are filtered out too (for example build steps for
    Python that a build system put into the compilation database), giving
    four possible outcomes:

    - A path to an executable usable as a ``clangd`` driver is returned.
    - ``None`` is returned: the string was empty or names an unsupported
      driver.
    - The input is returned as a ``Path``: no path could be resolved and
      ``strict=False``.
    - ``UnresolvablePathException`` is raised: no path could be resolved and
      ``strict=True``.
    """
    candidate = Path(exe)

    # An empty string has no path components at all. Not a valid command.
    if not candidate.parts:
        _LOG.debug("Invalid executable path. The path was an empty string.")
        return None

    # Reject executables whose name contains an unsupported driver marker.
    if any(
        marker in candidate.name
        for marker in _UNSUPPORTED_TOOLCHAIN_EXECUTABLES
    ):
        _LOG.debug(
            "Invalid executable path. This is not a supported driver: %s", exe
        )
        return None

    # More than one component means a relative or absolute path was given.
    if len(candidate.parts) > 1:
        return candidate

    # From here on there is exactly one component: a bare executable name.

    if default_path is not None:
        return default_path / candidate

    # Unlike the branches above, the glob search only succeeds when a file
    # with the right name actually exists.
    if path_globs is not None:
        found_paths = (
            Path(found)
            for pattern in path_globs
            for found in glob.iglob(pattern)
        )
        located = next(
            (found for found in found_paths if found.name == candidate.name),
            None,
        )

        if located is not None:
            return located.absolute()

    if strict:
        raise UnresolvablePathException(
            f'Cannot place {exe} in an unambiguous path!'
        )

    return candidate
435
436
def command_parts(command: str) -> tuple[str | None, str, list[str]]:
    """Return the executable string and the rest of the command tokens.

    The command is split on whitespace. If it starts with a supported
    wrapper like `ccache`, the wrapper and any `KEY=VALUE` options that
    directly follow it are extracted separately. So the return value
    contains:
        (wrapper, compiler executable, all other tokens)

    The wrapper is ``None`` when the command has no wrapper. The executable
    is an empty string when the command is empty, or when a wrapper (and its
    options) is not followed by anything else.
    """
    tokens = command.split()

    if not tokens:
        return (None, '', [])

    first = tokens.pop(0)

    if first not in _SUPPORTED_WRAPPER_EXECUTABLES:
        return (None, first, tokens)

    wrapper = first
    executable = ''

    # Checking for remaining tokens before popping keeps a wrapper-only
    # command from raising IndexError.
    while tokens:
        token = tokens.pop(0)

        # This is very `ccache`-centric. It will work for other wrappers
        # that use KEY=VALUE-style options or no options at all, but will
        # not work for other cases.
        if re.fullmatch(r'(.*)=(.*)', token):
            wrapper = f'{wrapper} {token}'
        else:
            executable = token
            break

    return (wrapper, executable, tokens)
466
467
# This is a clumsy way to express optional keys, which is not directly
# supported in TypedDicts right now.
# TODO(chadnorvell): Use `NotRequired` when we support Python 3.11.
class BaseCppCompileCommandDict(TypedDict):
    """Keys shared by both dict forms of a compile command."""

    file: str
    directory: str
    output: str | None


class CppCompileCommandDictWithCommand(BaseCppCompileCommandDict):
    """Dict form of a compile command that carries a ``command`` string."""

    command: str


class CppCompileCommandDictWithArguments(BaseCppCompileCommandDict):
    """Dict form of a compile command that carries an ``arguments`` list."""

    arguments: list[str]


# Either dict form of a compile command.
CppCompileCommandDict = (
    CppCompileCommandDictWithCommand | CppCompileCommandDictWithArguments
)
488
489
class CppCompileCommand:
    """One entry of a clang compilation database.

    See: https://clang.llvm.org/docs/JSONCompilationDatabase.html

    Only entries with a ``command`` string are supported. The output
    artifact is inferred from the ``-o`` flag of that command; an ``output``
    value passed to the constructor is stored but not used.
    """

    def __init__(
        self,
        file: str,
        directory: str,
        command: str | None = None,
        arguments: list[str] | None = None,
        output: str | None = None,
    ) -> None:
        """Create a compile command.

        Raises:
            TypeError: if ``command`` is missing (whether or not
                ``arguments`` is given), or if the command has an ``-o``
                flag with nothing after it.
        """
        if command is None:
            # Per the spec, one of "command" or "arguments" must be present,
            # but only "command" is handled here.
            if arguments is None:
                raise TypeError(
                    "A compile command requires either 'command' "
                    "or 'arguments'."
                )

            raise TypeError(
                "Compile commands without 'command' are not supported yet."
            )

        self._file = file
        self._directory = directory
        self._command = command
        self._arguments = arguments
        self._provided_output = output
        self.target: str | None = None

        _, executable, args = command_parts(command)
        self._executable_path = Path(executable)

        # Without an -o flag this is probably not a C/C++ compile command,
        # and the inferred output stays None.
        self._inferred_output: str | None = None

        if '-o' in args:
            try:
                self._inferred_output = args[args.index('-o') + 1]
            except IndexError:
                # There is an -o, but it is the last token.
                raise TypeError(
                    'Failed to load compile command with no output argument!'
                )

    @property
    def file(self) -> str:
        return self._file

    @property
    def directory(self) -> str:
        return self._directory

    @property
    def command(self) -> str | None:
        return self._command

    @property
    def arguments(self) -> list[str] | None:
        return self._arguments

    @property
    def output(self) -> str | None:
        """The output inferred from ``-o``; provided values are ignored."""
        return self._inferred_output

    @property
    def output_path(self) -> Path | None:
        """The output joined onto the command's directory, if there is one."""
        inferred = self.output

        if inferred is None:
            return None

        return Path(self.directory) / Path(inferred)

    @property
    def executable_path(self) -> Path:
        return self._executable_path

    @property
    def executable_name(self) -> str:
        return self.executable_path.name

    @classmethod
    def from_dict(
        cls, compile_command_dict: dict[str, Any]
    ) -> CppCompileCommand:
        """Build a compile command from a compilation database entry.

        Missing keys are passed on as ``None``, so problems surface from the
        constructor at runtime.
        """
        lookup = compile_command_dict.get

        return cls(
            file=cast(str, lookup('file')),
            directory=cast(str, lookup('directory')),
            command=lookup('command'),
            arguments=lookup('arguments'),
            output=lookup('output'),
        )

    @classmethod
    def try_from_dict(
        cls, compile_command_dict: dict[str, Any]
    ) -> CppCompileCommand | None:
        """Like ``from_dict``, but return ``None`` on a ``TypeError``."""
        try:
            return cls.from_dict(compile_command_dict)
        except TypeError:
            return None

    def process(
        self,
        *,
        default_path: Path | None = None,
        path_globs: list[str] | None = None,
        strict: bool = False,
    ) -> CppCompileCommand | None:
        """Process a compile command.

        Returns a new compile command whose executable has been run through
        ``path_to_executable``, or ``None`` if this command should be dropped.
        This takes care of two things needed for reliable code intelligence:

        - Not every entry is a valid C/C++ compile command; some build
          systems naively include steps for Python or for linting. Entries
          whose executable is rejected, or that have no ``-o`` output, are
          dropped.

        - Some compile commands give no path to the compiler executable
          (the "driver" in clang terms), so clangd is unlikely to find it
          unless it is in ``$PATH``. The ``--query-driver`` argument of
          ``clangd`` allowlists drivers but is not used to resolve ambiguous
          paths. That gap is bridged here: a bare executable is placed in
          ``default_path`` or looked up in ``path_globs``.

        ``strict`` is forwarded to ``path_to_executable``.
        """
        if self.command is None:
            raise NotImplementedError(
                "Compile commands without 'command' are not supported yet."
            )

        wrapper, executable_str, tokens = command_parts(self.command)
        resolved = path_to_executable(
            executable_str,
            default_path=default_path,
            path_globs=path_globs,
            strict=strict,
        )

        if resolved is None:
            _LOG.debug(
                "Compile command rejected due to bad executable path: %s",
                self.command,
            )
            return None

        if self.output is None:
            _LOG.debug(
                "Compile command rejected due to no output property: %s",
                self.command,
            )
            return None

        # TODO(chadnorvell): Some commands include the executable multiple
        # times. It's not clear if that affects clangd.
        pieces = [str(resolved), ' '.join(tokens)]

        if wrapper is not None:
            pieces.insert(0, wrapper)

        return type(self)(
            file=self.file,
            directory=self.directory,
            command=' '.join(pieces),
            arguments=None,
            output=self.output,
        )

    def as_dict(self) -> CppCompileCommandDict:
        """Return the compile command as a compilation database entry."""
        # TODO(chadnorvell): Support "arguments". We don't support it at all
        # right now. When we do, we should preferentially include "arguments"
        # only, and only include "command" when "arguments" is not present.
        if self.command is None:
            raise NotImplementedError(
                "Compile commands without 'command' are not supported yet."
            )

        compile_command_dict: CppCompileCommandDictWithCommand = {
            'command': self.command,
            'file': self.file,
            'directory': self.directory,
            'output': self.output,
        }

        return compile_command_dict
695
696
def _path_nearest_parent(path1: Path, path2: Path) -> Path:
    """Get the closest common parent of two paths.

    The comparison is purely lexical; nothing is resolved against the file
    system. If one path contains the other, the containing path itself is
    returned. Two relative paths with nothing in common share ``Path('.')``.

    Raises:
        ValueError: if the paths have no common parent at all, e.g. one is
            absolute and the other relative, or they are on different drives.
    """
    path2_lineage = {path2, *path2.parents}

    # Walk from path1 up towards its root; the first entry that is also path2
    # or one of its ancestors is the deepest shared parent.
    for candidate in (path1, *path1.parents):
        if candidate in path2_lineage:
            return candidate

    raise ValueError(f'{path1} and {path2} do not share a common parent')
716
717
def _infer_target_pos(target_glob: str) -> list[int]:
    """Infer the position of the target in a compilation unit artifact path.

    The glob is split into path components. Each ``?`` component marks a
    position that is part of the target name and each ``*`` component is
    skipped; the indices of the ``?`` components are returned in order.

    Raises:
        ValueError: on the first component that is neither ``?`` nor ``*``.
    """
    positions: list[int] = []

    for index, component in enumerate(Path(target_glob).parts):
        if component not in ('?', '*'):
            raise ValueError(f'Invalid target inference token: {component}')

        if component == '?':
            positions.append(index)

    return positions
732
733
def infer_target(target_glob: str, root: Path, output_path: Path) -> str | None:
    """Infer a target from a compilation unit artifact path.

    See the documentation for ``PigweedIdeSettings.target_inference``.

    Returns ``None`` when the glob marks no target positions. Otherwise the
    path components at the marked positions are joined with underscores. An
    ``IndexError`` propagates if the path has fewer components than a marked
    position requires.
    """
    positions = _infer_target_pos(target_glob)

    if not positions:
        return None

    # Depending on the build system and project configuration, the target name
    # may be in the "directory" or the "output" of the compile command, which
    # is why the caller passes the combined path.
    try:
        # Normal case: the artifact lives below the root dir, so the root is
        # simply cut off the front of its path.
        relative = output_path.relative_to(root)
    except ValueError:
        # The artifact is outside the root dir; cut off the closest parent
        # shared with the root instead.
        shared_parent = _path_nearest_parent(root, output_path)
        relative = output_path.relative_to(shared_parent)

    components = relative.parts
    return '_'.join(components[position] for position in positions)
759
760
# Input forms accepted when loading a compilation database: a list of dicts,
# a JSON string, an open text file handle, or a path to a file.
LoadableToCppCompilationDatabase = (
    list[dict[str, Any]] | str | TextIOBase | Path
)
764
765
766class CppCompilationDatabase:
767    """A representation of a clang compilation database.
768
769    See: https://clang.llvm.org/docs/JSONCompilationDatabase.html
770    """
771
772    def __init__(
773        self,
774        root_dir: Path | None = None,
775        file_path: Path | None = None,
776        source_file_path: Path | None = None,
777        target_inference: str | None = None,
778    ) -> None:
779        self._db: list[CppCompileCommand] = []
780        self.file_path: Path | None = file_path
781        self.source_file_path: Path | None = source_file_path
782        self.source_file_hash: str | None = None
783
784        if target_inference is None:
785            self.target_inference = PigweedIdeSettings().target_inference
786        else:
787            self.target_inference = target_inference
788
789        # Only compilation databases that are loaded will have this, and it
790        # contains the root directory of the build that the compilation
791        # database is based on. Processed compilation databases will not have
792        # a value here.
793        self._root_dir = root_dir
794
    def __len__(self) -> int:
        """Return the number of compile commands in the database."""
        return len(self._db)
797
    def __getitem__(self, index: int) -> CppCompileCommand:
        """Return the compile command at the given position."""
        return self._db[index]
800
    def __iter__(self) -> Generator[CppCompileCommand, None, None]:
        """Iterate over the compile commands in stored order."""
        return (compile_command for compile_command in self._db)
803
804    @property
805    def file_hash(self) -> str:
806        # If this compilation database did not originate from a file, return a
807        # hash that is almost certainly not going to match any other hash; these
808        # sources are not persistent, so they cannot be compared.
809        if self.file_path is None:
810            return '%032x' % random.getrandbits(160)
811
812        data = self.file_path.read_text().encode('utf-8')
813        return sha1(data).hexdigest()
814
    def add(self, *commands: CppCompileCommand):
        """Add compile commands to the compilation database.

        The commands are appended in the order given; no de-duplication is
        done.
        """
        self._db.extend(commands)
818
819    def merge(self, other: CppCompilationDatabase) -> None:
820        """Merge values from another database into this one.
821
822        This will not overwrite a compile command that already exists for a
823        particular file.
824        """
825        self_dict = {c.file: c for c in self._db}
826
827        for compile_command in other:
828            if compile_command.file not in self_dict:
829                self_dict[compile_command.file] = compile_command
830
831        self._db = list(self_dict.values())
832
    def as_dicts(self) -> list[CppCompileCommandDict]:
        """Return every compile command in its dict form, in stored order."""
        return [compile_command.as_dict() for compile_command in self._db]
835
    def to_json(self) -> str:
        """Output the compilation database to a JSON string.

        The JSON is indented by two spaces and has its keys sorted.
        """

        return json.dumps(self.as_dicts(), indent=2, sort_keys=True)
840
    def to_file(self, path: Path):
        """Write the compilation database to a JSON file.

        Missing parent directories are created first. The JSON is indented by
        two spaces and has its keys sorted.
        """
        path.parent.mkdir(parents=True, exist_ok=True)

        with open(path, 'w') as file:
            json.dump(self.as_dicts(), file, indent=2, sort_keys=True)
847
848    @classmethod
849    def load(
850        cls,
851        compdb_to_load: LoadableToCppCompilationDatabase,
852        root_dir: Path,
853        target_inference: str | None = None,
854    ) -> CppCompilationDatabase:
855        """Load a compilation database.
856
857        You can provide a JSON file handle or path, a JSON string, or a native
858        Python data structure that matches the format (list of dicts).
859        """
860        db_as_dicts: list[dict[str, Any]]
861        file_path = None
862
863        if isinstance(compdb_to_load, list):
864            # The provided data is already in the format we want it to be in,
865            # probably, and if it isn't we'll find out when we try to
866            # instantiate the database.
867            db_as_dicts = compdb_to_load
868        else:
869            if isinstance(compdb_to_load, Path):
870                # The provided data is a path to a file, presumably JSON.
871                try:
872                    file_path = compdb_to_load
873                    compdb_data = compdb_to_load.read_text()
874                except FileNotFoundError:
875                    raise MissingCompDbException()
876            elif isinstance(compdb_to_load, TextIOBase):
877                # The provided data is a file handle, presumably JSON.
878                file_path = Path(compdb_to_load.name)  # type: ignore
879                compdb_data = compdb_to_load.read()
880            elif isinstance(compdb_to_load, str):
881                # The provided data is a a string, presumably JSON.
882                compdb_data = compdb_to_load
883
884            db_as_dicts = json.loads(compdb_data)
885
886        compdb = cls(
887            root_dir=root_dir,
888            file_path=file_path,
889            target_inference=target_inference,
890        )
891
892        try:
893            compdb.add(
894                *[
895                    compile_command
896                    for compile_command_dict in db_as_dicts
897                    if (
898                        compile_command := CppCompileCommand.try_from_dict(
899                            compile_command_dict
900                        )
901                    )
902                    is not None
903                ]
904            )
905        except TypeError:
906            # This will arise if db_as_dicts is not actually a list of dicts
907            raise BadCompDbException()
908
909        return compdb
910
911    def process(
912        self,
913        settings: PigweedIdeSettings,
914        *,
915        default_path: Path | None = None,
916        path_globs: list[str] | None = None,
917        strict: bool = False,
918        always_output_new: bool = False,
919    ) -> CppCompilationDatabasesMap | None:
920        """Process a ``clangd`` compilation database file.
921
922        Given a clang compilation database that may have commands for multiple
923        valid or invalid targets/toolchains, keep only the valid compile
924        commands and store them in target-specific compilation databases.
925
926        If this finds that the processed file is functionally identical to the
927        input file (meaning that the input file did not require processing to
928        be used successfully with ``clangd``), then it will return ``None``,
929        indicating that the original file should be used. This behavior can be
930        overridden by setting ``always_output_new``, which will ensure that a
931        new compilation database is always written to the working directory and
932        original compilation databases outside the working directory are never
933        made available for code intelligence.
934        """
935        if self._root_dir is None:
936            raise ValueError(
937                'Can only process a compilation database that '
938                'contains a root build directory, usually '
939                'specified when loading the file. Are you '
940                'trying to process an already-processed '
941                'compilation database?'
942            )
943
944        clean_compdbs = CppCompilationDatabasesMap(settings)
945
946        # Do processing, segregate processed commands into separate databases
947        # for each target.
948        for compile_command in self:
949            processed_command = compile_command.process(
950                default_path=default_path, path_globs=path_globs, strict=strict
951            )
952
953            if (
954                processed_command is not None
955                and processed_command.output_path is not None
956            ):
957                target = infer_target(
958                    self.target_inference,
959                    self._root_dir,
960                    processed_command.output_path,
961                )
962
963                target = cast(str, target)
964                processed_command.target = target
965                clean_compdbs[target].add(processed_command)
966
967                if clean_compdbs[target].source_file_path is None:
968                    clean_compdbs[target].source_file_path = self.file_path
969                    clean_compdbs[target].source_file_hash = self.file_hash
970
971        # TODO(chadnorvell): Handle len(clean_compdbs) == 0
972
973        # Determine if the processed database is functionally identical to the
974        # original, unless configured to always output the new databases.
975        # The criteria for "functionally identical" are:
976        #
977        # - The original file only contained commands for a single target
978        # - The number of compile commands in the processed database is equal to
979        #   that of the original database.
980        #
981        # This is a little bit crude. For example, it doesn't account for the
982        # (rare) edge case of multiple databases having commands for the same
983        # target. However, if you know that you have that kind of situation, you
984        # should use `always_output_new` and not rely on this.
985        if (
986            not always_output_new
987            and len(clean_compdbs) == 1
988            and len(clean_compdbs[0]) == len(self)
989        ):
990            return None
991
992        return clean_compdbs
993
994
class CppCompilationDatabasesMap:
    """Mapping of target name to that target's compilation database.

    Looking up a target name that is not present yet creates an empty database
    for it, so callers can add commands without checking for existence first.
    """

    def __init__(self, settings: PigweedIdeSettings):
        self.settings = settings
        self._dbs: dict[str, CppCompilationDatabase] = dict()

    def __len__(self) -> int:
        """Return the number of targets in the map."""
        return len(self._dbs)

    def _default(self, key: str | int):
        """Create an empty database for ``key`` if it is a new target name.

        This is like ``defaultdict``, except that the key (i.e. the target
        name) is used to build the new database's file path.
        """
        if not isinstance(key, str) or key in self._dbs:
            return

        compdb_path = self.settings.working_dir / key / COMPDB_FILE_NAME
        self._dbs[key] = CppCompilationDatabase(file_path=compdb_path)

    def __getitem__(self, key: str | int) -> CppCompilationDatabase:
        """Look up a database by position (``int``) or target name."""
        # Positional lookup never creates an entry.
        if isinstance(key, int):
            return list(self._dbs.values())[key]

        # Lookup by name creates the entry on first access.
        self._default(key)
        return self._dbs[key]

    def __setitem__(self, key: str, item: CppCompilationDatabase) -> None:
        """Store ``item`` as the database for target ``key``."""
        self._default(key)
        self._dbs[key] = item

    def __iter__(self) -> Iterator[str]:
        """Iterate over the target names."""
        for target, _db in self.items():
            yield target

    @property
    def targets(self) -> list[str]:
        """The target names, as a new list."""
        return list(self._dbs)

    def items(
        self,
    ) -> Generator[tuple[str, CppCompilationDatabase], None, None]:
        """Generate ``(target name, database)`` pairs."""
        yield from self._dbs.items()

    def _sort_by_commands(self) -> list[str]:
        """Return target names ordered from most to fewest compile commands."""
        return sorted(
            self._dbs,
            key=lambda target: len(self._dbs[target]),
            reverse=True,
        )

    def _sort_with_target_priority(self, target: str) -> list[str]:
        """Return the command-count ordering with ``target`` moved to the front."""
        remaining = self._sort_by_commands()
        # `remove` raises a ValueError when the target is absent, but callers
        # are expected to have ensured that it is present by this point.
        remaining.remove(target)
        return [target] + remaining

    def _targets_to_write(self, target: str) -> list[str]:
        """Return the targets whose commands go into ``target``'s file.

        With ``cascade_targets`` enabled, this is every target, with the
        requested one at the head of the list. Otherwise it is just the
        requested target.
        """
        if self.settings.cascade_targets:
            return self._sort_with_target_priority(target)

        return [target]

    def _compdb_to_write(self, target: str) -> CppCompilationDatabase:
        """Build the database that will be written to file for ``target``.

        The result holds the commands of every target returned by
        ``_targets_to_write``, in that order: only the requested target's
        commands unless ``cascade_targets`` is enabled.
        """
        contributing_targets = self._targets_to_write(target)
        combined = CppCompilationDatabase()

        for name in contributing_targets:
            combined.add(*self[name])

        return combined

    def test_write(self) -> None:
        """Check that every database can be serialized.

        Raises whatever ``to_json`` raises when a database is not
        JSON-serializable.
        """
        for compdb in self._dbs.values():
            compdb.to_json()

    def write(self) -> None:
        """Write one JSON compilation database file per target."""
        for target in self:
            destination = self.settings.working_dir / target / COMPDB_FILE_NAME
            compdb = self._compdb_to_write(target)
            compdb.to_file(destination)

    @classmethod
    def merge(
        cls, *db_sets: CppCompilationDatabasesMap
    ) -> CppCompilationDatabasesMap:
        """Combine several sets of processed compilation databases into one.

        Processing N compilation databases produced by a build system yields N
        ``CppCompilationDatabasesMap`` objects, each holding databases for one
        or more targets. This method merges them into a single map with one
        database per target.

        Most of the time the inputs contain distinct targets, and this amounts
        to a shallow merge. When targets do overlap, the databases for the
        same target are merged so that a file never ends up with duplicate
        compile commands in one target's database, which keeps the result
        usable with clangd. Which of the duplicate commands is kept is
        unspecified and subject to change.

        The ``settings`` of the first map are used for the result; all of the
        provided maps are expected to share the same ``settings``.

        Raises:
            ValueError: No maps were provided.
        """
        if not db_sets:
            raise ValueError(
                'At least one set of compilation databases is required.'
            )

        first = db_sets[0]

        # Shortcut for the most common case: nothing to merge.
        if len(db_sets) == 1:
            return first

        merged = cls(first.settings)

        for db_set in db_sets:
            for target, compdb in db_set.items():
                merged[target].merge(compdb)

        return merged
1142
1143
@functools.lru_cache
def find_cipd_installed_exe_path(exe: str) -> Path:
    """Locate an executable installed by CIPD.

    Candidate locations are ``<dir>/bin/<exe>`` for every directory named by a
    ``PW_<PROJ_NAME>_CIPD_INSTALL_DIR`` environment variable, followed by the
    one named by ``PW_PIGWEED_CIPD_INSTALL_DIR`` when that variable is set.
    On Windows and Cygwin, ``.exe`` is appended to the executable name.

    Raises:
        FileNotFoundError: The executable could not be resolved from any of
            the candidate locations.
    """
    if sys.platform.lower() in ("win32", "cygwin"):
        exe += ".exe"

    env_vars = vars(env)

    search_paths: list[str] = [
        str(Path(install_dir) / "bin" / exe)
        for var_name, install_dir in env_vars.items()
        if re.fullmatch(r"PW_[A-Z_]+_CIPD_INSTALL_DIR", var_name)
    ]

    pigweed_install_dir = env_vars.get('PW_PIGWEED_CIPD_INSTALL_DIR')
    if pigweed_install_dir is not None:
        search_paths.append(str(Path(pigweed_install_dir) / "bin" / exe))

    try:
        resolved = path_to_executable(
            exe, default_path=None, path_globs=search_paths, strict=True
        )
    except UnresolvablePathException:
        # Handled below together with the "nothing found" result.
        resolved = None

    if resolved is None:
        search_paths_str = ":".join(search_paths)
        raise FileNotFoundError(
            f"Not able to find '{exe}' "
            f"among '{search_paths_str}'. Is bootstrap successful?"
        )

    return resolved
1182
1183
def get_clangd_path(settings: PigweedIdeSettings) -> Path:
    """Return the path of the ``clangd`` executable to use.

    The alternate path from the settings wins when it is set; otherwise the
    CIPD-installed ``clangd`` is looked up.
    """
    alternate_path = settings.clangd_alternate_path

    if alternate_path is None:
        return find_cipd_installed_exe_path('clangd')

    return alternate_path
1189
1190
class ClangdSettings:
    """Makes system-specific settings for running ``clangd`` with Pigweed."""

    def __init__(self, settings: PigweedIdeSettings):
        """Work out the ``clangd`` executable path and its arguments.

        Args:
            settings: Supplies the optional alternate ``clangd`` path and the
                query driver string.
        """
        state = CppIdeFeaturesState(settings)

        self.clangd_path = get_clangd_path(settings)

        # Fall back to the project root when no target is currently selected.
        compile_commands_dir = env.PW_PROJECT_ROOT

        if state.current_target is not None:
            compile_commands_dir = str(state.stable_target_link)

        host_cc_path = find_cipd_installed_exe_path("clang++")

        self.arguments: list[str] = [
            f'--compile-commands-dir={compile_commands_dir}',
            '--background-index',
            '--clang-tidy',
        ]

        query_driver = settings.clangd_query_driver_str(host_cc_path)

        if query_driver is not None:
            self.arguments.append(f'--query-driver={query_driver}')

    def command(self, system: str = platform.system()) -> str:
        """Return the command that runs clangd with Pigweed paths.

        Args:
            system: Selects the output format, case-insensitively:

                - ``json``: a JSON array of the executable and its arguments.
                - ``cmd`` / ``batch``: Command Prompt syntax, using ``^`` as
                  the line continuation character.
                - ``powershell`` / ``pwsh``: PowerShell syntax, using a
                  backtick as the line continuation character.
                - ``windows``: both of the above, each under a heading.
                - anything else: \\*sh-like syntax, using a backslash.

                Defaults to the value of ``platform.system()``.

        Returns:
            The command text, starting with a newline, with one argument per
            line.
        """

        def make_command(line_continuation: str):
            arguments = f' {line_continuation}\n'.join(
                f'  {arg}' for arg in self.arguments
            )
            return f'\n{self.clangd_path} {line_continuation}\n{arguments}'

        system_key = system.lower()

        if system_key == 'json':
            return '\n' + json.dumps(
                [str(self.clangd_path), *self.arguments], indent=2
            )

        # Command Prompt continues a line with a caret, and PowerShell with a
        # backtick, matching the pairing used by the 'windows' case below.
        if system_key in ['cmd', 'batch']:
            return make_command('^')

        if system_key in ['powershell', 'pwsh']:
            return make_command('`')

        if system_key == 'windows':
            return (
                f'\nIn PowerShell:\n{make_command("`")}'
                f'\n\nIn Command Prompt:\n{make_command("^")}'
            )

        # Default case for *sh-like shells.
        return make_command('\\')
1245