1# Copyright 2022 The Pigweed Authors 2# 3# Licensed under the Apache License, Version 2.0 (the "License"); you may not 4# use this file except in compliance with the License. You may obtain a copy of 5# the License at 6# 7# https://www.apache.org/licenses/LICENSE-2.0 8# 9# Unless required by applicable law or agreed to in writing, software 10# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT 11# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the 12# License for the specific language governing permissions and limitations under 13# the License. 14"""Configure C/C++ IDE support for Pigweed projects. 15 16We support C/C++ code analysis via ``clangd``, or other language servers that 17are compatible with the ``clangd`` compilation database format. 18 19While clangd can work well out of the box for typical C++ codebases, some work 20is required to coax it to work for embedded projects. In particular, Pigweed 21projects use multiple toolchains within a distinct environment, and almost 22always define multiple targets. This means compilation units are likely have 23multiple compile commands and the toolchain executables are unlikely to be in 24your path. ``clangd`` is not equipped to deal with this out of the box. We 25handle this by: 26 27- Processing the compilation database produced by the build system into 28 multiple internally-consistent compilation databases, one for each target 29 (where a "target" is a particular build for a particular system using a 30 particular toolchain). 31 32- Creating unambiguous paths to toolchain drivers to ensure the right toolchain 33 is used and that clangd knows where to find that toolchain's system headers. 34 35- Providing tools for working with several compilation databases that are 36 spiritually similar to tools like ``pyenv``, ``rbenv``, etc. 37 38In short, we take the probably-broken compilation database that the build system 39generates, process it into several not-broken compilation databases in the 40``pw_ide`` working directory, and provide a stable symlink that points to the 41selected active target's compliation database. If ``clangd`` is configured to 42point at the symlink and is set up with the right paths, you'll get code 43intelligence. 44""" 45 46from __future__ import annotations 47 48from contextlib import contextmanager 49from dataclasses import asdict, dataclass, field 50import functools 51import glob 52from hashlib import sha1 53from io import TextIOBase 54import json 55import logging 56from pathlib import Path 57import platform 58import random 59import re 60import sys 61from typing import ( 62 Any, 63 cast, 64 Generator, 65 Iterator, 66 TypedDict, 67) 68 69from pw_cli.env import pigweed_environment 70 71from pw_ide.exceptions import ( 72 BadCompDbException, 73 InvalidTargetException, 74 MissingCompDbException, 75 UnresolvablePathException, 76) 77 78from pw_ide.settings import PigweedIdeSettings 79from pw_ide.symlinks import set_symlink 80 81_LOG = logging.getLogger(__package__) 82env = pigweed_environment() 83 84COMPDB_FILE_NAME = 'compile_commands.json' 85STABLE_CLANGD_DIR_NAME = '.stable' 86_CPP_IDE_FEATURES_DATA_FILE = 'pw_ide_state.json' 87_UNSUPPORTED_TOOLCHAIN_EXECUTABLES = ('_pw_invalid', 'python') 88_SUPPORTED_WRAPPER_EXECUTABLES = ('ccache',) 89 90 91@dataclass(frozen=True) 92class CppIdeFeaturesTarget: 93 """Data pertaining to a C++ code analysis target.""" 94 95 name: str 96 compdb_file_path: Path 97 num_commands: int 98 is_enabled: bool = True 99 100 def __str__(self) -> str: 101 return self.name 102 103 def serialized(self) -> dict[str, Any]: 104 return { 105 **asdict(self), 106 **{ 107 'compdb_file_path': str(self.compdb_file_path), 108 }, 109 } 110 111 @classmethod 112 def deserialize(cls, **data) -> CppIdeFeaturesTarget: 113 return cls( 114 **{ 115 **data, 116 **{ 117 'compdb_file_path': Path(data['compdb_file_path']), 118 }, 119 } 120 ) 121 122 123CppCompilationDatabaseFileHashes = dict[Path, str] 124CppCompilationDatabaseFileTargets = dict[Path, list[CppIdeFeaturesTarget]] 125 126 127@dataclass 128class CppIdeFeaturesData: 129 """State data about C++ code analysis features.""" 130 131 targets: dict[str, CppIdeFeaturesTarget] = field(default_factory=dict) 132 current_target: CppIdeFeaturesTarget | None = None 133 compdb_hashes: CppCompilationDatabaseFileHashes = field( 134 default_factory=dict 135 ) 136 compdb_targets: CppCompilationDatabaseFileTargets = field( 137 default_factory=dict 138 ) 139 140 def serialized(self) -> dict[str, Any]: 141 return { 142 'current_target': self.current_target.serialized() 143 if self.current_target is not None 144 else None, 145 'targets': { 146 name: target_data.serialized() 147 for name, target_data in self.targets.items() 148 }, 149 'compdb_hashes': { 150 str(path): hash_str 151 for path, hash_str in self.compdb_hashes.items() 152 }, 153 'compdb_targets': { 154 str(path): [ 155 target_data.serialized() for target_data in target_data_list 156 ] 157 for path, target_data_list in self.compdb_targets.items() 158 }, 159 } 160 161 @classmethod 162 def deserialize(cls, **data) -> CppIdeFeaturesData: 163 return cls( 164 current_target=CppIdeFeaturesTarget.deserialize( 165 **data['current_target'] 166 ) 167 if data['current_target'] is not None 168 else None, 169 targets={ 170 name: CppIdeFeaturesTarget.deserialize(**target_data) 171 for name, target_data in data['targets'].items() 172 }, 173 compdb_hashes={ 174 Path(path_str): hash_str 175 for path_str, hash_str in data['compdb_hashes'].items() 176 }, 177 compdb_targets={ 178 Path(path_str): [ 179 CppIdeFeaturesTarget.deserialize(**target_data) 180 for target_data in target_data_list 181 ] 182 for path_str, target_data_list in data['compdb_targets'].items() 183 }, 184 ) 185 186 187class CppIdeFeaturesState: 188 """Container for IDE features state data.""" 189 190 def __init__(self, pw_ide_settings: PigweedIdeSettings) -> None: 191 self.settings = pw_ide_settings 192 193 def __len__(self) -> int: 194 return len(self.targets) 195 196 def __getitem__(self, index: str) -> CppIdeFeaturesTarget: 197 return self.targets[index] 198 199 def __iter__(self) -> Generator[CppIdeFeaturesTarget, None, None]: 200 return (target for target in self.targets.values()) 201 202 @property 203 def stable_target_link(self) -> Path: 204 return self.settings.working_dir / STABLE_CLANGD_DIR_NAME 205 206 @contextmanager 207 def _file(self) -> Generator[CppIdeFeaturesData, None, None]: 208 """A simple key-value store for state data.""" 209 file_path = self.settings.working_dir / _CPP_IDE_FEATURES_DATA_FILE 210 211 try: 212 with open(file_path) as file: 213 data = CppIdeFeaturesData.deserialize(**json.load(file)) 214 except (FileNotFoundError, json.decoder.JSONDecodeError): 215 data = CppIdeFeaturesData() 216 217 yield data 218 219 with open(file_path, 'w') as file: 220 json.dump(data.serialized(), file, indent=2) 221 222 @property 223 def targets(self) -> dict[str, CppIdeFeaturesTarget]: 224 with self._file() as state: 225 exclude_predicate = ( 226 lambda x: x not in self.settings.targets_exclude 227 if len(self.settings.targets_exclude) > 0 228 else lambda x: True 229 ) 230 include_predicate = ( 231 lambda x: x in self.settings.targets_include 232 if len(self.settings.targets_include) > 0 233 else lambda x: True 234 ) 235 236 return { 237 name: target 238 for (name, target) in state.targets.items() 239 if exclude_predicate(name) and include_predicate(name) 240 } 241 242 @targets.setter 243 def targets(self, new_targets: dict[str, CppIdeFeaturesTarget]) -> None: 244 with self._file() as state: 245 state.targets = new_targets 246 247 @property 248 def current_target(self) -> CppIdeFeaturesTarget | None: 249 with self._file() as state: 250 return state.current_target 251 252 @current_target.setter 253 def current_target( 254 self, new_current_target: str | CppIdeFeaturesTarget | None 255 ) -> None: 256 with self._file() as state: 257 if new_current_target is None: 258 state.current_target = None 259 else: 260 if isinstance(new_current_target, CppIdeFeaturesTarget): 261 name = new_current_target.name 262 new_current_target_inst = new_current_target 263 else: 264 name = new_current_target 265 266 try: 267 new_current_target_inst = state.targets[name] 268 except KeyError: 269 raise InvalidTargetException 270 271 if not new_current_target_inst.compdb_file_path.exists(): 272 raise MissingCompDbException 273 274 set_symlink( 275 new_current_target_inst.compdb_file_path.parent, 276 self.stable_target_link, 277 ) 278 279 state.current_target = state.targets[name] 280 281 @property 282 def max_commands_target(self) -> CppIdeFeaturesTarget | None: 283 with self._file() as state: 284 if len(state.targets) == 0: 285 return None 286 287 max_commands_target_name = sorted( 288 [ 289 (name, target.num_commands) 290 for name, target in state.targets.items() 291 ], 292 key=lambda x: x[1], 293 reverse=True, 294 )[0][0] 295 296 return state.targets[max_commands_target_name] 297 298 @property 299 def compdb_hashes(self) -> CppCompilationDatabaseFileHashes: 300 with self._file() as state: 301 return state.compdb_hashes 302 303 @compdb_hashes.setter 304 def compdb_hashes( 305 self, new_compdb_hashes: CppCompilationDatabaseFileHashes 306 ) -> None: 307 with self._file() as state: 308 state.compdb_hashes = new_compdb_hashes 309 310 @property 311 def compdb_targets(self) -> CppCompilationDatabaseFileTargets: 312 with self._file() as state: 313 return state.compdb_targets 314 315 @compdb_targets.setter 316 def compdb_targets( 317 self, new_compdb_targets: CppCompilationDatabaseFileTargets 318 ) -> None: 319 with self._file() as state: 320 state.compdb_targets = new_compdb_targets 321 322 323def path_to_executable( 324 exe: str, 325 *, 326 default_path: Path | None = None, 327 path_globs: list[str] | None = None, 328 strict: bool = False, 329) -> Path | None: 330 """Return the path to a compiler executable. 331 332 In a ``clang`` compile command, the executable may or may not include a 333 path. For example: 334 335 .. code-block:: none 336 337 /usr/bin/clang <- includes a path 338 ../path/to/my/clang <- includes a path 339 clang <- doesn't include a path 340 341 If it includes a path, then ``clangd`` will have no problem finding the 342 driver, so we can simply return the path. If the executable *doesn't* 343 include a path, then ``clangd`` will search ``$PATH``, and may not find the 344 intended driver unless you actually want the default system toolchain or 345 Pigweed paths have been added to ``$PATH``. So this function provides two 346 options for resolving those ambiguous paths: 347 348 - Provide a default path, and all executables without a path will be 349 re-written with a path within the default path. 350 - Provide the a set of globs that will be used to search for the executable, 351 which will normally be the query driver globs used with clangd. 352 353 By default, if neither of these options is chosen, or if the executable 354 cannot be found within the provided globs, the pathless executable that was 355 provided will be returned, and clangd will resort to searching $PATH. If you 356 instead pass ``strict=True``, this will raise an exception if an unambiguous 357 path cannot be constructed. 358 359 This function only tries to ensure that all executables have a path to 360 eliminate ambiguity. A couple of important things to keep in mind: 361 362 - This doesn't guarantee that the path exists or an executable actually 363 exists at the path. It only ensures that some path is provided to an 364 executable. 365 - An executable being present at the indicated path doesn't guarantee that 366 it will work flawlessly for clangd code analysis. The clangd 367 ``--query-driver`` argument needs to include a path to this executable in 368 order for its bundled headers to be resolved correctly. 369 370 This function also filters out invalid or unsupported drivers. For example, 371 build systems will sometimes naively include build steps for Python or other 372 languages in the compilation database, which are not usable with clangd. 373 As a result, this function has four possible end states: 374 375 - It returns a path with an executable that can be used as a ``clangd`` 376 driver. 377 - It returns ``None``, meaning the compile command was invalid. 378 - It returns the same string that was provided (as a ``Path``), if a path 379 couldn't be resolved and ``strict=False``. 380 - It raises an ``UnresolvablePathException`` if the executable cannot be 381 placed in an unambiguous path and ``strict=True``. 382 """ 383 maybe_path = Path(exe) 384 385 # We were give an empty string, not a path. Not a valid command. 386 if len(maybe_path.parts) == 0: 387 _LOG.debug("Invalid executable path. The path was an empty string.") 388 return None 389 390 # Determine if the executable name matches unsupported drivers. 391 is_supported_driver = True 392 393 for unsupported_executable in _UNSUPPORTED_TOOLCHAIN_EXECUTABLES: 394 if unsupported_executable in maybe_path.name: 395 is_supported_driver = False 396 397 if not is_supported_driver: 398 _LOG.debug( 399 "Invalid executable path. This is not a supported driver: %s", exe 400 ) 401 return None 402 403 # Now, ensure the executable has a path. 404 405 # This is either a relative or absolute path -- return it. 406 if len(maybe_path.parts) > 1: 407 return maybe_path 408 409 # If we got here, there's only one "part", so we assume it's an executable 410 # without a path. This logic doesn't work with a path like `./exe` since 411 # that also yields only one part. So currently this breaks if you actually 412 # have your compiler executable in your root build directory, which is 413 # (hopefully) very rare. 414 415 # If we got a default path, use it. 416 if default_path is not None: 417 return default_path / maybe_path 418 419 # Otherwise, try to find the executable within the query driver globs. 420 # Note that unlike the previous paths, this path will only succeed if an 421 # executable actually exists somewhere in the query driver globs. 422 if path_globs is not None: 423 for path_glob in path_globs: 424 for path_str in glob.iglob(path_glob): 425 path = Path(path_str) 426 if path.name == maybe_path.name: 427 return path.absolute() 428 429 if strict: 430 raise UnresolvablePathException( 431 f'Cannot place {exe} in an unambiguous path!' 432 ) 433 434 return maybe_path 435 436 437def command_parts(command: str) -> tuple[str | None, str, list[str]]: 438 """Return the executable string and the rest of the command tokens. 439 440 If the command contains a prefixed wrapper like `ccache`, it will be 441 extracted separately. So the return value contains: 442 (wrapper, compiler executable, all other tokens) 443 """ 444 parts = command.split() 445 curr = '' 446 wrapper = None 447 448 try: 449 curr = parts.pop(0) 450 except IndexError: 451 return (None, curr, []) 452 453 if curr in _SUPPORTED_WRAPPER_EXECUTABLES: 454 wrapper = curr 455 456 while curr := parts.pop(0): 457 # This is very `ccache`-centric. It will work for other wrappers 458 # that use KEY=VALUE-style options or no options at all, but will 459 # not work for other cases. 460 if re.fullmatch(r'(.*)=(.*)', curr): 461 wrapper = f'{wrapper} {curr}' 462 else: 463 break 464 465 return (wrapper, curr, parts) 466 467 468# This is a clumsy way to express optional keys, which is not directly 469# supported in TypedDicts right now. 470# TODO(chadnorvell): Use `NotRequired` when we support Python 3.11. 471class BaseCppCompileCommandDict(TypedDict): 472 file: str 473 directory: str 474 output: str | None 475 476 477class CppCompileCommandDictWithCommand(BaseCppCompileCommandDict): 478 command: str 479 480 481class CppCompileCommandDictWithArguments(BaseCppCompileCommandDict): 482 arguments: list[str] 483 484 485CppCompileCommandDict = ( 486 CppCompileCommandDictWithCommand | CppCompileCommandDictWithArguments 487) 488 489 490class CppCompileCommand: 491 """A representation of a clang compilation database compile command. 492 493 See: https://clang.llvm.org/docs/JSONCompilationDatabase.html 494 """ 495 496 def __init__( 497 self, 498 file: str, 499 directory: str, 500 command: str | None = None, 501 arguments: list[str] | None = None, 502 output: str | None = None, 503 ) -> None: 504 # Per the spec, either one of these two must be present. clangd seems 505 # to prefer "arguments" when both are present. 506 if command is None and arguments is None: 507 raise TypeError( 508 'A compile command requires either \'command\' ' 509 'or \'arguments\'.' 510 ) 511 512 if command is None: 513 raise TypeError( 514 'Compile commands without \'command\' ' 'are not supported yet.' 515 ) 516 517 self._command = command 518 self._arguments = arguments 519 self._file = file 520 self._directory = directory 521 522 _, executable, tokens = command_parts(command) 523 self._executable_path = Path(executable) 524 self._inferred_output: str | None = None 525 526 try: 527 # Find the output argument and grab its value. 528 output_flag_idx = tokens.index('-o') 529 self._inferred_output = tokens[output_flag_idx + 1] 530 except ValueError: 531 # No -o found, probably not a C/C++ compile command. 532 self._inferred_output = None 533 except IndexError: 534 # It has an -o but no argument after it. 535 raise TypeError( 536 'Failed to load compile command with no output argument!' 537 ) 538 539 self._provided_output = output 540 self.target: str | None = None 541 542 @property 543 def file(self) -> str: 544 return self._file 545 546 @property 547 def directory(self) -> str: 548 return self._directory 549 550 @property 551 def command(self) -> str | None: 552 return self._command 553 554 @property 555 def arguments(self) -> list[str] | None: 556 return self._arguments 557 558 @property 559 def output(self) -> str | None: 560 # We're ignoring provided output values for now. 561 return self._inferred_output 562 563 @property 564 def output_path(self) -> Path | None: 565 if self.output is None: 566 return None 567 568 return Path(self.directory) / Path(self.output) 569 570 @property 571 def executable_path(self) -> Path: 572 return self._executable_path 573 574 @property 575 def executable_name(self) -> str: 576 return self.executable_path.name 577 578 @classmethod 579 def from_dict( 580 cls, compile_command_dict: dict[str, Any] 581 ) -> CppCompileCommand: 582 return cls( 583 # We want to let possible Nones through to raise at runtime. 584 file=cast(str, compile_command_dict.get('file')), 585 directory=cast(str, compile_command_dict.get('directory')), 586 command=compile_command_dict.get('command'), 587 arguments=compile_command_dict.get('arguments'), 588 output=compile_command_dict.get('output'), 589 ) 590 591 @classmethod 592 def try_from_dict( 593 cls, compile_command_dict: dict[str, Any] 594 ) -> CppCompileCommand | None: 595 try: 596 return cls.from_dict(compile_command_dict) 597 except TypeError: 598 return None 599 600 def process( 601 self, 602 *, 603 default_path: Path | None = None, 604 path_globs: list[str] | None = None, 605 strict: bool = False, 606 ) -> CppCompileCommand | None: 607 """Process a compile command. 608 609 At minimum, a compile command from a clang compilation database needs to 610 be correlated with its target, and this method returns the target name 611 with the compile command. But it also cleans up other things we need for 612 reliable code intelligence: 613 614 - Some targets may not be valid C/C++ compile commands. For example, 615 some build systems will naively include build steps for Python or for 616 linting commands. We want to filter those out. 617 618 - Some compile commands don't provide a path to the compiler executable 619 (referred to by clang as the "driver"). In that case, clangd is very 620 unlikely to find the executable unless it happens to be in ``$PATH``. 621 The ``--query-driver`` argument to ``clangd`` allowlists 622 executables/drivers for use its use, but clangd doesn't use it to 623 resolve ambiguous paths. We bridge that gap here. Any executable 624 without a path will be either placed in the provided default path or 625 searched for in the query driver globs and be replaced with a path to 626 the executable. 627 """ 628 if self.command is None: 629 raise NotImplementedError( 630 'Compile commands without \'command\' ' 'are not supported yet.' 631 ) 632 633 wrapper, executable_str, tokens = command_parts(self.command) 634 executable_path = path_to_executable( 635 executable_str, 636 default_path=default_path, 637 path_globs=path_globs, 638 strict=strict, 639 ) 640 641 if executable_path is None: 642 _LOG.debug( 643 "Compile command rejected due to bad executable path: %s", 644 self.command, 645 ) 646 return None 647 648 if self.output is None: 649 _LOG.debug( 650 "Compile command rejected due to no output property: %s", 651 self.command, 652 ) 653 return None 654 655 # TODO(chadnorvell): Some commands include the executable multiple 656 # times. It's not clear if that affects clangd. 657 new_command = f'{str(executable_path)} {" ".join(tokens)}' 658 659 if wrapper is not None: 660 new_command = f'{wrapper} {new_command}' 661 662 return self.__class__( 663 file=self.file, 664 directory=self.directory, 665 command=new_command, 666 arguments=None, 667 output=self.output, 668 ) 669 670 def as_dict(self) -> CppCompileCommandDict: 671 base_compile_command_dict: BaseCppCompileCommandDict = { 672 'file': self.file, 673 'directory': self.directory, 674 'output': self.output, 675 } 676 677 # TODO(chadnorvell): Support "arguments". The spec requires that a 678 # We don't support "arguments" at all right now. When we do, we should 679 # preferentially include "arguments" only, and only include "command" 680 # when "arguments" is not present. 681 if self.command is not None: 682 compile_command_dict: CppCompileCommandDictWithCommand = { 683 'command': self.command, 684 # Unfortunately dict spreading doesn't work with mypy. 685 'file': base_compile_command_dict['file'], 686 'directory': base_compile_command_dict['directory'], 687 'output': base_compile_command_dict['output'], 688 } 689 else: 690 raise NotImplementedError( 691 'Compile commands without \'command\' ' 'are not supported yet.' 692 ) 693 694 return compile_command_dict 695 696 697def _path_nearest_parent(path1: Path, path2: Path) -> Path: 698 """Get the closest common parent of two paths.""" 699 # This is the Python < 3.9 version of: if path2.is_relative_to(path1) 700 try: 701 path2.relative_to(path1) 702 return path1 703 except ValueError: 704 pass 705 706 if path1 == path2: 707 return path1 708 709 if len(path1.parts) > len(path2.parts): 710 return _path_nearest_parent(path1.parent, path2) 711 712 if len(path1.parts) < len(path2.parts): 713 return _path_nearest_parent(path1, path2.parent) 714 715 return _path_nearest_parent(path1.parent, path2.parent) 716 717 718def _infer_target_pos(target_glob: str) -> list[int]: 719 """Infer the position of the target in a compilation unit artifact path.""" 720 tokens = Path(target_glob).parts 721 positions = [] 722 723 for pos, token in enumerate(tokens): 724 if token == '?': 725 positions.append(pos) 726 elif token == '*': 727 pass 728 else: 729 raise ValueError(f'Invalid target inference token: {token}') 730 731 return positions 732 733 734def infer_target(target_glob: str, root: Path, output_path: Path) -> str | None: 735 """Infer a target from a compilation unit artifact path. 736 737 See the documentation for ``PigweedIdeSettings.target_inference``.""" 738 target_pos = _infer_target_pos(target_glob) 739 740 if len(target_pos) == 0: 741 return None 742 743 # Depending on the build system and project configuration, the target name 744 # may be in the "directory" or the "output" of the compile command. So we 745 # need to construct the full path that combines both and use that to search 746 # for the target. 747 try: 748 # The path used for target inference is the path relative to the root 749 # dir. If this artifact is a direct child of the root, this just 750 # truncates the root off of its path. 751 subpath = output_path.relative_to(root) 752 except ValueError: 753 # If the output path isn't a child path of the root dir, find the 754 # closest shared parent dir and use that as the root for truncation. 755 common_parent = _path_nearest_parent(root, output_path) 756 subpath = output_path.relative_to(common_parent) 757 758 return '_'.join([subpath.parts[pos] for pos in target_pos]) 759 760 761LoadableToCppCompilationDatabase = ( 762 list[dict[str, Any]] | str | TextIOBase | Path 763) 764 765 766class CppCompilationDatabase: 767 """A representation of a clang compilation database. 768 769 See: https://clang.llvm.org/docs/JSONCompilationDatabase.html 770 """ 771 772 def __init__( 773 self, 774 root_dir: Path | None = None, 775 file_path: Path | None = None, 776 source_file_path: Path | None = None, 777 target_inference: str | None = None, 778 ) -> None: 779 self._db: list[CppCompileCommand] = [] 780 self.file_path: Path | None = file_path 781 self.source_file_path: Path | None = source_file_path 782 self.source_file_hash: str | None = None 783 784 if target_inference is None: 785 self.target_inference = PigweedIdeSettings().target_inference 786 else: 787 self.target_inference = target_inference 788 789 # Only compilation databases that are loaded will have this, and it 790 # contains the root directory of the build that the compilation 791 # database is based on. Processed compilation databases will not have 792 # a value here. 793 self._root_dir = root_dir 794 795 def __len__(self) -> int: 796 return len(self._db) 797 798 def __getitem__(self, index: int) -> CppCompileCommand: 799 return self._db[index] 800 801 def __iter__(self) -> Generator[CppCompileCommand, None, None]: 802 return (compile_command for compile_command in self._db) 803 804 @property 805 def file_hash(self) -> str: 806 # If this compilation database did not originate from a file, return a 807 # hash that is almost certainly not going to match any other hash; these 808 # sources are not persistent, so they cannot be compared. 809 if self.file_path is None: 810 return '%032x' % random.getrandbits(160) 811 812 data = self.file_path.read_text().encode('utf-8') 813 return sha1(data).hexdigest() 814 815 def add(self, *commands: CppCompileCommand): 816 """Add compile commands to the compilation database.""" 817 self._db.extend(commands) 818 819 def merge(self, other: CppCompilationDatabase) -> None: 820 """Merge values from another database into this one. 821 822 This will not overwrite a compile command that already exists for a 823 particular file. 824 """ 825 self_dict = {c.file: c for c in self._db} 826 827 for compile_command in other: 828 if compile_command.file not in self_dict: 829 self_dict[compile_command.file] = compile_command 830 831 self._db = list(self_dict.values()) 832 833 def as_dicts(self) -> list[CppCompileCommandDict]: 834 return [compile_command.as_dict() for compile_command in self._db] 835 836 def to_json(self) -> str: 837 """Output the compilation database to a JSON string.""" 838 839 return json.dumps(self.as_dicts(), indent=2, sort_keys=True) 840 841 def to_file(self, path: Path): 842 """Write the compilation database to a JSON file.""" 843 path.parent.mkdir(parents=True, exist_ok=True) 844 845 with open(path, 'w') as file: 846 json.dump(self.as_dicts(), file, indent=2, sort_keys=True) 847 848 @classmethod 849 def load( 850 cls, 851 compdb_to_load: LoadableToCppCompilationDatabase, 852 root_dir: Path, 853 target_inference: str | None = None, 854 ) -> CppCompilationDatabase: 855 """Load a compilation database. 856 857 You can provide a JSON file handle or path, a JSON string, or a native 858 Python data structure that matches the format (list of dicts). 859 """ 860 db_as_dicts: list[dict[str, Any]] 861 file_path = None 862 863 if isinstance(compdb_to_load, list): 864 # The provided data is already in the format we want it to be in, 865 # probably, and if it isn't we'll find out when we try to 866 # instantiate the database. 867 db_as_dicts = compdb_to_load 868 else: 869 if isinstance(compdb_to_load, Path): 870 # The provided data is a path to a file, presumably JSON. 871 try: 872 file_path = compdb_to_load 873 compdb_data = compdb_to_load.read_text() 874 except FileNotFoundError: 875 raise MissingCompDbException() 876 elif isinstance(compdb_to_load, TextIOBase): 877 # The provided data is a file handle, presumably JSON. 878 file_path = Path(compdb_to_load.name) # type: ignore 879 compdb_data = compdb_to_load.read() 880 elif isinstance(compdb_to_load, str): 881 # The provided data is a a string, presumably JSON. 882 compdb_data = compdb_to_load 883 884 db_as_dicts = json.loads(compdb_data) 885 886 compdb = cls( 887 root_dir=root_dir, 888 file_path=file_path, 889 target_inference=target_inference, 890 ) 891 892 try: 893 compdb.add( 894 *[ 895 compile_command 896 for compile_command_dict in db_as_dicts 897 if ( 898 compile_command := CppCompileCommand.try_from_dict( 899 compile_command_dict 900 ) 901 ) 902 is not None 903 ] 904 ) 905 except TypeError: 906 # This will arise if db_as_dicts is not actually a list of dicts 907 raise BadCompDbException() 908 909 return compdb 910 911 def process( 912 self, 913 settings: PigweedIdeSettings, 914 *, 915 default_path: Path | None = None, 916 path_globs: list[str] | None = None, 917 strict: bool = False, 918 always_output_new: bool = False, 919 ) -> CppCompilationDatabasesMap | None: 920 """Process a ``clangd`` compilation database file. 921 922 Given a clang compilation database that may have commands for multiple 923 valid or invalid targets/toolchains, keep only the valid compile 924 commands and store them in target-specific compilation databases. 925 926 If this finds that the processed file is functionally identical to the 927 input file (meaning that the input file did not require processing to 928 be used successfully with ``clangd``), then it will return ``None``, 929 indicating that the original file should be used. This behavior can be 930 overridden by setting ``always_output_new``, which will ensure that a 931 new compilation database is always written to the working directory and 932 original compilation databases outside the working directory are never 933 made available for code intelligence. 934 """ 935 if self._root_dir is None: 936 raise ValueError( 937 'Can only process a compilation database that ' 938 'contains a root build directory, usually ' 939 'specified when loading the file. Are you ' 940 'trying to process an already-processed ' 941 'compilation database?' 942 ) 943 944 clean_compdbs = CppCompilationDatabasesMap(settings) 945 946 # Do processing, segregate processed commands into separate databases 947 # for each target. 948 for compile_command in self: 949 processed_command = compile_command.process( 950 default_path=default_path, path_globs=path_globs, strict=strict 951 ) 952 953 if ( 954 processed_command is not None 955 and processed_command.output_path is not None 956 ): 957 target = infer_target( 958 self.target_inference, 959 self._root_dir, 960 processed_command.output_path, 961 ) 962 963 target = cast(str, target) 964 processed_command.target = target 965 clean_compdbs[target].add(processed_command) 966 967 if clean_compdbs[target].source_file_path is None: 968 clean_compdbs[target].source_file_path = self.file_path 969 clean_compdbs[target].source_file_hash = self.file_hash 970 971 # TODO(chadnorvell): Handle len(clean_compdbs) == 0 972 973 # Determine if the processed database is functionally identical to the 974 # original, unless configured to always output the new databases. 975 # The criteria for "functionally identical" are: 976 # 977 # - The original file only contained commands for a single target 978 # - The number of compile commands in the processed database is equal to 979 # that of the original database. 980 # 981 # This is a little bit crude. For example, it doesn't account for the 982 # (rare) edge case of multiple databases having commands for the same 983 # target. However, if you know that you have that kind of situation, you 984 # should use `always_output_new` and not rely on this. 985 if ( 986 not always_output_new 987 and len(clean_compdbs) == 1 988 and len(clean_compdbs[0]) == len(self) 989 ): 990 return None 991 992 return clean_compdbs 993 994 995class CppCompilationDatabasesMap: 996 """Container for a map of target name to compilation database.""" 997 998 def __init__(self, settings: PigweedIdeSettings): 999 self.settings = settings 1000 self._dbs: dict[str, CppCompilationDatabase] = dict() 1001 1002 def __len__(self) -> int: 1003 return len(self._dbs) 1004 1005 def _default(self, key: str | int): 1006 # This is like `defaultdict` except that we can use the provided key 1007 # (i.e. the target name) in the constructor. 1008 if isinstance(key, str) and key not in self._dbs: 1009 file_path = self.settings.working_dir / key / COMPDB_FILE_NAME 1010 self._dbs[key] = CppCompilationDatabase(file_path=file_path) 1011 1012 def __getitem__(self, key: str | int) -> CppCompilationDatabase: 1013 self._default(key) 1014 1015 # Support list-based indexing... 1016 if isinstance(key, int): 1017 return list(self._dbs.values())[key] 1018 1019 # ... and key-based indexing. 1020 return self._dbs[key] 1021 1022 def __setitem__(self, key: str, item: CppCompilationDatabase) -> None: 1023 self._default(key) 1024 self._dbs[key] = item 1025 1026 def __iter__(self) -> Iterator[str]: 1027 for target, _ in self.items(): 1028 yield target 1029 1030 @property 1031 def targets(self) -> list[str]: 1032 return list(self._dbs.keys()) 1033 1034 def items( 1035 self, 1036 ) -> Generator[tuple[str, CppCompilationDatabase], None, None]: 1037 return ((key, value) for (key, value) in self._dbs.items()) 1038 1039 def _sort_by_commands(self) -> list[str]: 1040 """Sort targets by the number of compile commands they have.""" 1041 enumerated_targets = sorted( 1042 [(len(db), target) for target, db in self._dbs.items()], 1043 key=lambda x: x[0], 1044 reverse=True, 1045 ) 1046 1047 return [target for (_, target) in enumerated_targets] 1048 1049 def _sort_with_target_priority(self, target: str) -> list[str]: 1050 """Sorted targets, but with the provided target first.""" 1051 sorted_targets = self._sort_by_commands() 1052 # This will raise a ValueError if the target is not in the list, but 1053 # we have ensured that that will never happen by the time we get here. 1054 sorted_targets.remove(target) 1055 return [target, *sorted_targets] 1056 1057 def _targets_to_write(self, target: str) -> list[str]: 1058 """Return the list of targets whose comp. commands should be written. 1059 1060 Under most conditions, this will return a list with just the provided 1061 target; essentially it's a no-op. But if ``cascade_targets`` is 1062 enabled, this returns a list of all targets with the provided target 1063 at the head of the list. 1064 """ 1065 if not self.settings.cascade_targets: 1066 return [target] 1067 1068 return self._sort_with_target_priority(target) 1069 1070 def _compdb_to_write(self, target: str) -> CppCompilationDatabase: 1071 """The compilation database to write to file for this target. 1072 1073 Under most conditions, this will return the compilation database 1074 associated with the provided target. But if ``cascade_targets`` is 1075 enabled, this returns a compilation database with commands from all 1076 targets, ordered per ``_sort_with_target_priority``. 1077 """ 1078 targets = self._targets_to_write(target) 1079 compdb = CppCompilationDatabase() 1080 1081 for iter_target in targets: 1082 compdb.add(*self[iter_target]) 1083 1084 return compdb 1085 1086 def test_write(self) -> None: 1087 """Test writing to file. 1088 1089 This will raise an exception if the file is not JSON-serializable.""" 1090 for _, compdb in self.items(): 1091 compdb.to_json() 1092 1093 def write(self) -> None: 1094 """Write compilation databases to target-specific JSON files.""" 1095 for target in self: 1096 path = self.settings.working_dir / target / COMPDB_FILE_NAME 1097 self._compdb_to_write(target).to_file(path) 1098 1099 @classmethod 1100 def merge( 1101 cls, *db_sets: CppCompilationDatabasesMap 1102 ) -> CppCompilationDatabasesMap: 1103 """Merge several sets of processed compilation databases. 1104 1105 If you process N compilation databases produced by a build system, 1106 you'll end up with N sets of processed compilation databases, 1107 containing databases for one or more targets each. This method 1108 merges them into one set of databases with one database per target. 1109 1110 The expectation is that the vast majority of the time, each of the 1111 raw compilation databases that are processed will contain distinct 1112 targets, meaning that the keys of each ``CppCompilationDatabases`` 1113 object that's merged will be unique to each object, and this operation 1114 is nothing more than a shallow merge. 1115 1116 However, this also supports the case where targets may overlap between 1117 ``CppCompilationDatabases`` objects. In that case, we prioritize 1118 correctness, ensuring that the resulting compilation databases will 1119 work correctly with clangd. This means not including duplicate compile 1120 commands for the same file in the same target's database. The choice 1121 of which duplicate compile command ends up in the final database is 1122 unspecified and subject to change. Note also that this method expects 1123 the ``settings`` value to be the same between all of the provided 1124 ``CppCompilationDatabases`` objects. 1125 """ 1126 if len(db_sets) == 0: 1127 raise ValueError( 1128 'At least one set of compilation databases is required.' 1129 ) 1130 1131 # Shortcut for the most common case. 1132 if len(db_sets) == 1: 1133 return db_sets[0] 1134 1135 merged = cls(db_sets[0].settings) 1136 1137 for dbs in db_sets: 1138 for target, db in dbs.items(): 1139 merged[target].merge(db) 1140 1141 return merged 1142 1143 1144@functools.lru_cache 1145def find_cipd_installed_exe_path(exe: str) -> Path: 1146 """Return the path of an executable installed by CIPD. 1147 1148 Search for the executable in the paths pointed by all the defined 1149 `PW_<PROJ_NAME>_CIPD_INSTALL_DIR` environment variables. 1150 """ 1151 1152 if sys.platform.lower() in ("win32", "cygwin"): 1153 exe += ".exe" 1154 1155 env_vars = vars(env) 1156 1157 search_paths: list[str] = [] 1158 for env_var_name, env_var in env_vars.items(): 1159 if re.fullmatch(r"PW_[A-Z_]+_CIPD_INSTALL_DIR", env_var_name): 1160 search_paths.append(str(Path(env_var) / "bin" / exe)) 1161 1162 if (env_var := env_vars.get('PW_PIGWEED_CIPD_INSTALL_DIR')) is not None: 1163 search_paths.append(str(Path(env_var) / "bin" / exe)) 1164 1165 path = None 1166 exception = None 1167 try: 1168 path = path_to_executable( 1169 exe, default_path=None, path_globs=search_paths, strict=True 1170 ) 1171 except UnresolvablePathException as e: 1172 exception = e 1173 1174 if path is None or exception: 1175 search_paths_str = ":".join(search_paths) 1176 raise FileNotFoundError( 1177 f"Not able to find '{exe}' " 1178 f"among '{search_paths_str}'. Is bootstrap successful?" 1179 ) 1180 1181 return path 1182 1183 1184def get_clangd_path(settings: PigweedIdeSettings) -> Path: 1185 if settings.clangd_alternate_path is not None: 1186 return settings.clangd_alternate_path 1187 1188 return find_cipd_installed_exe_path('clangd') 1189 1190 1191class ClangdSettings: 1192 """Makes system-specific settings for running ``clangd`` with Pigweed.""" 1193 1194 def __init__(self, settings: PigweedIdeSettings): 1195 state = CppIdeFeaturesState(settings) 1196 1197 self.clangd_path = get_clangd_path(settings) 1198 1199 compile_commands_dir = env.PW_PROJECT_ROOT 1200 1201 if state.current_target is not None: 1202 compile_commands_dir = str(state.stable_target_link) 1203 1204 host_cc_path = find_cipd_installed_exe_path("clang++") 1205 1206 self.arguments: list[str] = [ 1207 f'--compile-commands-dir={compile_commands_dir}', 1208 '--background-index', 1209 '--clang-tidy', 1210 ] 1211 1212 query_driver = settings.clangd_query_driver_str(host_cc_path) 1213 1214 if query_driver is not None: 1215 self.arguments.append(f'--query-driver={query_driver}') 1216 1217 def command(self, system: str = platform.system()) -> str: 1218 """Return the command that runs clangd with Pigweed paths.""" 1219 1220 def make_command(line_continuation: str): 1221 arguments = f' {line_continuation}\n'.join( 1222 f' {arg}' for arg in self.arguments 1223 ) 1224 return f'\n{self.clangd_path} {line_continuation}\n{arguments}' 1225 1226 if system.lower() == 'json': 1227 return '\n' + json.dumps( 1228 [str(self.clangd_path), *self.arguments], indent=2 1229 ) 1230 1231 if system.lower() in ['cmd', 'batch']: 1232 return make_command('`') 1233 1234 if system.lower() in ['powershell', 'pwsh']: 1235 return make_command('^') 1236 1237 if system.lower() == 'windows': 1238 return ( 1239 f'\nIn PowerShell:\n{make_command("`")}' 1240 f'\n\nIn Command Prompt:\n{make_command("^")}' 1241 ) 1242 1243 # Default case for *sh-like shells. 1244 return make_command('\\') 1245