xref: /aosp_15_r20/external/stg/abigail_reader.cc (revision 9e3b08ae94a55201065475453d799e8b1378bea6)
1 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
2 // -*- mode: C++ -*-
3 //
4 // Copyright 2021-2024 Google LLC
5 //
6 // Licensed under the Apache License v2.0 with LLVM Exceptions (the
7 // "License"); you may not use this file except in compliance with the
8 // License.  You may obtain a copy of the License at
9 //
10 //     https://llvm.org/LICENSE.txt
11 //
12 // Unless required by applicable law or agreed to in writing, software
13 // distributed under the License is distributed on an "AS IS" BASIS,
14 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 // See the License for the specific language governing permissions and
16 // limitations under the License.
17 //
18 // Author: Giuliano Procida
19 // Author: Ignes Simeonova
20 
21 #include "abigail_reader.h"
22 
23 #include <fcntl.h>
24 
25 #include <algorithm>
26 #include <array>
27 #include <cstddef>
28 #include <cstdint>
29 #include <functional>
30 #include <ios>
31 #include <map>
32 #include <memory>
33 #include <optional>
34 #include <set>
35 #include <sstream>
36 #include <string>
37 #include <string_view>
38 #include <type_traits>
39 #include <unordered_map>
40 #include <utility>
41 #include <vector>
42 
43 #include <libxml/globals.h>  // xmlFree moves to xmlmemory.h later
44 #include <libxml/parser.h>
45 #include <libxml/tree.h>
46 #include <libxml/xmlstring.h>
47 #include "error.h"
48 #include "file_descriptor.h"
49 #include "graph.h"
50 #include "runtime.h"
51 #include "scope.h"
52 #include "type_normalisation.h"
53 
54 namespace stg {
55 namespace abixml {
56 
57 namespace {
58 
59 // Cast a libxml string to C string and present it as a string_view.
FromLibxml(const xmlChar * str)60 std::string_view FromLibxml(const xmlChar* str) {
61   return reinterpret_cast<const char*>(str);
62 }
63 
64 // Cast a C string to a libxml string.
ToLibxml(const char * str)65 const xmlChar* ToLibxml(const char* str) {
66   return reinterpret_cast<const xmlChar*>(str);
67 }
68 
69 // Get the name of an XML element.
GetName(xmlNodePtr element)70 std::string_view GetName(xmlNodePtr element) {
71   return FromLibxml(element->name);
72 }
73 
CheckName(const char * name,xmlNodePtr element)74 void CheckName(const char* name, xmlNodePtr element) {
75   const auto element_name = GetName(element);
76   if (element_name != name) {
77     Die() << "expected element '" << name
78           << "' but got '" << element_name << "'";
79   }
80 }
81 
Child(xmlNodePtr node)82 xmlNodePtr Child(xmlNodePtr node) {
83   return node->children;
84 }
85 
Next(xmlNodePtr node)86 xmlNodePtr Next(xmlNodePtr node) {
87   return node->next;
88 }
89 
GetOnlyChild(xmlNodePtr element)90 xmlNodePtr GetOnlyChild(xmlNodePtr element) {
91   const xmlNodePtr child = Child(element);
92   if (child == nullptr || Next(child) != nullptr) {
93     Die() << "element '" << GetName(element) << "' without exactly one child";
94   }
95   return child;
96 }
97 
98 // Get an optional attribute.
GetAttribute(xmlNodePtr node,const char * name)99 std::optional<std::string> GetAttribute(xmlNodePtr node, const char* name) {
100   std::optional<std::string> result;
101   xmlChar* attribute = xmlGetProp(node, ToLibxml(name));
102   if (attribute) {
103     result.emplace(FromLibxml(attribute));
104     xmlFree(attribute);
105   }
106   return result;
107 }
108 
109 // Get an attribute.
GetAttributeOrDie(xmlNodePtr node,const char * name)110 std::string GetAttributeOrDie(xmlNodePtr node, const char* name) {
111   xmlChar* attribute = xmlGetProp(node, ToLibxml(name));
112   if (!attribute) {
113     Die() << "element '" << GetName(node)
114           << "' missing attribute '" << name << "'";
115   }
116   const std::string result(FromLibxml(attribute));
117   xmlFree(attribute);
118   return result;
119 }
120 
121 // Set an attribute value.
SetAttribute(xmlNodePtr node,const char * name,const std::string & value)122 void SetAttribute(xmlNodePtr node, const char* name, const std::string& value) {
123   xmlSetProp(node, ToLibxml(name), ToLibxml(value.c_str()));
124 }
125 
126 // Unset an attribute value.
UnsetAttribute(xmlNodePtr node,const char * name)127 void UnsetAttribute(xmlNodePtr node, const char* name) {
128   xmlUnsetProp(node, ToLibxml(name));
129 }
130 
131 // Remove a node and free its storage.
RemoveNode(xmlNodePtr node)132 void RemoveNode(xmlNodePtr node) {
133   xmlUnlinkNode(node);
134   xmlFreeNode(node);
135 }
136 
137 // Move a node to be the last child of another.
MoveNode(xmlNodePtr node,xmlNodePtr destination)138 void MoveNode(xmlNodePtr node, xmlNodePtr destination) {
139   xmlUnlinkNode(node);
140   xmlAddChild(destination, node);
141 }
142 
// Parse the whole of a string as a value of type T using stream extraction.
//
// The value is returned only if extraction succeeds and consumes every
// character of the input; leading whitespace is not skipped, so it causes
// failure. Otherwise an empty optional is returned.
//
// For unsigned T a leading '-' is rejected up front: stream extraction would
// otherwise accept it and yield the wrapped-around value (for example "-1"
// would parse as the maximum value of T) instead of failing.
template <typename T>
std::optional<T> Parse(const std::string& value) {
  if constexpr (std::is_unsigned_v<T>) {
    if (!value.empty() && value.front() == '-') {
      return std::nullopt;
    }
  }
  T result{};
  std::istringstream is(value);
  is >> std::noskipws >> result;
  // Require success and that the extraction ran to the end of the input.
  if (is && is.eof()) {
    return std::optional<T>(result);
  }
  return std::nullopt;
}
153 
154 template <>
Parse(const std::string & value)155 std::optional<bool> Parse<bool>(const std::string& value) {
156   if (value == "yes") {
157     return {true};
158   } else if (value == "no") {
159     return {false};
160   }
161   return {};
162 }
163 
164 template <>
Parse(const std::string & value)165 std::optional<ElfSymbol::SymbolType> Parse<ElfSymbol::SymbolType>(
166     const std::string& value) {
167   if (value == "no-type") {
168     return {ElfSymbol::SymbolType::NOTYPE};
169   } else if (value == "object-type") {
170     return {ElfSymbol::SymbolType::OBJECT};
171   } else if (value == "func-type") {
172     return {ElfSymbol::SymbolType::FUNCTION};
173   } else if (value == "common-type") {
174     return {ElfSymbol::SymbolType::COMMON};
175   } else if (value == "tls-type") {
176     return {ElfSymbol::SymbolType::TLS};
177   } else if (value == "gnu-ifunc-type") {
178     return {ElfSymbol::SymbolType::GNU_IFUNC};
179   }
180   return {};
181 }
182 
183 template <>
Parse(const std::string & value)184 std::optional<ElfSymbol::Binding> Parse<ElfSymbol::Binding>(
185     const std::string& value) {
186   if (value == "global-binding") {
187     return {ElfSymbol::Binding::GLOBAL};
188   } else if (value == "local-binding") {
189     return {ElfSymbol::Binding::LOCAL};
190   } else if (value == "weak-binding") {
191     return {ElfSymbol::Binding::WEAK};
192   } else if (value == "gnu-unique-binding") {
193     return {ElfSymbol::Binding::GNU_UNIQUE};
194   }
195   return {};
196 }
197 
198 template <>
Parse(const std::string & value)199 std::optional<ElfSymbol::Visibility> Parse<ElfSymbol::Visibility>(
200     const std::string& value) {
201   if (value == "default-visibility") {
202     return {ElfSymbol::Visibility::DEFAULT};
203   } else if (value == "protected-visibility") {
204     return {ElfSymbol::Visibility::PROTECTED};
205   } else if (value == "hidden-visibility") {
206     return {ElfSymbol::Visibility::HIDDEN};
207   } else if (value == "internal-visibility") {
208     return {ElfSymbol::Visibility::INTERNAL};
209   }
210   return {};
211 }
212 
213 template <>
Parse(const std::string & value)214 std::optional<ElfSymbol::CRC> Parse<ElfSymbol::CRC>(const std::string& value) {
215   uint32_t number;
216   std::istringstream is(value);
217   is >> std::noskipws >> std::hex >> number;
218   if (is && is.eof()) {
219     return std::make_optional<ElfSymbol::CRC>(number);
220   }
221   return std::nullopt;
222 }
223 
224 template <typename T>
GetParsedValueOrDie(xmlNodePtr element,const char * name,const std::string & value,const std::optional<T> & parse)225 T GetParsedValueOrDie(xmlNodePtr element, const char* name,
226                       const std::string& value, const std::optional<T>& parse) {
227   if (parse) {
228     return *parse;
229   }
230   Die() << "element '" << GetName(element)
231         << "' has attribute '" << name
232         << "' with bad value '" << value << "'";
233 }
234 
235 template <typename T>
ReadAttributeOrDie(xmlNodePtr element,const char * name)236 T ReadAttributeOrDie(xmlNodePtr element, const char* name) {
237   const auto value = GetAttributeOrDie(element, name);
238   return GetParsedValueOrDie(element, name, value, Parse<T>(value));
239 }
240 
241 template <typename T>
ReadAttribute(xmlNodePtr element,const char * name)242 std::optional<T> ReadAttribute(xmlNodePtr element, const char* name) {
243   const auto value = GetAttribute(element, name);
244   if (value) {
245     return {GetParsedValueOrDie(element, name, *value, Parse<T>(*value))};
246   }
247   return {};
248 }
249 
250 template <typename T>
ReadAttribute(xmlNodePtr element,const char * name,const T & default_value)251 T ReadAttribute(xmlNodePtr element, const char* name, const T& default_value) {
252   const auto value = GetAttribute(element, name);
253   if (value) {
254     return GetParsedValueOrDie(element, name, *value, Parse<T>(*value));
255   }
256   return default_value;
257 }
258 
259 template <typename T>
ReadAttribute(xmlNodePtr element,const char * name,std::function<std::optional<T> (const std::string &)> parse)260 T ReadAttribute(xmlNodePtr element, const char* name,
261                 std::function<std::optional<T>(const std::string&)> parse) {
262   const auto value = GetAttributeOrDie(element, name);
263   return GetParsedValueOrDie(element, name, value, parse(value));
264 }
265 
266 // Remove non-element nodes, recursively.
267 //
268 // This simplifies subsequent manipulation. This should only remove comment,
269 // text and possibly CDATA nodes.
StripNonElements(xmlNodePtr node)270 void StripNonElements(xmlNodePtr node) {
271   switch (node->type) {
272     case XML_COMMENT_NODE:
273     case XML_TEXT_NODE:
274     case XML_CDATA_SECTION_NODE:
275       RemoveNode(node);
276       break;
277     case XML_ELEMENT_NODE: {
278       xmlNodePtr child = Child(node);
279       while (child) {
280         const xmlNodePtr next = Next(child);
281         StripNonElements(child);
282         child = next;
283       }
284       break;
285     }
286     default:
287       Die() << "unexpected XML node type: " << node->type;
288   }
289 }
290 
291 // Determine whether one XML element is a subtree of another, and optionally,
292 // actually equal to it.
SubOrEqualTree(bool also_equal,xmlNodePtr left,xmlNodePtr right)293 bool SubOrEqualTree(bool also_equal, xmlNodePtr left, xmlNodePtr right) {
294   // Node names must match.
295   const auto left_name = GetName(left);
296   const auto right_name = GetName(right);
297   if (left_name != right_name) {
298     return false;
299   }
300 
301   // Attributes may be missing on the left, but must match otherwise.
302   size_t left_attributes = 0;
303   for (auto* p = left->properties; p; p = p->next) {
304     ++left_attributes;
305     const auto attribute = FromLibxml(p->name);
306     const char* attribute_name = attribute.data();
307     const auto left_value = GetAttributeOrDie(left, attribute_name);
308     const auto right_value = GetAttribute(right, attribute_name);
309     if (!right_value || left_value != right_value.value()) {
310       return false;
311     }
312   }
313   // To also be equal, we just need to check the counts are the same.
314   if (also_equal) {
315     size_t right_attributes = 0;
316     for (auto* p = right->properties; p; p = p->next) {
317       ++right_attributes;
318     }
319     if (left_attributes != right_attributes) {
320       return false;
321     }
322   }
323 
324   // The left subelements must be a subsequence of the right ones and to also be
325   // equal, we must not have skipped any right ones.
326   xmlNodePtr left_child = Child(left);
327   xmlNodePtr right_child = Child(right);
328   while (left_child != nullptr && right_child != nullptr) {
329     if (SubOrEqualTree(also_equal, left_child, right_child)) {
330       left_child = Next(left_child);
331     } else if (also_equal) {
332       return false;
333     }
334     right_child = Next(right_child);
335   }
336   return left_child == nullptr && (right_child == nullptr || !also_equal);
337 }
338 
339 }  // namespace
340 
341 // Determine whether one XML element is a subtree of another.
SubTree(xmlNodePtr left,xmlNodePtr right)342 bool SubTree(xmlNodePtr left, xmlNodePtr right) {
343   return SubOrEqualTree(false, left, right);
344 }
345 
346 // Determine whether one XML element is the same as another.
EqualTree(xmlNodePtr left,xmlNodePtr right)347 bool EqualTree(xmlNodePtr left, xmlNodePtr right) {
348   return SubOrEqualTree(true, left, right);
349 }
350 
351 // Find a maximal XML element if one exists.
MaximalTree(const std::vector<xmlNodePtr> & nodes)352 std::optional<size_t> MaximalTree(const std::vector<xmlNodePtr>& nodes) {
353   if (nodes.empty()) {
354     return std::nullopt;
355   }
356 
357   // Find a potentially maximal candidate by scanning through and retaining the
358   // new node if it's a supertree of the current candidate.
359   const auto count = nodes.size();
360   std::vector<bool> ok(count);
361   size_t candidate = 0;
362   ok[candidate] = true;
363   for (size_t ix = 1; ix < count; ++ix) {
364     if (SubTree(nodes[candidate], nodes[ix])) {
365       candidate = ix;
366       ok[candidate] = true;
367     }
368   }
369 
370   // Verify the candidate is indeed maximal by comparing it with the nodes not
371   // already known to be subtrees of it.
372   const auto& candidate_node = nodes[candidate];
373   for (size_t ix = 0; ix < count; ++ix) {
374     const auto& node = nodes[ix];
375     if (!ok[ix] && !SubTree(node, candidate_node)) {
376       return std::nullopt;
377     }
378   }
379 
380   return std::make_optional(candidate);
381 }
382 
383 namespace {
384 
// Report whether `needle` compares equal to any element of `haystack`.
template<size_t N>
bool Contains(const std::array<std::string_view, N>& haystack,
              std::string_view needle) {
  for (const std::string_view candidate : haystack) {
    if (candidate == needle) {
      return true;
    }
  }
  return false;
}
391 
392 // Remove source location attributes.
393 //
394 // This simplifies element comparison later.
StripLocationInfo(xmlNodePtr node)395 void StripLocationInfo(xmlNodePtr node) {
396   static const std::array<std::string_view, 7> has_location_info = {
397     "class-decl",
398     "enum-decl",
399     "function-decl",
400     "parameter",
401     "typedef-decl",
402     "union-decl",
403     "var-decl"
404   };
405 
406   if (Contains(has_location_info, GetName(node))) {
407     UnsetAttribute(node, "filepath");
408     UnsetAttribute(node, "line");
409     UnsetAttribute(node, "column");
410   }
411   for (auto* child = Child(node); child; child = Next(child)) {
412     StripLocationInfo(child);
413   }
414 }
415 
416 // Remove access attribute.
417 //
418 // This simplifies element comparison later in a very specific way: libabigail
419 // (possibly older versions) uses the access specifier for the type it's trying
420 // to "emit in scope", even for its containing types, making deduplicating types
421 // trickier. We don't care about access anyway, so just remove it everywhere.
StripAccess(xmlNodePtr node)422 void StripAccess(xmlNodePtr node) {
423   static const std::array<std::string_view, 5> has_access = {
424     "base-class",
425     "data-member",
426     "member-function",
427     "member-template",
428     "member-type",
429   };
430 
431   if (Contains(has_access, GetName(node))) {
432     UnsetAttribute(node, "access");
433   }
434   for (auto* child = Child(node); child; child = Next(child)) {
435     StripAccess(child);
436   }
437 }
438 
// Element names for the user-defined types, typedefs excepted, that libabigail
// may emit as anonymous or flag as unreachable.
constexpr std::array<std::string_view, 3> kNamedTypes = {{
  "class-decl",
  "enum-decl",
  "union-decl",
}};
446 
447 // Remove attributes emitted by abidw --load-all-types.
448 //
449 // With this invocation and if any user-defined types are deemed unreachable,
450 // libabigail will output a tracking-non-reachable-types attribute on top-level
451 // elements and an is-non-reachable attribute on each such type element.
452 //
453 // We have our own graph-theoretic notion of reachability and these attributes
454 // have no ABI relevance and can interfere with element comparisons.
StripReachabilityAttributes(xmlNodePtr node)455 void StripReachabilityAttributes(xmlNodePtr node) {
456   const auto node_name = GetName(node);
457 
458   if (node_name == "abi-corpus-group" || node_name == "abi-corpus") {
459     UnsetAttribute(node, "tracking-non-reachable-types");
460   } else if (Contains(kNamedTypes, node_name)) {
461     UnsetAttribute(node, "is-non-reachable");
462   }
463 
464   for (auto* child = Child(node); child; child = Next(child)) {
465     StripReachabilityAttributes(child);
466   }
467 }
468 
469 // Fix bad DWARF -> ELF links caused by size zero symbol confusion.
470 //
471 // libabigail used to be confused by these sorts of symbols, resulting in
472 // declarations pointing at the wrong ELF symbols:
473 //
474 // 573623: ffffffc0122383c0   256 OBJECT  GLOBAL DEFAULT   33 vm_node_stat
475 // 573960: ffffffc0122383c0     0 OBJECT  GLOBAL DEFAULT   33 vm_numa_stat
FixBadDwarfElfLinks(xmlNodePtr root)476 void FixBadDwarfElfLinks(xmlNodePtr root) {
477   std::unordered_map<std::string, size_t> elf_links;
478 
479   // See which ELF symbol IDs have multiple declarations.
480   const std::function<void(xmlNodePtr)> count = [&](xmlNodePtr node) {
481     if (GetName(node) == "var-decl") {
482       const auto symbol_id = GetAttribute(node, "elf-symbol-id");
483       if (symbol_id) {
484         ++elf_links[symbol_id.value()];
485       }
486     }
487 
488     for (auto* child = Child(node); child; child = Next(child)) {
489       count(child);
490     }
491   };
492   count(root);
493 
494   // Fix up likely bad links from DWARF declaration to ELF symbol.
495   const std::function<void(xmlNodePtr)> fix = [&](xmlNodePtr node) {
496     if (GetName(node) == "var-decl") {
497       const auto name = GetAttributeOrDie(node, "name");
498       const auto mangled_name = GetAttribute(node, "mangled-name");
499       const auto symbol_id = GetAttribute(node, "elf-symbol-id");
500       if (mangled_name && symbol_id && name != symbol_id.value()
501           && elf_links[symbol_id.value()] > 1) {
502         if (mangled_name.value() == name) {
503           Warn() << "fixing up ELF symbol for '" << name
504                  << "' (was '" << symbol_id.value() << "')";
505           SetAttribute(node, "elf-symbol-id", name);
506         } else if (mangled_name.value() == symbol_id.value()) {
507           Warn() << "fixing up mangled name and ELF symbol for '" << name
508                  << "' (was '" << symbol_id.value() << "')";
509           SetAttribute(node, "mangled-name", name);
510           SetAttribute(node, "elf-symbol-id", name);
511         }
512       }
513     }
514 
515     for (auto* child = Child(node); child; child = Next(child)) {
516       fix(child);
517     }
518   };
519   fix(root);
520 }
521 
522 // Tidy anonymous types in various ways.
523 //
524 // 1. Normalise anonymous type names by dropping the name attribute.
525 //
526 // Anonymous type names take the form __anonymous_foo__N where foo is one of
527 // enum, struct or union and N is an optional numerical suffix. We don't care
528 // about these names but they may cause trouble when comparing elements.
529 //
530 // 2. Reanonymise anonymous types that have been given names.
531 //
532 // At some point abidw changed its behaviour given an anonymous type with a
533 // naming typedef. In addition to linking the typedef and type in both
534 // directions, the code now gives (some) anonymous types the same name as the
535 // typedef. This misrepresents the original types.
536 //
537 // Such types should be anonymous. We set is-anonymous and drop the name.
538 //
539 // 3. Discard naming typedef backlinks.
540 //
541 // The attribute naming-typedef-id is a backwards link from an anonymous type to
542 // the typedef that refers to it.
543 //
544 // We don't care about these attributes and they may cause comparison issues.
TidyAnonymousTypes(xmlNodePtr node)545 void TidyAnonymousTypes(xmlNodePtr node) {
546   if (Contains(kNamedTypes, GetName(node))) {
547     const bool is_anon = ReadAttribute<bool>(node, "is-anonymous", false);
548     const auto naming_attribute = GetAttribute(node, "naming-typedef-id");
549     if (is_anon) {
550       UnsetAttribute(node, "name");
551     } else if (naming_attribute) {
552       SetAttribute(node, "is-anonymous", "yes");
553       UnsetAttribute(node, "name");
554     }
555     if (naming_attribute) {
556       UnsetAttribute(node, "naming-typedef-id");
557     }
558   }
559 
560   for (auto* child = Child(node); child; child = Next(child)) {
561     TidyAnonymousTypes(child);
562   }
563 }
564 
565 // Remove duplicate members.
RemoveDuplicateMembers(xmlNodePtr root)566 void RemoveDuplicateMembers(xmlNodePtr root) {
567   std::vector<xmlNodePtr> types;
568 
569   // find all structs and unions
570   std::function<void(xmlNodePtr)> dfs = [&](xmlNodePtr node) {
571     const auto node_name = GetName(node);
572     // preorder in case we delete a nested element
573     for (auto* child = Child(node); child; child = Next(child)) {
574       dfs(child);
575     }
576     if (node_name == "class-decl" || node_name == "union-decl") {
577       types.push_back(node);
578     }
579   };
580   dfs(root);
581 
582   for (const auto& node : types) {
583     // partition members by node name
584     std::map<std::string_view, std::vector<xmlNodePtr>> member_map;
585     for (auto* child = Child(node); child; child = Next(child)) {
586       member_map[GetName(child)].push_back(child);
587     }
588     // for each kind of member...
589     for (auto& [name, members] : member_map) {
590       // ... remove identical duplicate members - O(n^2)
591       for (size_t i = 0; i < members.size(); ++i) {
592         xmlNodePtr& i_node = members[i];
593         bool duplicate = false;
594         for (size_t j = 0; j < i; ++j) {
595           const xmlNodePtr& j_node = members[j];
596           if (j_node != nullptr && EqualTree(i_node, j_node)) {
597             duplicate = true;
598             break;
599           }
600         }
601         if (duplicate) {
602           RemoveNode(i_node);
603           i_node = nullptr;
604         }
605       }
606     }
607   }
608 }
609 
610 // Eliminate non-conflicting / report conflicting duplicate definitions.
611 //
612 // XML elements representing types are sometimes emitted multiple times,
613 // identically. Also, member typedefs are sometimes emitted separately from
614 // their types, resulting in duplicate XML fragments.
615 //
616 // Both these issues can be resolved by first detecting duplicate occurrences of
617 // a given type id and then checking to see if there's an instance that subsumes
618 // the others, which can then be eliminated.
619 //
620 // This function eliminates exact type duplicates and duplicates where there is
621 // at least one maximal definition. It can report the remaining duplicate
622 // definitions.
623 //
624 // If a type has duplicate definitions in multiple namespace scopes or
625 // definitions with different effective names, these are considered to be
626 // *conflicting* duplicate definitions. TODO: update text
HandleDuplicateTypes(xmlNodePtr root)627 void HandleDuplicateTypes(xmlNodePtr root) {
628   // Convenience typedef referring to a namespace scope.
629   using namespace_scope = std::vector<std::string>;
630   // map of type-id to pair of set of namespace scopes and vector of
631   // xmlNodes
632   std::unordered_map<
633       std::string,
634       std::pair<
635           std::set<namespace_scope>,
636           std::vector<xmlNodePtr>>> types;
637   namespace_scope namespaces;
638 
639   // find all type occurrences
640   std::function<void(xmlNodePtr)> dfs = [&](xmlNodePtr node) {
641     const auto node_name = GetName(node);
642     std::optional<std::string> namespace_name;
643     if (node_name == "namespace-decl") {
644       namespace_name = GetAttribute(node, "name");
645     }
646     if (namespace_name) {
647       namespaces.push_back(namespace_name.value());
648     }
649     if (node_name == "abi-corpus-group"
650         || node_name == "abi-corpus"
651         || node_name == "abi-instr"
652         || namespace_name) {
653       for (auto* child = Child(node); child; child = Next(child)) {
654         dfs(child);
655       }
656     } else {
657       const auto id = GetAttribute(node, "id");
658       if (id) {
659         auto& info = types[id.value()];
660         info.first.insert(namespaces);
661         info.second.push_back(node);
662       }
663     }
664     if (namespace_name) {
665       namespaces.pop_back();
666     }
667   };
668   dfs(root);
669 
670   for (const auto& [id, scopes_and_definitions] : types) {
671     const auto& [scopes, definitions] = scopes_and_definitions;
672 
673     if (scopes.size() > 1) {
674       Warn() << "conflicting scopes found for type '" << id << '\'';
675       continue;
676     }
677 
678     const auto possible_maximal = MaximalTree(definitions);
679     if (possible_maximal) {
680       // Remove all but the maximal definition.
681       const size_t maximal = possible_maximal.value();
682       for (size_t ix = 0; ix < definitions.size(); ++ix) {
683         if (ix != maximal) {
684           RemoveNode(definitions[ix]);
685         }
686       }
687       continue;
688     }
689 
690     // As a rare alternative, check for a stray anonymous member that has been
691     // separated from the main definition.
692     size_t strays = 0;
693     std::optional<size_t> stray;
694     std::optional<size_t> non_stray;
695     for (size_t ix = 0; ix < definitions.size(); ++ix) {
696       auto node = definitions[ix];
697       auto member = Child(node);
698       if (member && !Next(member) && GetName(member) == "data-member") {
699         auto decl = Child(member);
700         if (decl && !Next(decl) && GetName(decl) == "var-decl") {
701           auto name = GetAttribute(decl, "name");
702           if (name && name.value().empty()) {
703             ++strays;
704             stray = ix;
705             continue;
706           }
707         }
708       }
709       non_stray = ix;
710     }
711     if (strays + 1 == definitions.size() && stray.has_value()
712         && non_stray.has_value()) {
713       const auto stray_index = stray.value();
714       const auto non_stray_index = non_stray.value();
715       bool good = true;
716       for (size_t ix = 0; ix < definitions.size(); ++ix) {
717         if (ix == stray_index || ix == non_stray_index) {
718           continue;
719         }
720         if (EqualTree(definitions[stray_index], definitions[ix])) {
721           // it doesn't hurt if we remove exact duplicates and then fail
722           RemoveNode(definitions[ix]);
723         } else {
724           good = false;
725           break;
726         }
727       }
728       if (good) {
729         MoveNode(Child(definitions[stray_index]), definitions[non_stray_index]);
730         RemoveNode(definitions[stray_index]);
731         continue;
732       }
733     }
734 
735     Warn() << "unresolvable duplicate definitions found for type '" << id
736            << '\'';
737   }
738 }
739 
740 }  // namespace
741 
742 // Remove XML nodes and attributes that are neither used or wanted.
Clean(xmlNodePtr root)743 void Clean(xmlNodePtr root) {
744   // Strip non-element nodes to simplify other operations.
745   StripNonElements(root);
746 
747   // Strip location information.
748   StripLocationInfo(root);
749 
750   // Strip access.
751   StripAccess(root);
752 
753   // Strip reachability attributes.
754   StripReachabilityAttributes(root);
755 }
756 
757 namespace {
758 
759 // Transform XML elements to improve their semantics.
Tidy(xmlNodePtr root)760 void Tidy(xmlNodePtr root) {
761   // Fix bad ELF symbol links
762   FixBadDwarfElfLinks(root);
763 
764   // Normalise anonymous type names.
765   // Reanonymise anonymous types.
766   // Discard naming typedef backlinks.
767   TidyAnonymousTypes(root);
768 
769   // Remove duplicate members.
770   RemoveDuplicateMembers(root);
771 
772   // Eliminate complete duplicates and extra fragments of types.
773   // Report conflicting duplicate defintions.
774   // Record whether there are conflicting duplicate definitions.
775   HandleDuplicateTypes(root);
776 }
777 
ParseLength(const std::string & value)778 std::optional<uint64_t> ParseLength(const std::string& value) {
779   if (value == "infinite" || value == "unknown") {
780     return {0};
781   }
782   return Parse<uint64_t>(value);
783 }
784 
ParseReferenceKind(const std::string & value)785 std::optional<PointerReference::Kind> ParseReferenceKind(
786     const std::string& value) {
787   if (value == "lvalue") {
788     return {PointerReference::Kind::LVALUE_REFERENCE};
789   } else if (value == "rvalue") {
790     return {PointerReference::Kind::RVALUE_REFERENCE};
791   }
792   return {};
793 }
794 
795 // Parser for libabigail's ABI XML format, creating a Symbol-Type Graph.
796 //
797 // On construction Abigail consumes a libxml node tree and builds a graph.
798 //
799 // Note that the core parser sees a "clean and tidy" XML document due to
800 // preprocessing that simplifies the XML and resolves several issues. One
801 // notable exception is that duplicate nodes may still remain.
802 //
803 // The main producer of ABI XML is abidw. The format has no formal specification
804 // and has very limited semantic versioning. This parser makes no attempt to
805 // support or correct for deficiencies in older versions of the format.
806 //
807 // The parser detects and will abort on the presence of unexpected elements.
808 //
809 // The parser ignores attributes it doesn't care about, including member access
810 // specifiers and (meaningless) type ids on array dimensions.
811 //
812 // The STG IR and libabigail ABI XML models diverge in some ways. The parser has
813 // to do extra work for each of these, as follows.
814 //
815 // 0. XML uses type and symbol ids to link together elements. These become edges
816 // in the graph between symbols and types and between types and types. Dangling
817 // type references will cause an abort. libabigail is much more relaxed about
818 // symbols without type information and these are modelled as such.
819 //
820 // 1. XML function declarations have in-line types. The parser creates
821 // free-standing types on-the-fly. A useful space optimisation might be to
822 // prevent duplicate creation of such types.
823 //
824 // 2. Variadic parameters are currently flagged with an XML attribute. A
825 // variadic type node is created on demand and will be shared by all such
826 // parameters.
827 //
828 // 3. XML symbols and aliases have a rather poor representation with aliases
829 // represented as comma-separated attribute values. Aliases are resolved in a
830 // post-processing phase.
831 //
832 // 4. XML anonymous types may also have names, these are ignored.
833 class Abigail {
834  public:
835   explicit Abigail(Graph& graph);
836   Id ProcessRoot(xmlNodePtr root);
837 
838  private:
839   struct SymbolInfo {
840     std::string name;
841     std::optional<ElfSymbol::VersionInfo> version_info;
842     xmlNodePtr node;
843   };
844 
845   // Map from libabigail type ids to STG node ids; except for the type of
846   // variadic parameters.
847   Maker<std::string> maker_;
848   // The STG IR uses a distinct node type for the variadic parameter type; if
849   // allocated, this is its STG node id.
850   std::optional<Id> variadic_;
851 
852   // symbol id to symbol information
853   std::unordered_map<std::string, SymbolInfo> symbol_info_map_;
854   // alias symbol id to main symbol id
855   std::unordered_map<std::string, std::string> alias_to_main_;
856   // libabigail decorates certain declarations with symbol ids; this is the
857   // mapping from symbol id to the corresponding type and full name.
858   std::unordered_map<std::string, std::pair<Id, std::string>>
859       symbol_id_and_full_name_;
860 
861   // Full name of the current scope.
862   Scope scope_;
863 
864   Id GetEdge(xmlNodePtr element);
865   Id GetVariadic();
866   Function MakeFunctionType(xmlNodePtr function);
867 
868   void ProcessCorpusGroup(xmlNodePtr group);
869   void ProcessCorpus(xmlNodePtr corpus);
870   void ProcessSymbols(xmlNodePtr symbols);
871   void ProcessSymbol(xmlNodePtr symbol);
872 
873   bool ProcessUserDefinedType(std::string_view name, const std::string& id,
874                               xmlNodePtr decl);
875   void ProcessScope(xmlNodePtr scope);
876 
877   void ProcessInstr(xmlNodePtr instr);
878   void ProcessNamespace(xmlNodePtr scope);
879 
880   Id ProcessDecl(bool is_variable, xmlNodePtr decl);
881 
882   void ProcessFunctionType(const std::string& id, xmlNodePtr function);
883   void ProcessTypedef(const std::string& id, xmlNodePtr type_definition);
884   void ProcessPointer(const std::string& id, bool is_pointer,
885                       xmlNodePtr pointer);
886   void ProcessQualified(const std::string& id, xmlNodePtr qualified);
887   void ProcessArray(const std::string& id, xmlNodePtr array);
888   void ProcessTypeDecl(const std::string& id, xmlNodePtr type_decl);
889   void ProcessStructUnion(const std::string& id, bool is_struct,
890                           xmlNodePtr struct_union);
891   void ProcessEnum(const std::string& id, xmlNodePtr enumeration);
892 
893   Id ProcessBaseClass(xmlNodePtr base_class);
894   std::optional<Id> ProcessDataMember(bool is_struct, xmlNodePtr data_member);
895   void ProcessMemberFunction(std::vector<Id>& methods, xmlNodePtr method);
896   void ProcessMemberType(xmlNodePtr member_type);
897 
898   Id BuildSymbol(const SymbolInfo& info,
899                  std::optional<Id> type_id,
900                  const std::optional<std::string>& name);
901   Id BuildSymbols();
902 };
903 
Abigail(Graph & graph)904 Abigail::Abigail(Graph& graph) : maker_(graph) {}
905 
GetEdge(xmlNodePtr element)906 Id Abigail::GetEdge(xmlNodePtr element) {
907   return maker_.Get(GetAttributeOrDie(element, "type-id"));
908 }
909 
GetVariadic()910 Id Abigail::GetVariadic() {
911   if (!variadic_) {
912     variadic_ = {maker_.Add<Special>(Special::Kind::VARIADIC)};
913   }
914   return *variadic_;
915 }
916 
MakeFunctionType(xmlNodePtr function)917 Function Abigail::MakeFunctionType(xmlNodePtr function) {
918   std::vector<Id> parameters;
919   std::optional<Id> return_type;
920   for (auto* child = Child(function); child; child = Next(child)) {
921     const auto child_name = GetName(child);
922     if (return_type) {
923       Die() << "unexpected element after return-type";
924     }
925     if (child_name == "parameter") {
926       const auto is_variadic = ReadAttribute<bool>(child, "is-variadic", false);
927       parameters.push_back(is_variadic ? GetVariadic() : GetEdge(child));
928     } else if (child_name == "return") {
929       return_type = {GetEdge(child)};
930     } else {
931       Die() << "unrecognised " << GetName(function)
932             << " child element '" << child_name << "'";
933     }
934   }
935   if (!return_type) {
936     Die() << "missing return-type";
937   }
938   return {*return_type, parameters};
939 }
940 
ProcessRoot(xmlNodePtr root)941 Id Abigail::ProcessRoot(xmlNodePtr root) {
942   Clean(root);
943   Tidy(root);
944   const auto name = GetName(root);
945   if (name == "abi-corpus-group") {
946     ProcessCorpusGroup(root);
947   } else if (name == "abi-corpus") {
948     ProcessCorpus(root);
949   } else {
950     Die() << "unrecognised root element '" << name << "'";
951   }
952   return BuildSymbols();
953 }
954 
ProcessCorpusGroup(xmlNodePtr group)955 void Abigail::ProcessCorpusGroup(xmlNodePtr group) {
956   for (auto* corpus = Child(group); corpus; corpus = Next(corpus)) {
957     CheckName("abi-corpus", corpus);
958     ProcessCorpus(corpus);
959   }
960 }
961 
ProcessCorpus(xmlNodePtr corpus)962 void Abigail::ProcessCorpus(xmlNodePtr corpus) {
963   for (auto* element = Child(corpus); element; element = Next(element)) {
964     const auto name = GetName(element);
965     if (name == "elf-function-symbols" || name == "elf-variable-symbols") {
966       ProcessSymbols(element);
967     } else if (name == "elf-needed") {
968       // ignore this
969     } else if (name == "abi-instr") {
970       ProcessInstr(element);
971     } else {
972       Die() << "unrecognised abi-corpus child element '" << name << "'";
973     }
974   }
975 }
976 
ProcessSymbols(xmlNodePtr symbols)977 void Abigail::ProcessSymbols(xmlNodePtr symbols) {
978   for (auto* element = Child(symbols); element; element = Next(element)) {
979     CheckName("elf-symbol", element);
980     ProcessSymbol(element);
981   }
982 }
983 
ProcessSymbol(xmlNodePtr symbol)984 void Abigail::ProcessSymbol(xmlNodePtr symbol) {
985   // Symbol processing is done in two parts. In this first part, we parse just
986   // enough XML attributes to generate a symbol id and determine any aliases.
987   // Symbol ids in this format can be found in elf-symbol alias attributes and
988   // in {var,function}-decl elf-symbol-id attributes.
989   const auto name = GetAttributeOrDie(symbol, "name");
990   const auto version =
991       ReadAttribute<std::string>(symbol, "version", std::string());
992   const bool is_default_version =
993       ReadAttribute<bool>(symbol, "is-default-version", false);
994   const auto alias = GetAttribute(symbol, "alias");
995 
996   std::string elf_symbol_id = name;
997   std::optional<ElfSymbol::VersionInfo> version_info;
998   if (!version.empty()) {
999     version_info = ElfSymbol::VersionInfo{is_default_version, version};
1000     elf_symbol_id += VersionInfoToString(*version_info);
1001   }
1002 
1003   Check(symbol_info_map_
1004             .emplace(elf_symbol_id, SymbolInfo{name, version_info, symbol})
1005             .second)
1006       << "multiple symbols with id " << elf_symbol_id;
1007 
1008   if (alias) {
1009     std::istringstream is(*alias);
1010     std::string item;
1011     while (std::getline(is, item, ',')) {
1012       Check(alias_to_main_.insert({item, elf_symbol_id}).second)
1013           << "multiple aliases with id " << elf_symbol_id;
1014     }
1015   }
1016 }
1017 
ProcessUserDefinedType(std::string_view name,const std::string & id,xmlNodePtr decl)1018 bool Abigail::ProcessUserDefinedType(
1019     std::string_view name, const std::string& id, xmlNodePtr decl) {
1020   if (name == "typedef-decl") {
1021     ProcessTypedef(id, decl);
1022   } else if (name == "class-decl") {
1023     ProcessStructUnion(id, true, decl);
1024   } else if (name == "union-decl") {
1025     ProcessStructUnion(id, false, decl);
1026   } else if (name == "enum-decl") {
1027     ProcessEnum(id, decl);
1028   } else {
1029     return false;
1030   }
1031   return true;
1032 }
1033 
ProcessScope(xmlNodePtr scope)1034 void Abigail::ProcessScope(xmlNodePtr scope) {
1035   for (auto* element = Child(scope); element; element = Next(element)) {
1036     const auto name = GetName(element);
1037     const auto maybe_id = GetAttribute(element, "id");
1038     // all type elements have "id", all non-types do not
1039     if (maybe_id) {
1040       const auto& id = *maybe_id;
1041       if (name == "function-type") {
1042         ProcessFunctionType(id, element);
1043       } else if (name == "pointer-type-def") {
1044         ProcessPointer(id, true, element);
1045       } else if (name == "reference-type-def") {
1046         ProcessPointer(id, false, element);
1047       } else if (name == "qualified-type-def") {
1048         ProcessQualified(id, element);
1049       } else if (name == "array-type-def") {
1050         ProcessArray(id, element);
1051       } else if (name == "type-decl") {
1052         ProcessTypeDecl(id, element);
1053       } else if (!ProcessUserDefinedType(name, id, element)) {
1054         Die() << "bad abi-instr type child element '" << name << "'";
1055       }
1056     } else {
1057       if (name == "var-decl") {
1058         ProcessDecl(true, element);
1059       } else if (name == "function-decl") {
1060         ProcessDecl(false, element);
1061       } else if (name == "namespace-decl") {
1062         ProcessNamespace(element);
1063       } else {
1064         Die() << "bad abi-instr non-type child element '" << name << "'";
1065       }
1066     }
1067   }
1068 }
1069 
ProcessInstr(xmlNodePtr instr)1070 void Abigail::ProcessInstr(xmlNodePtr instr) {
1071   ProcessScope(instr);
1072 }
1073 
ProcessNamespace(xmlNodePtr scope)1074 void Abigail::ProcessNamespace(xmlNodePtr scope) {
1075   const auto name = GetAttributeOrDie(scope, "name");
1076   const PushScopeName push_scope_name(scope_, "namespace", name);
1077   ProcessScope(scope);
1078 }
1079 
ProcessDecl(bool is_variable,xmlNodePtr decl)1080 Id Abigail::ProcessDecl(bool is_variable, xmlNodePtr decl) {
1081   const auto name = scope_.name + GetAttributeOrDie(decl, "name");
1082   const auto symbol_id = GetAttribute(decl, "elf-symbol-id");
1083   const auto type = is_variable ? GetEdge(decl)
1084                                 : maker_.Add<Function>(MakeFunctionType(decl));
1085   if (symbol_id) {
1086     // There's a link to an ELF symbol.
1087     const auto [it, inserted] = symbol_id_and_full_name_.emplace(
1088         *symbol_id, std::make_pair(type, name));
1089     if (!inserted) {
1090       Die() << "duplicate type for '" << *symbol_id << "'";
1091     }
1092   }
1093   return type;
1094 }
1095 
ProcessFunctionType(const std::string & id,xmlNodePtr function)1096 void Abigail::ProcessFunctionType(const std::string& id, xmlNodePtr function) {
1097   maker_.MaybeSet<Function>(id, MakeFunctionType(function));
1098 }
1099 
ProcessTypedef(const std::string & id,xmlNodePtr type_definition)1100 void Abigail::ProcessTypedef(const std::string& id,
1101                              xmlNodePtr type_definition) {
1102   const auto name = scope_.name + GetAttributeOrDie(type_definition, "name");
1103   const auto type = GetEdge(type_definition);
1104   maker_.MaybeSet<Typedef>(id, name, type);
1105 }
1106 
ProcessPointer(const std::string & id,bool is_pointer,xmlNodePtr pointer)1107 void Abigail::ProcessPointer(const std::string& id, bool is_pointer,
1108                              xmlNodePtr pointer) {
1109   const auto type = GetEdge(pointer);
1110   const auto kind = is_pointer ? PointerReference::Kind::POINTER
1111                                : ReadAttribute<PointerReference::Kind>(
1112                                      pointer, "kind", &ParseReferenceKind);
1113   maker_.MaybeSet<PointerReference>(id, kind, type);
1114 }
1115 
ProcessQualified(const std::string & id,xmlNodePtr qualified)1116 void Abigail::ProcessQualified(const std::string& id, xmlNodePtr qualified) {
1117   std::vector<Qualifier> qualifiers;
1118   // Do these in reverse order so we get CVR ordering.
1119   if (ReadAttribute<bool>(qualified, "restrict", false)) {
1120     qualifiers.push_back(Qualifier::RESTRICT);
1121   }
1122   if (ReadAttribute<bool>(qualified, "volatile", false)) {
1123     qualifiers.push_back(Qualifier::VOLATILE);
1124   }
1125   if (ReadAttribute<bool>(qualified, "const", false)) {
1126     qualifiers.push_back(Qualifier::CONST);
1127   }
1128   Check(!qualifiers.empty()) << "qualified-type-def has no qualifiers";
1129   // Handle multiple qualifiers by unconditionally adding as new nodes all but
1130   // the last qualifier which is set into place.
1131   auto type = GetEdge(qualified);
1132   auto count = qualifiers.size();
1133   for (auto qualifier : qualifiers) {
1134     --count;
1135     const Qualified node(qualifier, type);
1136     if (count) {
1137       type = maker_.Add<Qualified>(node);
1138     } else {
1139       maker_.MaybeSet<Qualified>(id, node);
1140     }
1141   }
1142 }
1143 
ProcessArray(const std::string & id,xmlNodePtr array)1144 void Abigail::ProcessArray(const std::string& id, xmlNodePtr array) {
1145   std::vector<size_t> dimensions;
1146   for (auto* child = Child(array); child; child = Next(child)) {
1147     CheckName("subrange", child);
1148     const auto length = ReadAttribute<uint64_t>(child, "length", &ParseLength);
1149     dimensions.push_back(length);
1150   }
1151   Check(!dimensions.empty()) << "array-type-def element has no children";
1152   // int[M][N] means array[M] of array[N] of int
1153   //
1154   // We need to chain a bunch of types together:
1155   //
1156   // id = array[n] of id = ... = array[n] of id
1157   //
1158   // where the first id is the new type in slot ix
1159   // and the last id is the old type in slot type
1160   //
1161   // Use the same approach as for qualifiers.
1162   auto type = GetEdge(array);
1163   auto count = dimensions.size();
1164   for (auto it = dimensions.crbegin(); it != dimensions.crend(); ++it) {
1165     --count;
1166     const auto size = *it;
1167     const Array node(size, type);
1168     if (count) {
1169       type = maker_.Add<Array>(node);
1170     } else {
1171       maker_.MaybeSet<Array>(id, node);
1172     }
1173   }
1174 }
1175 
ProcessTypeDecl(const std::string & id,xmlNodePtr type_decl)1176 void Abigail::ProcessTypeDecl(const std::string& id, xmlNodePtr type_decl) {
1177   const auto name = scope_.name + GetAttributeOrDie(type_decl, "name");
1178   const auto bits = ReadAttribute<size_t>(type_decl, "size-in-bits", 0);
1179   if (bits % 8) {
1180     Die() << "size-in-bits is not a multiple of 8";
1181   }
1182   const auto bytes = bits / 8;
1183 
1184   if (name == "void") {
1185     maker_.MaybeSet<Special>(id, Special::Kind::VOID);
1186   } else {
1187     // libabigail doesn't model encoding at all and we don't want to parse names
1188     // (which will not always work) in an attempt to reconstruct it.
1189     maker_.MaybeSet<Primitive>(id, name, /* encoding= */ std::nullopt, bytes);
1190   }
1191 }
1192 
ProcessStructUnion(const std::string & id,bool is_struct,xmlNodePtr struct_union)1193 void Abigail::ProcessStructUnion(const std::string& id, bool is_struct,
1194                                  xmlNodePtr struct_union) {
1195   // Libabigail sometimes reports is-declaration-only but still provides some
1196   // child elements. So we check both things.
1197   const bool forward =
1198       ReadAttribute<bool>(struct_union, "is-declaration-only", false)
1199       && Child(struct_union) == nullptr;
1200   const auto kind = is_struct
1201                     ? StructUnion::Kind::STRUCT
1202                     : StructUnion::Kind::UNION;
1203   const bool is_anonymous =
1204       ReadAttribute<bool>(struct_union, "is-anonymous", false);
1205   const auto name =
1206       is_anonymous ? std::string() : GetAttributeOrDie(struct_union, "name");
1207   const auto full_name =
1208       is_anonymous ? std::string() : scope_.name + name;
1209   const PushScopeName push_scope_name(scope_, kind, name);
1210   if (forward) {
1211     maker_.MaybeSet<StructUnion>(id, kind, full_name);
1212     return;
1213   }
1214   const auto bits = ReadAttribute<size_t>(struct_union, "size-in-bits", 0);
1215   const auto bytes = (bits + 7) / 8;
1216 
1217   std::vector<Id> base_classes;
1218   std::vector<Id> methods;
1219   std::vector<Id> members;
1220   for (auto* child = Child(struct_union); child; child = Next(child)) {
1221     const auto child_name = GetName(child);
1222     if (child_name == "data-member") {
1223       if (const auto member = ProcessDataMember(is_struct, child)) {
1224         members.push_back(*member);
1225       }
1226     } else if (child_name == "member-type") {
1227       ProcessMemberType(child);
1228     } else if (child_name == "base-class") {
1229       base_classes.push_back(ProcessBaseClass(child));
1230     } else if (child_name == "member-function") {
1231       ProcessMemberFunction(methods, child);
1232     } else {
1233       Die() << "unrecognised " << kind << "-decl child element '" << child_name
1234             << "'";
1235     }
1236   }
1237 
1238   maker_.MaybeSet<StructUnion>(id, kind, full_name, bytes, base_classes,
1239                                methods, members);
1240 }
1241 
ProcessEnum(const std::string & id,xmlNodePtr enumeration)1242 void Abigail::ProcessEnum(const std::string& id, xmlNodePtr enumeration) {
1243   const bool forward =
1244       ReadAttribute<bool>(enumeration, "is-declaration-only", false);
1245   const auto name = ReadAttribute<bool>(enumeration, "is-anonymous", false)
1246                     ? std::string()
1247                     : scope_.name + GetAttributeOrDie(enumeration, "name");
1248   if (forward) {
1249     maker_.MaybeSet<Enumeration>(id, name);
1250     return;
1251   }
1252 
1253   const xmlNodePtr underlying = Child(enumeration);
1254   Check(underlying != nullptr) << "enum-decl has no child elements";
1255   CheckName("underlying-type", underlying);
1256   const auto type = GetEdge(underlying);
1257 
1258   std::vector<std::pair<std::string, int64_t>> enumerators;
1259   for (auto* enumerator = Next(underlying); enumerator;
1260        enumerator = Next(enumerator)) {
1261     CheckName("enumerator", enumerator);
1262     const auto enumerator_name = GetAttributeOrDie(enumerator, "name");
1263     // libabigail currently supports anything that fits in an int64_t
1264     const auto enumerator_value =
1265         ReadAttributeOrDie<int64_t>(enumerator, "value");
1266     enumerators.emplace_back(enumerator_name, enumerator_value);
1267   }
1268 
1269   maker_.MaybeSet<Enumeration>(id, name, type, enumerators);
1270 }
1271 
ProcessBaseClass(xmlNodePtr base_class)1272 Id Abigail::ProcessBaseClass(xmlNodePtr base_class) {
1273   const auto& type = GetEdge(base_class);
1274   const auto offset =
1275       ReadAttributeOrDie<size_t>(base_class, "layout-offset-in-bits");
1276   const auto inheritance = ReadAttribute<bool>(base_class, "is-virtual", false)
1277                            ? BaseClass::Inheritance::VIRTUAL
1278                            : BaseClass::Inheritance::NON_VIRTUAL;
1279   return maker_.Add<BaseClass>(type, offset, inheritance);
1280 }
1281 
ProcessDataMember(bool is_struct,xmlNodePtr data_member)1282 std::optional<Id> Abigail::ProcessDataMember(bool is_struct,
1283                                              xmlNodePtr data_member) {
1284   const xmlNodePtr decl = GetOnlyChild(data_member);
1285   CheckName("var-decl", decl);
1286   if (ReadAttribute<bool>(data_member, "static", false)) {
1287     ProcessDecl(true, decl);
1288     return {};
1289   }
1290 
1291   const auto offset = is_struct
1292                       ? ReadAttributeOrDie<size_t>(data_member,
1293                                                    "layout-offset-in-bits")
1294                       : 0;
1295   const auto name = GetAttributeOrDie(decl, "name");
1296   const auto type = GetEdge(decl);
1297 
1298   // Note: libabigail does not model member size, yet
1299   return {maker_.Add<Member>(name, type, offset, 0)};
1300 }
1301 
ProcessMemberFunction(std::vector<Id> & methods,xmlNodePtr method)1302 void Abigail::ProcessMemberFunction(std::vector<Id>& methods,
1303                                     xmlNodePtr method) {
1304   const xmlNodePtr decl = GetOnlyChild(method);
1305   CheckName("function-decl", decl);
1306   // ProcessDecl creates symbol references so must be called unconditionally.
1307   const auto type = ProcessDecl(false, decl);
1308   const auto vtable_offset = ReadAttribute<uint64_t>(method, "vtable-offset");
1309   if (vtable_offset) {
1310     static const std::string missing = "{missing}";
1311     const auto mangled_name = ReadAttribute(decl, "mangled-name", missing);
1312     const auto name = GetAttributeOrDie(decl, "name");
1313     methods.push_back(
1314         maker_.Add<Method>(mangled_name, name, vtable_offset.value(), type));
1315   }
1316 }
1317 
ProcessMemberType(xmlNodePtr member_type)1318 void Abigail::ProcessMemberType(xmlNodePtr member_type) {
1319   const xmlNodePtr decl = GetOnlyChild(member_type);
1320   const auto id = GetAttributeOrDie(decl, "id");
1321   const auto name = GetName(decl);
1322   if (!ProcessUserDefinedType(name, id, decl)) {
1323     Die() << "unrecognised member-type child element '" << name << "'";
1324   }
1325 }
1326 
BuildSymbol(const SymbolInfo & info,std::optional<Id> type_id,const std::optional<std::string> & name)1327 Id Abigail::BuildSymbol(const SymbolInfo& info,
1328                         std::optional<Id> type_id,
1329                         const std::optional<std::string>& name) {
1330   const xmlNodePtr symbol = info.node;
1331   const bool is_defined = ReadAttributeOrDie<bool>(symbol, "is-defined");
1332   const auto crc = ReadAttribute<ElfSymbol::CRC>(symbol, "crc");
1333   const auto ns = ReadAttribute<std::string>(symbol, "namespace");
1334   const auto type = ReadAttributeOrDie<ElfSymbol::SymbolType>(symbol, "type");
1335   const auto binding =
1336       ReadAttributeOrDie<ElfSymbol::Binding>(symbol, "binding");
1337   const auto visibility =
1338       ReadAttributeOrDie<ElfSymbol::Visibility>(symbol, "visibility");
1339 
1340   return maker_.Add<ElfSymbol>(
1341       info.name, info.version_info,
1342       is_defined, type, binding, visibility, crc, ns, type_id, name);
1343 }
1344 
BuildSymbols()1345 Id Abigail::BuildSymbols() {
1346   // Libabigail's model is (approximately):
1347   //
1348   //   (alias)* -> main symbol <- some decl -> type
1349   //
1350   // which we turn into:
1351   //
1352   //   symbol / alias -> type
1353   //
1354   for (const auto& [alias, main] : alias_to_main_) {
1355     Check(!alias_to_main_.contains(main))
1356         << "found main symbol and alias with id " << main;
1357   }
1358   // Build final symbol table, tying symbols to their types.
1359   std::map<std::string, Id> symbols;
1360   for (const auto& [id, symbol_info] : symbol_info_map_) {
1361     const auto main = alias_to_main_.find(id);
1362     const auto lookup = main != alias_to_main_.end() ? main->second : id;
1363     const auto type_id_and_name_it = symbol_id_and_full_name_.find(lookup);
1364     std::optional<Id> type_id;
1365     std::optional<std::string> name;
1366     if (type_id_and_name_it != symbol_id_and_full_name_.end()) {
1367       const auto& type_id_and_name = type_id_and_name_it->second;
1368       type_id = {type_id_and_name.first};
1369       name = {type_id_and_name.second};
1370     }
1371     symbols.insert({id, BuildSymbol(symbol_info, type_id, name)});
1372   }
1373   return maker_.Add<Interface>(symbols);
1374 }
1375 
1376 using Parser = xmlDocPtr(xmlParserCtxtPtr context, const char* url,
1377                          const char* encoding, int options);
1378 
Parse(Runtime & runtime,const std::function<Parser> & parser)1379 Document Parse(Runtime& runtime, const std::function<Parser>& parser) {
1380   const std::unique_ptr<
1381       std::remove_pointer_t<xmlParserCtxtPtr>, void(*)(xmlParserCtxtPtr)>
1382       context(xmlNewParserCtxt(), xmlFreeParserCtxt);
1383   Document document(nullptr, xmlFreeDoc);
1384   {
1385     const Time t(runtime, "abigail.libxml_parse");
1386     document.reset(parser(context.get(), nullptr, nullptr, XML_PARSE_NONET));
1387   }
1388   Check(document != nullptr) << "failed to parse input as XML";
1389   return document;
1390 }
1391 
1392 }  // namespace
1393 
ProcessDocument(Graph & graph,xmlDocPtr document)1394 Id ProcessDocument(Graph& graph, xmlDocPtr document) {
1395   xmlNodePtr root = xmlDocGetRootElement(document);
1396   Check(root != nullptr) << "XML document has no root element";
1397   const Id id = Abigail(graph).ProcessRoot(root);
1398   return RemoveUselessQualifiers(graph, id);
1399 }
1400 
Read(Runtime & runtime,const std::string & path)1401 Document Read(Runtime& runtime, const std::string& path) {
1402   const FileDescriptor fd(path.c_str(), O_RDONLY);
1403   return Parse(runtime, [&](xmlParserCtxtPtr context, const char* url,
1404                             const char* encoding, int options) {
1405     return xmlCtxtReadFd(context, fd.Value(), url, encoding, options);
1406   });
1407 }
1408 
Read(Runtime & runtime,Graph & graph,const std::string & path)1409 Id Read(Runtime& runtime, Graph& graph, const std::string& path) {
1410   // Read the XML.
1411   const Document document = Read(runtime, path);
1412   // Process the XML.
1413   return ProcessDocument(graph, document.get());
1414 }
1415 
ReadFromString(Runtime & runtime,Graph & graph,const std::string_view xml)1416 Id ReadFromString(Runtime& runtime, Graph& graph, const std::string_view xml) {
1417   // Read the XML.
1418   const Document document =
1419       Parse(runtime, [&](xmlParserCtxtPtr context, const char* url,
1420                          const char* encoding, int options) {
1421     return xmlCtxtReadMemory(context, xml.data(), static_cast<int>(xml.size()),
1422                              url, encoding, options);
1423   });
1424   // Process the XML.
1425   return ProcessDocument(graph, document.get());
1426 }
1427 
1428 }  // namespace abixml
1429 }  // namespace stg
1430