1 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
2 // -*- mode: C++ -*-
3 //
4 // Copyright 2021-2024 Google LLC
5 //
6 // Licensed under the Apache License v2.0 with LLVM Exceptions (the
7 // "License"); you may not use this file except in compliance with the
8 // License. You may obtain a copy of the License at
9 //
10 // https://llvm.org/LICENSE.txt
11 //
12 // Unless required by applicable law or agreed to in writing, software
13 // distributed under the License is distributed on an "AS IS" BASIS,
14 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 // See the License for the specific language governing permissions and
16 // limitations under the License.
17 //
18 // Author: Giuliano Procida
19 // Author: Ignes Simeonova
20
21 #include "abigail_reader.h"
22
23 #include <fcntl.h>
24
25 #include <algorithm>
26 #include <array>
27 #include <cstddef>
28 #include <cstdint>
29 #include <functional>
30 #include <ios>
31 #include <map>
32 #include <memory>
33 #include <optional>
34 #include <set>
35 #include <sstream>
36 #include <string>
37 #include <string_view>
38 #include <type_traits>
39 #include <unordered_map>
40 #include <utility>
41 #include <vector>
42
43 #include <libxml/globals.h> // xmlFree moves to xmlmemory.h later
44 #include <libxml/parser.h>
45 #include <libxml/tree.h>
46 #include <libxml/xmlstring.h>
47 #include "error.h"
48 #include "file_descriptor.h"
49 #include "graph.h"
50 #include "runtime.h"
51 #include "scope.h"
52 #include "type_normalisation.h"
53
54 namespace stg {
55 namespace abixml {
56
57 namespace {
58
59 // Cast a libxml string to C string and present it as a string_view.
FromLibxml(const xmlChar * str)60 std::string_view FromLibxml(const xmlChar* str) {
61 return reinterpret_cast<const char*>(str);
62 }
63
64 // Cast a C string to a libxml string.
ToLibxml(const char * str)65 const xmlChar* ToLibxml(const char* str) {
66 return reinterpret_cast<const xmlChar*>(str);
67 }
68
69 // Get the name of an XML element.
GetName(xmlNodePtr element)70 std::string_view GetName(xmlNodePtr element) {
71 return FromLibxml(element->name);
72 }
73
CheckName(const char * name,xmlNodePtr element)74 void CheckName(const char* name, xmlNodePtr element) {
75 const auto element_name = GetName(element);
76 if (element_name != name) {
77 Die() << "expected element '" << name
78 << "' but got '" << element_name << "'";
79 }
80 }
81
Child(xmlNodePtr node)82 xmlNodePtr Child(xmlNodePtr node) {
83 return node->children;
84 }
85
Next(xmlNodePtr node)86 xmlNodePtr Next(xmlNodePtr node) {
87 return node->next;
88 }
89
GetOnlyChild(xmlNodePtr element)90 xmlNodePtr GetOnlyChild(xmlNodePtr element) {
91 const xmlNodePtr child = Child(element);
92 if (child == nullptr || Next(child) != nullptr) {
93 Die() << "element '" << GetName(element) << "' without exactly one child";
94 }
95 return child;
96 }
97
98 // Get an optional attribute.
GetAttribute(xmlNodePtr node,const char * name)99 std::optional<std::string> GetAttribute(xmlNodePtr node, const char* name) {
100 std::optional<std::string> result;
101 xmlChar* attribute = xmlGetProp(node, ToLibxml(name));
102 if (attribute) {
103 result.emplace(FromLibxml(attribute));
104 xmlFree(attribute);
105 }
106 return result;
107 }
108
109 // Get an attribute.
GetAttributeOrDie(xmlNodePtr node,const char * name)110 std::string GetAttributeOrDie(xmlNodePtr node, const char* name) {
111 xmlChar* attribute = xmlGetProp(node, ToLibxml(name));
112 if (!attribute) {
113 Die() << "element '" << GetName(node)
114 << "' missing attribute '" << name << "'";
115 }
116 const std::string result(FromLibxml(attribute));
117 xmlFree(attribute);
118 return result;
119 }
120
121 // Set an attribute value.
SetAttribute(xmlNodePtr node,const char * name,const std::string & value)122 void SetAttribute(xmlNodePtr node, const char* name, const std::string& value) {
123 xmlSetProp(node, ToLibxml(name), ToLibxml(value.c_str()));
124 }
125
126 // Unset an attribute value.
UnsetAttribute(xmlNodePtr node,const char * name)127 void UnsetAttribute(xmlNodePtr node, const char* name) {
128 xmlUnsetProp(node, ToLibxml(name));
129 }
130
131 // Remove a node and free its storage.
RemoveNode(xmlNodePtr node)132 void RemoveNode(xmlNodePtr node) {
133 xmlUnlinkNode(node);
134 xmlFreeNode(node);
135 }
136
137 // Move a node to be the last child of another.
MoveNode(xmlNodePtr node,xmlNodePtr destination)138 void MoveNode(xmlNodePtr node, xmlNodePtr destination) {
139 xmlUnlinkNode(node);
140 xmlAddChild(destination, node);
141 }
142
// Parse an entire string as a value of type T using stream extraction.
//
// Returns the value if extraction succeeds and consumes every character of
// the input, and an empty optional otherwise. Whitespace is not skipped, so
// leading whitespace, an empty string and trailing characters are all parse
// failures.
//
// For unsigned types a leading '-' is rejected up front: stream extraction
// would otherwise accept it and wrap the value around, so that "-1" would
// silently parse as the maximum value of T.
template <typename T>
std::optional<T> Parse(const std::string& value) {
  if constexpr (std::is_unsigned_v<T>) {
    if (!value.empty() && value.front() == '-') {
      return {};
    }
  }
  T result{};
  std::istringstream is(value);
  is >> std::noskipws >> result;
  // Require success and that the whole input was consumed.
  if (is && is.eof()) {
    return {result};
  }
  return {};
}
153
154 template <>
Parse(const std::string & value)155 std::optional<bool> Parse<bool>(const std::string& value) {
156 if (value == "yes") {
157 return {true};
158 } else if (value == "no") {
159 return {false};
160 }
161 return {};
162 }
163
164 template <>
Parse(const std::string & value)165 std::optional<ElfSymbol::SymbolType> Parse<ElfSymbol::SymbolType>(
166 const std::string& value) {
167 if (value == "no-type") {
168 return {ElfSymbol::SymbolType::NOTYPE};
169 } else if (value == "object-type") {
170 return {ElfSymbol::SymbolType::OBJECT};
171 } else if (value == "func-type") {
172 return {ElfSymbol::SymbolType::FUNCTION};
173 } else if (value == "common-type") {
174 return {ElfSymbol::SymbolType::COMMON};
175 } else if (value == "tls-type") {
176 return {ElfSymbol::SymbolType::TLS};
177 } else if (value == "gnu-ifunc-type") {
178 return {ElfSymbol::SymbolType::GNU_IFUNC};
179 }
180 return {};
181 }
182
183 template <>
Parse(const std::string & value)184 std::optional<ElfSymbol::Binding> Parse<ElfSymbol::Binding>(
185 const std::string& value) {
186 if (value == "global-binding") {
187 return {ElfSymbol::Binding::GLOBAL};
188 } else if (value == "local-binding") {
189 return {ElfSymbol::Binding::LOCAL};
190 } else if (value == "weak-binding") {
191 return {ElfSymbol::Binding::WEAK};
192 } else if (value == "gnu-unique-binding") {
193 return {ElfSymbol::Binding::GNU_UNIQUE};
194 }
195 return {};
196 }
197
198 template <>
Parse(const std::string & value)199 std::optional<ElfSymbol::Visibility> Parse<ElfSymbol::Visibility>(
200 const std::string& value) {
201 if (value == "default-visibility") {
202 return {ElfSymbol::Visibility::DEFAULT};
203 } else if (value == "protected-visibility") {
204 return {ElfSymbol::Visibility::PROTECTED};
205 } else if (value == "hidden-visibility") {
206 return {ElfSymbol::Visibility::HIDDEN};
207 } else if (value == "internal-visibility") {
208 return {ElfSymbol::Visibility::INTERNAL};
209 }
210 return {};
211 }
212
213 template <>
Parse(const std::string & value)214 std::optional<ElfSymbol::CRC> Parse<ElfSymbol::CRC>(const std::string& value) {
215 uint32_t number;
216 std::istringstream is(value);
217 is >> std::noskipws >> std::hex >> number;
218 if (is && is.eof()) {
219 return std::make_optional<ElfSymbol::CRC>(number);
220 }
221 return std::nullopt;
222 }
223
224 template <typename T>
GetParsedValueOrDie(xmlNodePtr element,const char * name,const std::string & value,const std::optional<T> & parse)225 T GetParsedValueOrDie(xmlNodePtr element, const char* name,
226 const std::string& value, const std::optional<T>& parse) {
227 if (parse) {
228 return *parse;
229 }
230 Die() << "element '" << GetName(element)
231 << "' has attribute '" << name
232 << "' with bad value '" << value << "'";
233 }
234
235 template <typename T>
ReadAttributeOrDie(xmlNodePtr element,const char * name)236 T ReadAttributeOrDie(xmlNodePtr element, const char* name) {
237 const auto value = GetAttributeOrDie(element, name);
238 return GetParsedValueOrDie(element, name, value, Parse<T>(value));
239 }
240
241 template <typename T>
ReadAttribute(xmlNodePtr element,const char * name)242 std::optional<T> ReadAttribute(xmlNodePtr element, const char* name) {
243 const auto value = GetAttribute(element, name);
244 if (value) {
245 return {GetParsedValueOrDie(element, name, *value, Parse<T>(*value))};
246 }
247 return {};
248 }
249
250 template <typename T>
ReadAttribute(xmlNodePtr element,const char * name,const T & default_value)251 T ReadAttribute(xmlNodePtr element, const char* name, const T& default_value) {
252 const auto value = GetAttribute(element, name);
253 if (value) {
254 return GetParsedValueOrDie(element, name, *value, Parse<T>(*value));
255 }
256 return default_value;
257 }
258
259 template <typename T>
ReadAttribute(xmlNodePtr element,const char * name,std::function<std::optional<T> (const std::string &)> parse)260 T ReadAttribute(xmlNodePtr element, const char* name,
261 std::function<std::optional<T>(const std::string&)> parse) {
262 const auto value = GetAttributeOrDie(element, name);
263 return GetParsedValueOrDie(element, name, value, parse(value));
264 }
265
266 // Remove non-element nodes, recursively.
267 //
268 // This simplifies subsequent manipulation. This should only remove comment,
269 // text and possibly CDATA nodes.
StripNonElements(xmlNodePtr node)270 void StripNonElements(xmlNodePtr node) {
271 switch (node->type) {
272 case XML_COMMENT_NODE:
273 case XML_TEXT_NODE:
274 case XML_CDATA_SECTION_NODE:
275 RemoveNode(node);
276 break;
277 case XML_ELEMENT_NODE: {
278 xmlNodePtr child = Child(node);
279 while (child) {
280 const xmlNodePtr next = Next(child);
281 StripNonElements(child);
282 child = next;
283 }
284 break;
285 }
286 default:
287 Die() << "unexpected XML node type: " << node->type;
288 }
289 }
290
291 // Determine whether one XML element is a subtree of another, and optionally,
292 // actually equal to it.
SubOrEqualTree(bool also_equal,xmlNodePtr left,xmlNodePtr right)293 bool SubOrEqualTree(bool also_equal, xmlNodePtr left, xmlNodePtr right) {
294 // Node names must match.
295 const auto left_name = GetName(left);
296 const auto right_name = GetName(right);
297 if (left_name != right_name) {
298 return false;
299 }
300
301 // Attributes may be missing on the left, but must match otherwise.
302 size_t left_attributes = 0;
303 for (auto* p = left->properties; p; p = p->next) {
304 ++left_attributes;
305 const auto attribute = FromLibxml(p->name);
306 const char* attribute_name = attribute.data();
307 const auto left_value = GetAttributeOrDie(left, attribute_name);
308 const auto right_value = GetAttribute(right, attribute_name);
309 if (!right_value || left_value != right_value.value()) {
310 return false;
311 }
312 }
313 // To also be equal, we just need to check the counts are the same.
314 if (also_equal) {
315 size_t right_attributes = 0;
316 for (auto* p = right->properties; p; p = p->next) {
317 ++right_attributes;
318 }
319 if (left_attributes != right_attributes) {
320 return false;
321 }
322 }
323
324 // The left subelements must be a subsequence of the right ones and to also be
325 // equal, we must not have skipped any right ones.
326 xmlNodePtr left_child = Child(left);
327 xmlNodePtr right_child = Child(right);
328 while (left_child != nullptr && right_child != nullptr) {
329 if (SubOrEqualTree(also_equal, left_child, right_child)) {
330 left_child = Next(left_child);
331 } else if (also_equal) {
332 return false;
333 }
334 right_child = Next(right_child);
335 }
336 return left_child == nullptr && (right_child == nullptr || !also_equal);
337 }
338
339 } // namespace
340
341 // Determine whether one XML element is a subtree of another.
SubTree(xmlNodePtr left,xmlNodePtr right)342 bool SubTree(xmlNodePtr left, xmlNodePtr right) {
343 return SubOrEqualTree(false, left, right);
344 }
345
346 // Determine whether one XML element is the same as another.
EqualTree(xmlNodePtr left,xmlNodePtr right)347 bool EqualTree(xmlNodePtr left, xmlNodePtr right) {
348 return SubOrEqualTree(true, left, right);
349 }
350
351 // Find a maximal XML element if one exists.
MaximalTree(const std::vector<xmlNodePtr> & nodes)352 std::optional<size_t> MaximalTree(const std::vector<xmlNodePtr>& nodes) {
353 if (nodes.empty()) {
354 return std::nullopt;
355 }
356
357 // Find a potentially maximal candidate by scanning through and retaining the
358 // new node if it's a supertree of the current candidate.
359 const auto count = nodes.size();
360 std::vector<bool> ok(count);
361 size_t candidate = 0;
362 ok[candidate] = true;
363 for (size_t ix = 1; ix < count; ++ix) {
364 if (SubTree(nodes[candidate], nodes[ix])) {
365 candidate = ix;
366 ok[candidate] = true;
367 }
368 }
369
370 // Verify the candidate is indeed maximal by comparing it with the nodes not
371 // already known to be subtrees of it.
372 const auto& candidate_node = nodes[candidate];
373 for (size_t ix = 0; ix < count; ++ix) {
374 const auto& node = nodes[ix];
375 if (!ok[ix] && !SubTree(node, candidate_node)) {
376 return std::nullopt;
377 }
378 }
379
380 return std::make_optional(candidate);
381 }
382
383 namespace {
384
// Report whether `needle` is equal to any element of `haystack`.
template <size_t N>
bool Contains(const std::array<std::string_view, N>& haystack,
              std::string_view needle) {
  return std::any_of(
      haystack.begin(), haystack.end(),
      [needle](std::string_view candidate) { return candidate == needle; });
}
391
392 // Remove source location attributes.
393 //
394 // This simplifies element comparison later.
StripLocationInfo(xmlNodePtr node)395 void StripLocationInfo(xmlNodePtr node) {
396 static const std::array<std::string_view, 7> has_location_info = {
397 "class-decl",
398 "enum-decl",
399 "function-decl",
400 "parameter",
401 "typedef-decl",
402 "union-decl",
403 "var-decl"
404 };
405
406 if (Contains(has_location_info, GetName(node))) {
407 UnsetAttribute(node, "filepath");
408 UnsetAttribute(node, "line");
409 UnsetAttribute(node, "column");
410 }
411 for (auto* child = Child(node); child; child = Next(child)) {
412 StripLocationInfo(child);
413 }
414 }
415
416 // Remove access attribute.
417 //
418 // This simplifies element comparison later in a very specific way: libabigail
419 // (possibly older versions) uses the access specifier for the type it's trying
420 // to "emit in scope", even for its containing types, making deduplicating types
421 // trickier. We don't care about access anyway, so just remove it everywhere.
StripAccess(xmlNodePtr node)422 void StripAccess(xmlNodePtr node) {
423 static const std::array<std::string_view, 5> has_access = {
424 "base-class",
425 "data-member",
426 "member-function",
427 "member-template",
428 "member-type",
429 };
430
431 if (Contains(has_access, GetName(node))) {
432 UnsetAttribute(node, "access");
433 }
434 for (auto* child = Child(node); child; child = Next(child)) {
435 StripAccess(child);
436 }
437 }
438
// Names of the XML elements for user-defined types other than typedefs.
//
// These are the named types that can be anonymous or that libabigail can mark
// as unreachable. The list is consulted by StripReachabilityAttributes and
// TidyAnonymousTypes.
const std::array<std::string_view, 3> kNamedTypes = {
  "class-decl",
  "enum-decl",
  "union-decl",
};
446
447 // Remove attributes emitted by abidw --load-all-types.
448 //
449 // With this invocation and if any user-defined types are deemed unreachable,
450 // libabigail will output a tracking-non-reachable-types attribute on top-level
451 // elements and an is-non-reachable attribute on each such type element.
452 //
453 // We have our own graph-theoretic notion of reachability and these attributes
454 // have no ABI relevance and can interfere with element comparisons.
StripReachabilityAttributes(xmlNodePtr node)455 void StripReachabilityAttributes(xmlNodePtr node) {
456 const auto node_name = GetName(node);
457
458 if (node_name == "abi-corpus-group" || node_name == "abi-corpus") {
459 UnsetAttribute(node, "tracking-non-reachable-types");
460 } else if (Contains(kNamedTypes, node_name)) {
461 UnsetAttribute(node, "is-non-reachable");
462 }
463
464 for (auto* child = Child(node); child; child = Next(child)) {
465 StripReachabilityAttributes(child);
466 }
467 }
468
469 // Fix bad DWARF -> ELF links caused by size zero symbol confusion.
470 //
471 // libabigail used to be confused by these sorts of symbols, resulting in
472 // declarations pointing at the wrong ELF symbols:
473 //
474 // 573623: ffffffc0122383c0 256 OBJECT GLOBAL DEFAULT 33 vm_node_stat
475 // 573960: ffffffc0122383c0 0 OBJECT GLOBAL DEFAULT 33 vm_numa_stat
FixBadDwarfElfLinks(xmlNodePtr root)476 void FixBadDwarfElfLinks(xmlNodePtr root) {
477 std::unordered_map<std::string, size_t> elf_links;
478
479 // See which ELF symbol IDs have multiple declarations.
480 const std::function<void(xmlNodePtr)> count = [&](xmlNodePtr node) {
481 if (GetName(node) == "var-decl") {
482 const auto symbol_id = GetAttribute(node, "elf-symbol-id");
483 if (symbol_id) {
484 ++elf_links[symbol_id.value()];
485 }
486 }
487
488 for (auto* child = Child(node); child; child = Next(child)) {
489 count(child);
490 }
491 };
492 count(root);
493
494 // Fix up likely bad links from DWARF declaration to ELF symbol.
495 const std::function<void(xmlNodePtr)> fix = [&](xmlNodePtr node) {
496 if (GetName(node) == "var-decl") {
497 const auto name = GetAttributeOrDie(node, "name");
498 const auto mangled_name = GetAttribute(node, "mangled-name");
499 const auto symbol_id = GetAttribute(node, "elf-symbol-id");
500 if (mangled_name && symbol_id && name != symbol_id.value()
501 && elf_links[symbol_id.value()] > 1) {
502 if (mangled_name.value() == name) {
503 Warn() << "fixing up ELF symbol for '" << name
504 << "' (was '" << symbol_id.value() << "')";
505 SetAttribute(node, "elf-symbol-id", name);
506 } else if (mangled_name.value() == symbol_id.value()) {
507 Warn() << "fixing up mangled name and ELF symbol for '" << name
508 << "' (was '" << symbol_id.value() << "')";
509 SetAttribute(node, "mangled-name", name);
510 SetAttribute(node, "elf-symbol-id", name);
511 }
512 }
513 }
514
515 for (auto* child = Child(node); child; child = Next(child)) {
516 fix(child);
517 }
518 };
519 fix(root);
520 }
521
522 // Tidy anonymous types in various ways.
523 //
524 // 1. Normalise anonymous type names by dropping the name attribute.
525 //
526 // Anonymous type names take the form __anonymous_foo__N where foo is one of
527 // enum, struct or union and N is an optional numerical suffix. We don't care
528 // about these names but they may cause trouble when comparing elements.
529 //
530 // 2. Reanonymise anonymous types that have been given names.
531 //
532 // At some point abidw changed its behaviour given an anonymous type with a
533 // naming typedef. In addition to linking the typedef and type in both
534 // directions, the code now gives (some) anonymous types the same name as the
535 // typedef. This misrepresents the original types.
536 //
537 // Such types should be anonymous. We set is-anonymous and drop the name.
538 //
539 // 3. Discard naming typedef backlinks.
540 //
541 // The attribute naming-typedef-id is a backwards link from an anonymous type to
542 // the typedef that refers to it.
543 //
544 // We don't care about these attributes and they may cause comparison issues.
TidyAnonymousTypes(xmlNodePtr node)545 void TidyAnonymousTypes(xmlNodePtr node) {
546 if (Contains(kNamedTypes, GetName(node))) {
547 const bool is_anon = ReadAttribute<bool>(node, "is-anonymous", false);
548 const auto naming_attribute = GetAttribute(node, "naming-typedef-id");
549 if (is_anon) {
550 UnsetAttribute(node, "name");
551 } else if (naming_attribute) {
552 SetAttribute(node, "is-anonymous", "yes");
553 UnsetAttribute(node, "name");
554 }
555 if (naming_attribute) {
556 UnsetAttribute(node, "naming-typedef-id");
557 }
558 }
559
560 for (auto* child = Child(node); child; child = Next(child)) {
561 TidyAnonymousTypes(child);
562 }
563 }
564
565 // Remove duplicate members.
RemoveDuplicateMembers(xmlNodePtr root)566 void RemoveDuplicateMembers(xmlNodePtr root) {
567 std::vector<xmlNodePtr> types;
568
569 // find all structs and unions
570 std::function<void(xmlNodePtr)> dfs = [&](xmlNodePtr node) {
571 const auto node_name = GetName(node);
572 // preorder in case we delete a nested element
573 for (auto* child = Child(node); child; child = Next(child)) {
574 dfs(child);
575 }
576 if (node_name == "class-decl" || node_name == "union-decl") {
577 types.push_back(node);
578 }
579 };
580 dfs(root);
581
582 for (const auto& node : types) {
583 // partition members by node name
584 std::map<std::string_view, std::vector<xmlNodePtr>> member_map;
585 for (auto* child = Child(node); child; child = Next(child)) {
586 member_map[GetName(child)].push_back(child);
587 }
588 // for each kind of member...
589 for (auto& [name, members] : member_map) {
590 // ... remove identical duplicate members - O(n^2)
591 for (size_t i = 0; i < members.size(); ++i) {
592 xmlNodePtr& i_node = members[i];
593 bool duplicate = false;
594 for (size_t j = 0; j < i; ++j) {
595 const xmlNodePtr& j_node = members[j];
596 if (j_node != nullptr && EqualTree(i_node, j_node)) {
597 duplicate = true;
598 break;
599 }
600 }
601 if (duplicate) {
602 RemoveNode(i_node);
603 i_node = nullptr;
604 }
605 }
606 }
607 }
608 }
609
// Eliminate non-conflicting / report conflicting duplicate definitions.
//
// XML elements representing types are sometimes emitted multiple times,
// identically. Also, member typedefs are sometimes emitted separately from
// their types, resulting in duplicate XML fragments.
//
// Both these issues can be resolved by first detecting duplicate occurrences of
// a given type id and then checking to see if there's an instance that subsumes
// the others, which can then be eliminated.
//
// This function eliminates exact type duplicates and duplicates where there is
// at least one maximal definition. It also handles the rare case of a stray
// anonymous data member that has been separated from the main definition.
// Anything left over is reported with a warning.
//
// If a type id has definitions in more than one namespace scope, these are
// considered to be *conflicting* duplicate definitions; they are reported with
// a warning and left untouched. TODO: update text
void HandleDuplicateTypes(xmlNodePtr root) {
  // Convenience typedef referring to a namespace scope.
  using namespace_scope = std::vector<std::string>;
  // map of type-id to pair of set of namespace scopes and vector of
  // xmlNodes
  std::unordered_map<
      std::string,
      std::pair<
          std::set<namespace_scope>,
          std::vector<xmlNodePtr>>> types;
  // The namespace scope of the node currently being visited by dfs.
  namespace_scope namespaces;

  // find all type occurrences
  //
  // Only corpus groups, corpora, abi-instr elements and named namespaces are
  // descended into. Any other element is recorded if it has an id attribute,
  // and its children are not visited.
  std::function<void(xmlNodePtr)> dfs = [&](xmlNodePtr node) {
    const auto node_name = GetName(node);
    std::optional<std::string> namespace_name;
    if (node_name == "namespace-decl") {
      namespace_name = GetAttribute(node, "name");
    }
    if (namespace_name) {
      namespaces.push_back(namespace_name.value());
    }
    if (node_name == "abi-corpus-group"
        || node_name == "abi-corpus"
        || node_name == "abi-instr"
        || namespace_name) {
      for (auto* child = Child(node); child; child = Next(child)) {
        dfs(child);
      }
    } else {
      const auto id = GetAttribute(node, "id");
      if (id) {
        auto& info = types[id.value()];
        info.first.insert(namespaces);
        info.second.push_back(node);
      }
    }
    // Leave the namespace scope entered above, if any.
    if (namespace_name) {
      namespaces.pop_back();
    }
  };
  dfs(root);

  for (const auto& [id, scopes_and_definitions] : types) {
    const auto& [scopes, definitions] = scopes_and_definitions;

    // Definitions in different namespace scopes conflict; report and skip.
    if (scopes.size() > 1) {
      Warn() << "conflicting scopes found for type '" << id << '\'';
      continue;
    }

    const auto possible_maximal = MaximalTree(definitions);
    if (possible_maximal) {
      // Remove all but the maximal definition.
      const size_t maximal = possible_maximal.value();
      for (size_t ix = 0; ix < definitions.size(); ++ix) {
        if (ix != maximal) {
          RemoveNode(definitions[ix]);
        }
      }
      continue;
    }

    // As a rare alternative, check for a stray anonymous member that has been
    // separated from the main definition.
    //
    // A stray is a definition whose only child is a data-member whose only
    // child is a var-decl with an empty name. The last stray seen and the last
    // non-stray seen are remembered.
    size_t strays = 0;
    std::optional<size_t> stray;
    std::optional<size_t> non_stray;
    for (size_t ix = 0; ix < definitions.size(); ++ix) {
      auto node = definitions[ix];
      auto member = Child(node);
      if (member && !Next(member) && GetName(member) == "data-member") {
        auto decl = Child(member);
        if (decl && !Next(decl) && GetName(decl) == "var-decl") {
          auto name = GetAttribute(decl, "name");
          if (name && name.value().empty()) {
            ++strays;
            stray = ix;
            continue;
          }
        }
      }
      non_stray = ix;
    }
    // Proceed only if all definitions but exactly one are strays.
    if (strays + 1 == definitions.size() && stray.has_value()
        && non_stray.has_value()) {
      const auto stray_index = stray.value();
      const auto non_stray_index = non_stray.value();
      bool good = true;
      // Every other stray must be an exact duplicate of the chosen one.
      for (size_t ix = 0; ix < definitions.size(); ++ix) {
        if (ix == stray_index || ix == non_stray_index) {
          continue;
        }
        if (EqualTree(definitions[stray_index], definitions[ix])) {
          // it doesn't hurt if we remove exact duplicates and then fail
          RemoveNode(definitions[ix]);
        } else {
          good = false;
          break;
        }
      }
      if (good) {
        // Reunite the stray member with the main definition, then drop the
        // now empty stray definition.
        MoveNode(Child(definitions[stray_index]), definitions[non_stray_index]);
        RemoveNode(definitions[stray_index]);
        continue;
      }
    }

    Warn() << "unresolvable duplicate definitions found for type '" << id
           << '\'';
  }
}
739
740 } // namespace
741
742 // Remove XML nodes and attributes that are neither used or wanted.
Clean(xmlNodePtr root)743 void Clean(xmlNodePtr root) {
744 // Strip non-element nodes to simplify other operations.
745 StripNonElements(root);
746
747 // Strip location information.
748 StripLocationInfo(root);
749
750 // Strip access.
751 StripAccess(root);
752
753 // Strip reachability attributes.
754 StripReachabilityAttributes(root);
755 }
756
757 namespace {
758
759 // Transform XML elements to improve their semantics.
Tidy(xmlNodePtr root)760 void Tidy(xmlNodePtr root) {
761 // Fix bad ELF symbol links
762 FixBadDwarfElfLinks(root);
763
764 // Normalise anonymous type names.
765 // Reanonymise anonymous types.
766 // Discard naming typedef backlinks.
767 TidyAnonymousTypes(root);
768
769 // Remove duplicate members.
770 RemoveDuplicateMembers(root);
771
772 // Eliminate complete duplicates and extra fragments of types.
773 // Report conflicting duplicate defintions.
774 // Record whether there are conflicting duplicate definitions.
775 HandleDuplicateTypes(root);
776 }
777
ParseLength(const std::string & value)778 std::optional<uint64_t> ParseLength(const std::string& value) {
779 if (value == "infinite" || value == "unknown") {
780 return {0};
781 }
782 return Parse<uint64_t>(value);
783 }
784
ParseReferenceKind(const std::string & value)785 std::optional<PointerReference::Kind> ParseReferenceKind(
786 const std::string& value) {
787 if (value == "lvalue") {
788 return {PointerReference::Kind::LVALUE_REFERENCE};
789 } else if (value == "rvalue") {
790 return {PointerReference::Kind::RVALUE_REFERENCE};
791 }
792 return {};
793 }
794
// Parser for libabigail's ABI XML format, creating a Symbol-Type Graph.
//
// An Abigail is constructed with the graph to populate; ProcessRoot then
// consumes a libxml node tree and builds the graph.
//
// Note that the core parser sees a "clean and tidy" XML document due to
// preprocessing that simplifies the XML and resolves several issues. One
// notable exception is that duplicate nodes may still remain.
//
// The main producer of ABI XML is abidw. The format has no formal specification
// and has very limited semantic versioning. This parser makes no attempt to
// support or correct for deficiencies in older versions of the format.
//
// The parser detects and will abort on the presence of unexpected elements.
//
// The parser ignores attributes it doesn't care about, including member access
// specifiers and (meaningless) type ids on array dimensions.
//
// The STG IR and libabigail ABI XML models diverge in some ways. The parser has
// to do extra work for each of these, as follows.
//
// 0. XML uses type and symbol ids to link together elements. These become edges
// in the graph between symbols and types and between types and types. Dangling
// type references will cause an abort. libabigail is much more relaxed about
// symbols without type information and these are modelled as such.
//
// 1. XML function declarations have in-line types. The parser creates
// free-standing types on-the-fly. A useful space optimisation might be to
// prevent duplicate creation of such types.
//
// 2. Variadic parameters are currently flagged with an XML attribute. A
// variadic type node is created on demand and will be shared by all such
// parameters.
//
// 3. XML symbols and aliases have a rather poor representation with aliases
// represented as comma-separated attribute values. Aliases are resolved in a
// post-processing phase.
//
// 4. XML anonymous types may also have names, these are ignored.
class Abigail {
 public:
  // Create a parser that adds nodes to the given graph.
  explicit Abigail(Graph& graph);
  // Clean, tidy and process a whole XML document, given its root element,
  // which must be an abi-corpus-group or an abi-corpus. Returns the node id
  // produced by BuildSymbols.
  Id ProcessRoot(xmlNodePtr root);

 private:
  // What is recorded about each ELF symbol.
  struct SymbolInfo {
    std::string name;
    std::optional<ElfSymbol::VersionInfo> version_info;
    xmlNodePtr node;
  };

  // Map from libabigail type ids to STG node ids; except for the type of
  // variadic parameters.
  Maker<std::string> maker_;
  // The STG IR uses a distinct node type for the variadic parameter type; if
  // allocated, this is its STG node id.
  std::optional<Id> variadic_;

  // symbol id to symbol information
  std::unordered_map<std::string, SymbolInfo> symbol_info_map_;
  // alias symbol id to main symbol id
  std::unordered_map<std::string, std::string> alias_to_main_;
  // libabigail decorates certain declarations with symbol ids; this is the
  // mapping from symbol id to the corresponding type and full name.
  std::unordered_map<std::string, std::pair<Id, std::string>>
      symbol_id_and_full_name_;

  // Full name of the current scope.
  Scope scope_;

  // Look up the STG node id for the element's type-id attribute.
  Id GetEdge(xmlNodePtr element);
  // Get the shared variadic parameter type node, creating it on first use.
  Id GetVariadic();
  // Build a function type from an element's parameter and return children.
  Function MakeFunctionType(xmlNodePtr function);

  void ProcessCorpusGroup(xmlNodePtr group);
  void ProcessCorpus(xmlNodePtr corpus);
  void ProcessSymbols(xmlNodePtr symbols);
  void ProcessSymbol(xmlNodePtr symbol);

  bool ProcessUserDefinedType(std::string_view name, const std::string& id,
                              xmlNodePtr decl);
  void ProcessScope(xmlNodePtr scope);

  void ProcessInstr(xmlNodePtr instr);
  void ProcessNamespace(xmlNodePtr scope);

  Id ProcessDecl(bool is_variable, xmlNodePtr decl);

  void ProcessFunctionType(const std::string& id, xmlNodePtr function);
  void ProcessTypedef(const std::string& id, xmlNodePtr type_definition);
  void ProcessPointer(const std::string& id, bool is_pointer,
                      xmlNodePtr pointer);
  void ProcessQualified(const std::string& id, xmlNodePtr qualified);
  void ProcessArray(const std::string& id, xmlNodePtr array);
  void ProcessTypeDecl(const std::string& id, xmlNodePtr type_decl);
  void ProcessStructUnion(const std::string& id, bool is_struct,
                          xmlNodePtr struct_union);
  void ProcessEnum(const std::string& id, xmlNodePtr enumeration);

  Id ProcessBaseClass(xmlNodePtr base_class);
  std::optional<Id> ProcessDataMember(bool is_struct, xmlNodePtr data_member);
  void ProcessMemberFunction(std::vector<Id>& methods, xmlNodePtr method);
  void ProcessMemberType(xmlNodePtr member_type);

  Id BuildSymbol(const SymbolInfo& info,
                 std::optional<Id> type_id,
                 const std::optional<std::string>& name);
  Id BuildSymbols();
};
903
Abigail(Graph & graph)904 Abigail::Abigail(Graph& graph) : maker_(graph) {}
905
GetEdge(xmlNodePtr element)906 Id Abigail::GetEdge(xmlNodePtr element) {
907 return maker_.Get(GetAttributeOrDie(element, "type-id"));
908 }
909
GetVariadic()910 Id Abigail::GetVariadic() {
911 if (!variadic_) {
912 variadic_ = {maker_.Add<Special>(Special::Kind::VARIADIC)};
913 }
914 return *variadic_;
915 }
916
MakeFunctionType(xmlNodePtr function)917 Function Abigail::MakeFunctionType(xmlNodePtr function) {
918 std::vector<Id> parameters;
919 std::optional<Id> return_type;
920 for (auto* child = Child(function); child; child = Next(child)) {
921 const auto child_name = GetName(child);
922 if (return_type) {
923 Die() << "unexpected element after return-type";
924 }
925 if (child_name == "parameter") {
926 const auto is_variadic = ReadAttribute<bool>(child, "is-variadic", false);
927 parameters.push_back(is_variadic ? GetVariadic() : GetEdge(child));
928 } else if (child_name == "return") {
929 return_type = {GetEdge(child)};
930 } else {
931 Die() << "unrecognised " << GetName(function)
932 << " child element '" << child_name << "'";
933 }
934 }
935 if (!return_type) {
936 Die() << "missing return-type";
937 }
938 return {*return_type, parameters};
939 }
940
ProcessRoot(xmlNodePtr root)941 Id Abigail::ProcessRoot(xmlNodePtr root) {
942 Clean(root);
943 Tidy(root);
944 const auto name = GetName(root);
945 if (name == "abi-corpus-group") {
946 ProcessCorpusGroup(root);
947 } else if (name == "abi-corpus") {
948 ProcessCorpus(root);
949 } else {
950 Die() << "unrecognised root element '" << name << "'";
951 }
952 return BuildSymbols();
953 }
954
ProcessCorpusGroup(xmlNodePtr group)955 void Abigail::ProcessCorpusGroup(xmlNodePtr group) {
956 for (auto* corpus = Child(group); corpus; corpus = Next(corpus)) {
957 CheckName("abi-corpus", corpus);
958 ProcessCorpus(corpus);
959 }
960 }
961
ProcessCorpus(xmlNodePtr corpus)962 void Abigail::ProcessCorpus(xmlNodePtr corpus) {
963 for (auto* element = Child(corpus); element; element = Next(element)) {
964 const auto name = GetName(element);
965 if (name == "elf-function-symbols" || name == "elf-variable-symbols") {
966 ProcessSymbols(element);
967 } else if (name == "elf-needed") {
968 // ignore this
969 } else if (name == "abi-instr") {
970 ProcessInstr(element);
971 } else {
972 Die() << "unrecognised abi-corpus child element '" << name << "'";
973 }
974 }
975 }
976
ProcessSymbols(xmlNodePtr symbols)977 void Abigail::ProcessSymbols(xmlNodePtr symbols) {
978 for (auto* element = Child(symbols); element; element = Next(element)) {
979 CheckName("elf-symbol", element);
980 ProcessSymbol(element);
981 }
982 }
983
ProcessSymbol(xmlNodePtr symbol)984 void Abigail::ProcessSymbol(xmlNodePtr symbol) {
985 // Symbol processing is done in two parts. In this first part, we parse just
986 // enough XML attributes to generate a symbol id and determine any aliases.
987 // Symbol ids in this format can be found in elf-symbol alias attributes and
988 // in {var,function}-decl elf-symbol-id attributes.
989 const auto name = GetAttributeOrDie(symbol, "name");
990 const auto version =
991 ReadAttribute<std::string>(symbol, "version", std::string());
992 const bool is_default_version =
993 ReadAttribute<bool>(symbol, "is-default-version", false);
994 const auto alias = GetAttribute(symbol, "alias");
995
996 std::string elf_symbol_id = name;
997 std::optional<ElfSymbol::VersionInfo> version_info;
998 if (!version.empty()) {
999 version_info = ElfSymbol::VersionInfo{is_default_version, version};
1000 elf_symbol_id += VersionInfoToString(*version_info);
1001 }
1002
1003 Check(symbol_info_map_
1004 .emplace(elf_symbol_id, SymbolInfo{name, version_info, symbol})
1005 .second)
1006 << "multiple symbols with id " << elf_symbol_id;
1007
1008 if (alias) {
1009 std::istringstream is(*alias);
1010 std::string item;
1011 while (std::getline(is, item, ',')) {
1012 Check(alias_to_main_.insert({item, elf_symbol_id}).second)
1013 << "multiple aliases with id " << elf_symbol_id;
1014 }
1015 }
1016 }
1017
ProcessUserDefinedType(std::string_view name,const std::string & id,xmlNodePtr decl)1018 bool Abigail::ProcessUserDefinedType(
1019 std::string_view name, const std::string& id, xmlNodePtr decl) {
1020 if (name == "typedef-decl") {
1021 ProcessTypedef(id, decl);
1022 } else if (name == "class-decl") {
1023 ProcessStructUnion(id, true, decl);
1024 } else if (name == "union-decl") {
1025 ProcessStructUnion(id, false, decl);
1026 } else if (name == "enum-decl") {
1027 ProcessEnum(id, decl);
1028 } else {
1029 return false;
1030 }
1031 return true;
1032 }
1033
ProcessScope(xmlNodePtr scope)1034 void Abigail::ProcessScope(xmlNodePtr scope) {
1035 for (auto* element = Child(scope); element; element = Next(element)) {
1036 const auto name = GetName(element);
1037 const auto maybe_id = GetAttribute(element, "id");
1038 // all type elements have "id", all non-types do not
1039 if (maybe_id) {
1040 const auto& id = *maybe_id;
1041 if (name == "function-type") {
1042 ProcessFunctionType(id, element);
1043 } else if (name == "pointer-type-def") {
1044 ProcessPointer(id, true, element);
1045 } else if (name == "reference-type-def") {
1046 ProcessPointer(id, false, element);
1047 } else if (name == "qualified-type-def") {
1048 ProcessQualified(id, element);
1049 } else if (name == "array-type-def") {
1050 ProcessArray(id, element);
1051 } else if (name == "type-decl") {
1052 ProcessTypeDecl(id, element);
1053 } else if (!ProcessUserDefinedType(name, id, element)) {
1054 Die() << "bad abi-instr type child element '" << name << "'";
1055 }
1056 } else {
1057 if (name == "var-decl") {
1058 ProcessDecl(true, element);
1059 } else if (name == "function-decl") {
1060 ProcessDecl(false, element);
1061 } else if (name == "namespace-decl") {
1062 ProcessNamespace(element);
1063 } else {
1064 Die() << "bad abi-instr non-type child element '" << name << "'";
1065 }
1066 }
1067 }
1068 }
1069
ProcessInstr(xmlNodePtr instr)1070 void Abigail::ProcessInstr(xmlNodePtr instr) {
1071 ProcessScope(instr);
1072 }
1073
ProcessNamespace(xmlNodePtr scope)1074 void Abigail::ProcessNamespace(xmlNodePtr scope) {
1075 const auto name = GetAttributeOrDie(scope, "name");
1076 const PushScopeName push_scope_name(scope_, "namespace", name);
1077 ProcessScope(scope);
1078 }
1079
ProcessDecl(bool is_variable,xmlNodePtr decl)1080 Id Abigail::ProcessDecl(bool is_variable, xmlNodePtr decl) {
1081 const auto name = scope_.name + GetAttributeOrDie(decl, "name");
1082 const auto symbol_id = GetAttribute(decl, "elf-symbol-id");
1083 const auto type = is_variable ? GetEdge(decl)
1084 : maker_.Add<Function>(MakeFunctionType(decl));
1085 if (symbol_id) {
1086 // There's a link to an ELF symbol.
1087 const auto [it, inserted] = symbol_id_and_full_name_.emplace(
1088 *symbol_id, std::make_pair(type, name));
1089 if (!inserted) {
1090 Die() << "duplicate type for '" << *symbol_id << "'";
1091 }
1092 }
1093 return type;
1094 }
1095
ProcessFunctionType(const std::string & id,xmlNodePtr function)1096 void Abigail::ProcessFunctionType(const std::string& id, xmlNodePtr function) {
1097 maker_.MaybeSet<Function>(id, MakeFunctionType(function));
1098 }
1099
ProcessTypedef(const std::string & id,xmlNodePtr type_definition)1100 void Abigail::ProcessTypedef(const std::string& id,
1101 xmlNodePtr type_definition) {
1102 const auto name = scope_.name + GetAttributeOrDie(type_definition, "name");
1103 const auto type = GetEdge(type_definition);
1104 maker_.MaybeSet<Typedef>(id, name, type);
1105 }
1106
ProcessPointer(const std::string & id,bool is_pointer,xmlNodePtr pointer)1107 void Abigail::ProcessPointer(const std::string& id, bool is_pointer,
1108 xmlNodePtr pointer) {
1109 const auto type = GetEdge(pointer);
1110 const auto kind = is_pointer ? PointerReference::Kind::POINTER
1111 : ReadAttribute<PointerReference::Kind>(
1112 pointer, "kind", &ParseReferenceKind);
1113 maker_.MaybeSet<PointerReference>(id, kind, type);
1114 }
1115
ProcessQualified(const std::string & id,xmlNodePtr qualified)1116 void Abigail::ProcessQualified(const std::string& id, xmlNodePtr qualified) {
1117 std::vector<Qualifier> qualifiers;
1118 // Do these in reverse order so we get CVR ordering.
1119 if (ReadAttribute<bool>(qualified, "restrict", false)) {
1120 qualifiers.push_back(Qualifier::RESTRICT);
1121 }
1122 if (ReadAttribute<bool>(qualified, "volatile", false)) {
1123 qualifiers.push_back(Qualifier::VOLATILE);
1124 }
1125 if (ReadAttribute<bool>(qualified, "const", false)) {
1126 qualifiers.push_back(Qualifier::CONST);
1127 }
1128 Check(!qualifiers.empty()) << "qualified-type-def has no qualifiers";
1129 // Handle multiple qualifiers by unconditionally adding as new nodes all but
1130 // the last qualifier which is set into place.
1131 auto type = GetEdge(qualified);
1132 auto count = qualifiers.size();
1133 for (auto qualifier : qualifiers) {
1134 --count;
1135 const Qualified node(qualifier, type);
1136 if (count) {
1137 type = maker_.Add<Qualified>(node);
1138 } else {
1139 maker_.MaybeSet<Qualified>(id, node);
1140 }
1141 }
1142 }
1143
ProcessArray(const std::string & id,xmlNodePtr array)1144 void Abigail::ProcessArray(const std::string& id, xmlNodePtr array) {
1145 std::vector<size_t> dimensions;
1146 for (auto* child = Child(array); child; child = Next(child)) {
1147 CheckName("subrange", child);
1148 const auto length = ReadAttribute<uint64_t>(child, "length", &ParseLength);
1149 dimensions.push_back(length);
1150 }
1151 Check(!dimensions.empty()) << "array-type-def element has no children";
1152 // int[M][N] means array[M] of array[N] of int
1153 //
1154 // We need to chain a bunch of types together:
1155 //
1156 // id = array[n] of id = ... = array[n] of id
1157 //
1158 // where the first id is the new type in slot ix
1159 // and the last id is the old type in slot type
1160 //
1161 // Use the same approach as for qualifiers.
1162 auto type = GetEdge(array);
1163 auto count = dimensions.size();
1164 for (auto it = dimensions.crbegin(); it != dimensions.crend(); ++it) {
1165 --count;
1166 const auto size = *it;
1167 const Array node(size, type);
1168 if (count) {
1169 type = maker_.Add<Array>(node);
1170 } else {
1171 maker_.MaybeSet<Array>(id, node);
1172 }
1173 }
1174 }
1175
ProcessTypeDecl(const std::string & id,xmlNodePtr type_decl)1176 void Abigail::ProcessTypeDecl(const std::string& id, xmlNodePtr type_decl) {
1177 const auto name = scope_.name + GetAttributeOrDie(type_decl, "name");
1178 const auto bits = ReadAttribute<size_t>(type_decl, "size-in-bits", 0);
1179 if (bits % 8) {
1180 Die() << "size-in-bits is not a multiple of 8";
1181 }
1182 const auto bytes = bits / 8;
1183
1184 if (name == "void") {
1185 maker_.MaybeSet<Special>(id, Special::Kind::VOID);
1186 } else {
1187 // libabigail doesn't model encoding at all and we don't want to parse names
1188 // (which will not always work) in an attempt to reconstruct it.
1189 maker_.MaybeSet<Primitive>(id, name, /* encoding= */ std::nullopt, bytes);
1190 }
1191 }
1192
ProcessStructUnion(const std::string & id,bool is_struct,xmlNodePtr struct_union)1193 void Abigail::ProcessStructUnion(const std::string& id, bool is_struct,
1194 xmlNodePtr struct_union) {
1195 // Libabigail sometimes reports is-declaration-only but still provides some
1196 // child elements. So we check both things.
1197 const bool forward =
1198 ReadAttribute<bool>(struct_union, "is-declaration-only", false)
1199 && Child(struct_union) == nullptr;
1200 const auto kind = is_struct
1201 ? StructUnion::Kind::STRUCT
1202 : StructUnion::Kind::UNION;
1203 const bool is_anonymous =
1204 ReadAttribute<bool>(struct_union, "is-anonymous", false);
1205 const auto name =
1206 is_anonymous ? std::string() : GetAttributeOrDie(struct_union, "name");
1207 const auto full_name =
1208 is_anonymous ? std::string() : scope_.name + name;
1209 const PushScopeName push_scope_name(scope_, kind, name);
1210 if (forward) {
1211 maker_.MaybeSet<StructUnion>(id, kind, full_name);
1212 return;
1213 }
1214 const auto bits = ReadAttribute<size_t>(struct_union, "size-in-bits", 0);
1215 const auto bytes = (bits + 7) / 8;
1216
1217 std::vector<Id> base_classes;
1218 std::vector<Id> methods;
1219 std::vector<Id> members;
1220 for (auto* child = Child(struct_union); child; child = Next(child)) {
1221 const auto child_name = GetName(child);
1222 if (child_name == "data-member") {
1223 if (const auto member = ProcessDataMember(is_struct, child)) {
1224 members.push_back(*member);
1225 }
1226 } else if (child_name == "member-type") {
1227 ProcessMemberType(child);
1228 } else if (child_name == "base-class") {
1229 base_classes.push_back(ProcessBaseClass(child));
1230 } else if (child_name == "member-function") {
1231 ProcessMemberFunction(methods, child);
1232 } else {
1233 Die() << "unrecognised " << kind << "-decl child element '" << child_name
1234 << "'";
1235 }
1236 }
1237
1238 maker_.MaybeSet<StructUnion>(id, kind, full_name, bytes, base_classes,
1239 methods, members);
1240 }
1241
ProcessEnum(const std::string & id,xmlNodePtr enumeration)1242 void Abigail::ProcessEnum(const std::string& id, xmlNodePtr enumeration) {
1243 const bool forward =
1244 ReadAttribute<bool>(enumeration, "is-declaration-only", false);
1245 const auto name = ReadAttribute<bool>(enumeration, "is-anonymous", false)
1246 ? std::string()
1247 : scope_.name + GetAttributeOrDie(enumeration, "name");
1248 if (forward) {
1249 maker_.MaybeSet<Enumeration>(id, name);
1250 return;
1251 }
1252
1253 const xmlNodePtr underlying = Child(enumeration);
1254 Check(underlying != nullptr) << "enum-decl has no child elements";
1255 CheckName("underlying-type", underlying);
1256 const auto type = GetEdge(underlying);
1257
1258 std::vector<std::pair<std::string, int64_t>> enumerators;
1259 for (auto* enumerator = Next(underlying); enumerator;
1260 enumerator = Next(enumerator)) {
1261 CheckName("enumerator", enumerator);
1262 const auto enumerator_name = GetAttributeOrDie(enumerator, "name");
1263 // libabigail currently supports anything that fits in an int64_t
1264 const auto enumerator_value =
1265 ReadAttributeOrDie<int64_t>(enumerator, "value");
1266 enumerators.emplace_back(enumerator_name, enumerator_value);
1267 }
1268
1269 maker_.MaybeSet<Enumeration>(id, name, type, enumerators);
1270 }
1271
ProcessBaseClass(xmlNodePtr base_class)1272 Id Abigail::ProcessBaseClass(xmlNodePtr base_class) {
1273 const auto& type = GetEdge(base_class);
1274 const auto offset =
1275 ReadAttributeOrDie<size_t>(base_class, "layout-offset-in-bits");
1276 const auto inheritance = ReadAttribute<bool>(base_class, "is-virtual", false)
1277 ? BaseClass::Inheritance::VIRTUAL
1278 : BaseClass::Inheritance::NON_VIRTUAL;
1279 return maker_.Add<BaseClass>(type, offset, inheritance);
1280 }
1281
ProcessDataMember(bool is_struct,xmlNodePtr data_member)1282 std::optional<Id> Abigail::ProcessDataMember(bool is_struct,
1283 xmlNodePtr data_member) {
1284 const xmlNodePtr decl = GetOnlyChild(data_member);
1285 CheckName("var-decl", decl);
1286 if (ReadAttribute<bool>(data_member, "static", false)) {
1287 ProcessDecl(true, decl);
1288 return {};
1289 }
1290
1291 const auto offset = is_struct
1292 ? ReadAttributeOrDie<size_t>(data_member,
1293 "layout-offset-in-bits")
1294 : 0;
1295 const auto name = GetAttributeOrDie(decl, "name");
1296 const auto type = GetEdge(decl);
1297
1298 // Note: libabigail does not model member size, yet
1299 return {maker_.Add<Member>(name, type, offset, 0)};
1300 }
1301
ProcessMemberFunction(std::vector<Id> & methods,xmlNodePtr method)1302 void Abigail::ProcessMemberFunction(std::vector<Id>& methods,
1303 xmlNodePtr method) {
1304 const xmlNodePtr decl = GetOnlyChild(method);
1305 CheckName("function-decl", decl);
1306 // ProcessDecl creates symbol references so must be called unconditionally.
1307 const auto type = ProcessDecl(false, decl);
1308 const auto vtable_offset = ReadAttribute<uint64_t>(method, "vtable-offset");
1309 if (vtable_offset) {
1310 static const std::string missing = "{missing}";
1311 const auto mangled_name = ReadAttribute(decl, "mangled-name", missing);
1312 const auto name = GetAttributeOrDie(decl, "name");
1313 methods.push_back(
1314 maker_.Add<Method>(mangled_name, name, vtable_offset.value(), type));
1315 }
1316 }
1317
ProcessMemberType(xmlNodePtr member_type)1318 void Abigail::ProcessMemberType(xmlNodePtr member_type) {
1319 const xmlNodePtr decl = GetOnlyChild(member_type);
1320 const auto id = GetAttributeOrDie(decl, "id");
1321 const auto name = GetName(decl);
1322 if (!ProcessUserDefinedType(name, id, decl)) {
1323 Die() << "unrecognised member-type child element '" << name << "'";
1324 }
1325 }
1326
BuildSymbol(const SymbolInfo & info,std::optional<Id> type_id,const std::optional<std::string> & name)1327 Id Abigail::BuildSymbol(const SymbolInfo& info,
1328 std::optional<Id> type_id,
1329 const std::optional<std::string>& name) {
1330 const xmlNodePtr symbol = info.node;
1331 const bool is_defined = ReadAttributeOrDie<bool>(symbol, "is-defined");
1332 const auto crc = ReadAttribute<ElfSymbol::CRC>(symbol, "crc");
1333 const auto ns = ReadAttribute<std::string>(symbol, "namespace");
1334 const auto type = ReadAttributeOrDie<ElfSymbol::SymbolType>(symbol, "type");
1335 const auto binding =
1336 ReadAttributeOrDie<ElfSymbol::Binding>(symbol, "binding");
1337 const auto visibility =
1338 ReadAttributeOrDie<ElfSymbol::Visibility>(symbol, "visibility");
1339
1340 return maker_.Add<ElfSymbol>(
1341 info.name, info.version_info,
1342 is_defined, type, binding, visibility, crc, ns, type_id, name);
1343 }
1344
BuildSymbols()1345 Id Abigail::BuildSymbols() {
1346 // Libabigail's model is (approximately):
1347 //
1348 // (alias)* -> main symbol <- some decl -> type
1349 //
1350 // which we turn into:
1351 //
1352 // symbol / alias -> type
1353 //
1354 for (const auto& [alias, main] : alias_to_main_) {
1355 Check(!alias_to_main_.contains(main))
1356 << "found main symbol and alias with id " << main;
1357 }
1358 // Build final symbol table, tying symbols to their types.
1359 std::map<std::string, Id> symbols;
1360 for (const auto& [id, symbol_info] : symbol_info_map_) {
1361 const auto main = alias_to_main_.find(id);
1362 const auto lookup = main != alias_to_main_.end() ? main->second : id;
1363 const auto type_id_and_name_it = symbol_id_and_full_name_.find(lookup);
1364 std::optional<Id> type_id;
1365 std::optional<std::string> name;
1366 if (type_id_and_name_it != symbol_id_and_full_name_.end()) {
1367 const auto& type_id_and_name = type_id_and_name_it->second;
1368 type_id = {type_id_and_name.first};
1369 name = {type_id_and_name.second};
1370 }
1371 symbols.insert({id, BuildSymbol(symbol_info, type_id, name)});
1372 }
1373 return maker_.Add<Interface>(symbols);
1374 }
1375
1376 using Parser = xmlDocPtr(xmlParserCtxtPtr context, const char* url,
1377 const char* encoding, int options);
1378
Parse(Runtime & runtime,const std::function<Parser> & parser)1379 Document Parse(Runtime& runtime, const std::function<Parser>& parser) {
1380 const std::unique_ptr<
1381 std::remove_pointer_t<xmlParserCtxtPtr>, void(*)(xmlParserCtxtPtr)>
1382 context(xmlNewParserCtxt(), xmlFreeParserCtxt);
1383 Document document(nullptr, xmlFreeDoc);
1384 {
1385 const Time t(runtime, "abigail.libxml_parse");
1386 document.reset(parser(context.get(), nullptr, nullptr, XML_PARSE_NONET));
1387 }
1388 Check(document != nullptr) << "failed to parse input as XML";
1389 return document;
1390 }
1391
1392 } // namespace
1393
ProcessDocument(Graph & graph,xmlDocPtr document)1394 Id ProcessDocument(Graph& graph, xmlDocPtr document) {
1395 xmlNodePtr root = xmlDocGetRootElement(document);
1396 Check(root != nullptr) << "XML document has no root element";
1397 const Id id = Abigail(graph).ProcessRoot(root);
1398 return RemoveUselessQualifiers(graph, id);
1399 }
1400
Read(Runtime & runtime,const std::string & path)1401 Document Read(Runtime& runtime, const std::string& path) {
1402 const FileDescriptor fd(path.c_str(), O_RDONLY);
1403 return Parse(runtime, [&](xmlParserCtxtPtr context, const char* url,
1404 const char* encoding, int options) {
1405 return xmlCtxtReadFd(context, fd.Value(), url, encoding, options);
1406 });
1407 }
1408
Read(Runtime & runtime,Graph & graph,const std::string & path)1409 Id Read(Runtime& runtime, Graph& graph, const std::string& path) {
1410 // Read the XML.
1411 const Document document = Read(runtime, path);
1412 // Process the XML.
1413 return ProcessDocument(graph, document.get());
1414 }
1415
ReadFromString(Runtime & runtime,Graph & graph,const std::string_view xml)1416 Id ReadFromString(Runtime& runtime, Graph& graph, const std::string_view xml) {
1417 // Read the XML.
1418 const Document document =
1419 Parse(runtime, [&](xmlParserCtxtPtr context, const char* url,
1420 const char* encoding, int options) {
1421 return xmlCtxtReadMemory(context, xml.data(), static_cast<int>(xml.size()),
1422 url, encoding, options);
1423 });
1424 // Process the XML.
1425 return ProcessDocument(graph, document.get());
1426 }
1427
1428 } // namespace abixml
1429 } // namespace stg
1430