xref: /aosp_15_r20/external/emboss/compiler/back_end/cpp/header_generator.py (revision 99e0aae7469b87d12f0ad23e61142c2d74c1ef70)
1# Copyright 2019 Google LLC
2#
3# Licensed under the Apache License, Version 2.0 (the "License");
4# you may not use this file except in compliance with the License.
5# You may obtain a copy of the License at
6#
7#     https://www.apache.org/licenses/LICENSE-2.0
8#
9# Unless required by applicable law or agreed to in writing, software
10# distributed under the License is distributed on an "AS IS" BASIS,
11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12# See the License for the specific language governing permissions and
13# limitations under the License.
14
15"""C++ header code generator.
16
17Call generate_header(ir) to get the text of a C++ header file implementing View
18classes for the ir.
19"""
20
21import collections
22import pkgutil
23import re
24from typing import NamedTuple
25
26from compiler.back_end.cpp import attributes
27from compiler.back_end.util import code_template
28from compiler.util import attribute_util
29from compiler.util import error
30from compiler.util import ir_data
31from compiler.util import ir_data_utils
32from compiler.util import ir_util
33from compiler.util import name_conversion
34from compiler.util import resources
35from compiler.util import traverse_ir
36
# Code templates used by this generator, parsed once at import time from the
# "generated_code_templates" resource of the C++ back end package.
_TEMPLATES = code_template.parse_templates(
    resources.load("compiler.back_end.cpp", "generated_code_templates"))
39
40_CPP_RESERVED_WORDS = set((
41    # C keywords.  A few of these are not (yet) C++ keywords, but some compilers
42    # accept the superset of C and C++, so we still want to avoid them.
43    "asm", "auto", "break", "case", "char", "const", "continue", "default",
44    "do", "double", "else", "enum", "extern", "float", "for", "fortran", "goto",
45    "if", "inline", "int", "long", "register", "restrict", "return", "short",
46    "signed", "sizeof", "static", "struct", "switch", "typedef", "union",
47    "unsigned", "void", "volatile", "while", "_Alignas", "_Alignof", "_Atomic",
48    "_Bool", "_Complex", "_Generic", "_Imaginary", "_Noreturn", "_Pragma",
49    "_Static_assert", "_Thread_local",
50    # The following are not technically reserved words, but collisions are
51    # likely due to the standard macros.
52    "complex", "imaginary", "noreturn",
53    # C++ keywords that are not also C keywords.
54    "alignas", "alignof", "and", "and_eq", "asm", "bitand", "bitor", "bool",
55    "catch", "char16_t", "char32_t", "class", "compl", "concept", "constexpr",
56    "const_cast", "decltype", "delete", "dynamic_cast", "explicit", "export",
57    "false", "friend", "mutable", "namespace", "new", "noexcept", "not",
58    "not_eq", "nullptr", "operator", "or", "or_eq", "private", "protected",
59    "public", "reinterpret_cast", "requires", "static_assert", "static_cast",
60    "template", "this", "thread_local", "throw", "true", "try", "typeid",
61    "typename", "using", "virtual", "wchar_t", "xor", "xor_eq",
62    # "NULL" is not a keyword, but is still very likely to cause problems if
63    # used as a namespace name.
64    "NULL",
65))
66
# The support namespace, as a C++ namespace prefix.  This namespace contains the
# Emboss C++ support classes.
_SUPPORT_NAMESPACE = "::emboss::support"

# Regex matching a single C++ namespace component.  The component must be
# preceded by the start of the string or `::`, and followed (via lookahead) by
# the end of the string or `::`.  Capture group 1 is the bare component name.
_NS_COMPONENT_RE = r"(?:^\s*|::)\s*([a-zA-Z_][a-zA-Z0-9_]*)\s*(?=\s*$|::)"
# Regex matching a full C++ namespace (at least one namespace component).
_NS_RE = fr"^\s*(?:{_NS_COMPONENT_RE})+\s*$"
# Regex matching an empty (or whitespace-only) C++ namespace.
_NS_EMPTY_RE = r"^\s*$"
# Regex matching only the global C++ namespace (`::`).
_NS_GLOBAL_RE = r"^\s*::\s*$"
79
# Paths of the runtime support headers that generated headers #include.
# TODO(bolms): This should be a command-line flag.
_PRELUDE_INCLUDE_FILE = "runtime/cpp/emboss_prelude.h"
_ENUM_VIEW_INCLUDE_FILE = "runtime/cpp/emboss_enum_view.h"
_TEXT_UTIL_INCLUDE_FILE = "runtime/cpp/emboss_text_util.h"
84
# Cases allowed in the `enum_case` attribute.
_SUPPORTED_ENUM_CASES = ("SHOUTY_CASE", "kCamelCase")

# Verify, at import time, that all supported enum cases have valid, implemented
# conversions from SHOUTY_CASE.
for _enum_case in _SUPPORTED_ENUM_CASES:
  assert name_conversion.is_case_conversion_supported("SHOUTY_CASE", _enum_case)
91
92
class Config(NamedTuple):
  """Configuration for C++ header generation.

  Attributes:
    include_enum_traits: Whether or not to include EnumTraits in the generated
      header.  Defaults to True.
  """

  include_enum_traits: bool = True
98
99
def _get_namespace_components(namespace):
  """Splits a C++ namespace string into its component names.

  Examples:
    "::some::name::detail" -> ["some", "name", "detail"]
    "product::name" -> ["product", "name"]
    "simple" -> ["simple"]

  Arguments:
    namespace: A string containing the namespace. May be fully-qualified.

  Returns:
    A list of strings, one per namespace component.
  """
  # Group 1 of _NS_COMPONENT_RE is the bare component name.
  return [match.group(1)
          for match in re.finditer(_NS_COMPONENT_RE, namespace)]
114
115
def _get_module_namespace(module):
  """Returns the C++ namespace of the module, as a list of components.

  The namespace is taken from the module's `namespace` attribute; when that
  attribute is missing or its text is empty, "emboss_generated_code" is used.

  Arguments:
    module: The IR of an Emboss module whose namespace should be returned.

  Returns:
    A list of strings, one per namespace component.  This list can be formatted
    as appropriate by the caller.
  """
  attr = ir_util.get_attribute(module.attribute, "namespace")
  declared = attr.string_constant.text if attr else ""
  return _get_namespace_components(declared or "emboss_generated_code")
132
133
134def _cpp_string_escape(string):
135  return re.sub("['\"\\\\]", r"\\\0", string)
136
137
def _get_includes(module, config: Config):
  """Returns the appropriate #includes based on module's imports.

  An import with a non-empty file name is rendered as an include of that file
  name with ".h" appended.  An import with an empty file name is rendered as an
  include of the prelude header, followed by the enum view and text util
  headers when config.include_enum_traits is set.

  Arguments:
    module: The module IR whose foreign_import entries should be rendered.
    config: The Config controlling header generation.

  Returns:
    A single string containing all rendered include lines, concatenated.
  """

  def render_include(path):
    return code_template.format_template(
        _TEMPLATES.include, file_name=_cpp_string_escape(path))

  rendered = []
  for import_ in module.foreign_import:
    if import_.file_name.text:
      rendered.append(render_include(import_.file_name.text + ".h"))
      continue
    rendered.append(render_include(_PRELUDE_INCLUDE_FILE))
    if config.include_enum_traits:
      rendered.append(render_include(_ENUM_VIEW_INCLUDE_FILE))
      rendered.append(render_include(_TEXT_UTIL_INCLUDE_FILE))
  return "".join(rendered)
160
161
162def _render_namespace_prefix(namespace):
163  """Returns namespace rendered as a prefix, like ::foo::bar::baz."""
164  return "".join(["::" + n for n in namespace])
165
166
def _render_integer(value):
  """Returns a C++ string representation of a constant integer.

  Arguments:
    value: The integer to render; must fit in a type returned by
      _cpp_integer_type_for_range.

  Returns:
    A C++ expression of the form `static_cast</**/T>(<literal>)`.
  """
  integer_type = _cpp_integer_type_for_range(value, value)
  assert integer_type, ("Bug: value should never be outside [-2**63, 2**64), "
                        "got {}.".format(value))
  # C++ literals are always positive: a negative constant is really the
  # positive literal with unary `-` applied to it.
  #
  # On 2s-complement systems this makes compilers finicky about the minimum
  # integer.  Given `-9223372036854775808`, GCC (with -Wall) reports:
  #
  #     warning: integer constant is so large that it is unsigned
  #
  # Clang reports:
  #
  #     warning: integer literal is too large to be represented in a signed
  #     integer type, interpreting as unsigned [-Wimplicitly-unsigned-literal]
  #
  # and MSVC reports:
  #
  #     warning C4146: unary minus operator applied to unsigned type, result
  #     still unsigned
  #
  # Workaround #1: -(2**63) must be written `(-9223372036854775807 - 1)`.
  if value == -(2**63):
    return "static_cast</**/{0}>({1}LL - 1)".format(integer_type, -(2**63 - 1))
  # The second problem is that MSVC (but not Clang or GCC) picks `unsigned` as
  # the type of a literal like `2147483648`.  As far as I can tell, this is a
  # violation of the C++11 standard, but it's possible that the final standard
  # has different rules.  (MSVC seems to treat decimal literals the way that the
  # standard says octal and hexadecimal literals should be treated.)
  #
  # Workaround #2: unconditionally append `LL` (or `ULL`) to every constant so
  # that it is interpreted as `long long` (or `unsigned long long`), and then
  # use a narrowing cast to the appropriate type.  This produces no warnings on
  # any major compiler.
  #
  # TODO(bolms): This suffix computation is kind of a hack.
  unsigned_marker = "U" if "uint" in integer_type else ""
  return "static_cast</**/{0}>({1}{2}LL)".format(
      integer_type, value, unsigned_marker)
210
211
212def _maybe_type(wrapped_type):
213  return "::emboss::support::Maybe</**/{}>".format(wrapped_type)
214
215
def _render_integer_for_expression(value):
  """Returns a C++ Maybe-wrapped rendering of a constant integer.

  Arguments:
    value: The integer to render.

  Returns:
    A C++ expression that constructs a Maybe of the integer's C++ type from the
    rendered integer literal.
  """
  cpp_type = _cpp_integer_type_for_range(value, value)
  wrapper = _maybe_type(cpp_type)
  literal = _render_integer(value)
  return "{0}({1})".format(wrapper, literal)
219
220
def _wrap_in_namespace(body, namespace):
  """Returns the given body wrapped in the given namespace.

  Arguments:
    body: The text to wrap.
    namespace: A sequence of namespace components, outermost first.

  Returns:
    The body wrapped once per component using the namespace_wrap template, with
    the last component innermost.  Each wrap is followed by a newline.
  """
  wrapped = body
  # Wrap from the innermost component outwards.
  for component in reversed(namespace):
    rendered = code_template.format_template(
        _TEMPLATES.namespace_wrap, component=component, body=wrapped)
    wrapped = rendered + "\n"
  return wrapped
228
229
def _get_type_size(type_ir, ir):
  """Returns the fixed size of type_ir in bits; asserts that it has one."""
  bit_count = ir_util.fixed_size_of_type_in_bits(type_ir, ir)
  assert bit_count is not None, (
      "_get_type_size should only be called for constant-sized types.")
  return bit_count
235
236
237def _offset_storage_adapter(buffer_type, alignment, static_offset):
238  return "{}::template OffsetStorageType</**/{}, {}>".format(
239      buffer_type, alignment, static_offset)
240
241
def _bytes_to_bits_convertor(buffer_type, byte_order, size):
  """Returns the C++ BitBlock type that reads bits out of a byte buffer.

  Arguments:
    buffer_type: The C++ storage type to read from.
    byte_order: The byte order name used to select the `<name>ByteOrderer`
      class; must not be empty.
    size: The last template argument of BitBlock.

  Returns:
    The C++ type text for the BitBlock.
  """
  assert byte_order, "byte_order should not be empty."
  format_arguments = (
      _SUPPORT_NAMESPACE, _SUPPORT_NAMESPACE, byte_order, buffer_type, size)
  return "{}::BitBlock</**/{}::{}ByteOrderer<typename {}>, {}>".format(
      *format_arguments)
250
251
def _get_fully_qualified_namespace(name, ir):
  """Returns the fully-qualified C++ scope that encloses the named object.

  Arguments:
    name: A canonical name with module_file and object_path.
    ir: The IR in which to look up the module.

  Returns:
    The module's namespace prefix followed by `::<component>` for every
    component of name.object_path except the last.
  """
  module = ir_util.find_object((name.module_file,), ir)
  module_prefix = _render_namespace_prefix(_get_module_namespace(module))
  enclosing_path = name.object_path[:-1]
  return module_prefix + "".join("::" + str(part) for part in enclosing_path)
256
257
258def _get_unqualified_name(name):
259  return name.object_path[-1]
260
261
def _get_fully_qualified_name(name, ir):
  """Returns the fully-qualified C++ name for the given canonical name."""
  enclosing_scope = _get_fully_qualified_namespace(name, ir)
  return enclosing_scope + "::" + _get_unqualified_name(name)
265
266
def _get_adapted_cpp_buffer_type_for_field(type_definition, size_in_bits,
                                           buffer_type, byte_order,
                                           parent_addressable_unit):
  """Returns the adapted C++ type information needed to construct a view.

  A BIT-addressed type inside a BYTE-addressed parent gets a bytes-to-bits
  adapter around buffer_type.  Otherwise the addressable units must match and
  buffer_type is returned unchanged.
  """
  needs_bit_adapter = (
      parent_addressable_unit == ir_data.AddressableUnit.BYTE and
      type_definition.addressable_unit == ir_data.AddressableUnit.BIT)
  if needs_bit_adapter:
    assert byte_order
    return _bytes_to_bits_convertor(buffer_type, byte_order, size_in_bits)

  assert parent_addressable_unit == type_definition.addressable_unit, (
      "Addressable unit mismatch: {} vs {}".format(
          parent_addressable_unit,
          type_definition.addressable_unit))
  return buffer_type
281
282
def _get_cpp_view_type_for_type_definition(
    type_definition, size, ir, buffer_type, byte_order, parent_addressable_unit,
    validator):
  """Returns the C++ type information needed to construct a view.

  Produces the C++ view type for an Emboss TypeDefinition, together with the
  C++ types of any parameters that the view takes.

  Arguments:
      type_definition: The ir_data.TypeDefinition whose view should be
          constructed.
      size: The size, in type_definition.addressable_units, of the instantiated
          type, or None if it is not known at compile time.
      ir: The complete IR.
      buffer_type: The C++ type to be used as the Storage parameter of the view
          (e.g., "ContiguousBuffer<...>").
      byte_order: For BIT types which are direct children of BYTE types,
          "LittleEndian", "BigEndian", or "None".  Otherwise, None.
      parent_addressable_unit: The addressable_unit_size of the structure
          containing this structure.
      validator: The name of the validator type to be injected into the view.

  Returns:
      A tuple of: the C++ view type and a (possibly-empty) list of the C++ types
      of Emboss parameters which must be passed to the view's constructor.
  """
  adapted_buffer_type = _get_adapted_cpp_buffer_type_for_field(
      type_definition, size, buffer_type, byte_order, parent_addressable_unit)

  if type_definition.HasField("external"):
    # Externals do not (yet) support runtime parameters.
    canonical_name = type_definition.name.canonical_name
    external_view = code_template.format_template(
        _TEMPLATES.external_view_type,
        namespace=_get_fully_qualified_namespace(canonical_name, ir),
        name=_get_unqualified_name(canonical_name),
        bits=size,
        validator=validator,
        buffer_type=adapted_buffer_type)
    return external_view, []

  if type_definition.HasField("structure"):
    parameter_types = [
        _cpp_basic_type_for_expression_type(parameter.type, ir)
        for parameter in type_definition.runtime_parameter]
    canonical_name = type_definition.name.canonical_name
    structure_view = code_template.format_template(
        _TEMPLATES.structure_view_type,
        namespace=_get_fully_qualified_namespace(canonical_name, ir),
        name=_get_unqualified_name(canonical_name),
        buffer_type=adapted_buffer_type)
    return structure_view, parameter_types

  if type_definition.HasField("enumeration"):
    canonical_name = type_definition.name.canonical_name
    enum_view = code_template.format_template(
        _TEMPLATES.enum_view_type,
        support_namespace=_SUPPORT_NAMESPACE,
        enum_type=_get_fully_qualified_name(canonical_name, ir),
        bits=size,
        validator=validator,
        buffer_type=adapted_buffer_type)
    return enum_view, []

  assert False, "Unknown variety of type {}".format(type_definition)
343
344
def _get_cpp_view_type_for_physical_type(
    type_ir, size, byte_order, ir, buffer_type, parent_addressable_unit,
    validator):
  """Returns the C++ type information needed to construct a field's view.

  Returns the C++ type of an ir_data.Type, the C++ types of its parameters, if
  any, and the parameters themselves.

  Arguments:
      type_ir: The ir_data.Type whose view should be constructed.
      size: The size, in type_definition.addressable_units, of the instantiated
          type, or None if it is not known at compile time.
          NOTE(review): the recursive call for array elements passes a size in
          bits here -- confirm the intended unit.
      byte_order: For BIT types which are direct children of BYTE types,
          "LittleEndian", "BigEndian", or "None".  Otherwise, None.
      ir: The complete IR.
      buffer_type: The C++ type to be used as the Storage parameter of the view
          (e.g., "ContiguousBuffer<...>").
      parent_addressable_unit: The addressable_unit_size of the structure
          containing this type.
      validator: The name of the validator type to be injected into the view.

  Returns:
      A tuple of three items: the C++ type for a view of the given Emboss Type,
      a list of the C++ types of any parameters of the view type, which should
      be passed to the view's constructor, and a list of the runtime parameters
      of the underlying atomic type.  For arrays, the second and third items
      are those of the element type.
  """
  if ir_util.is_array(type_ir):
    # An array view is parameterized by the element's view type.
    base_type = type_ir.array_type.base_type
    element_size_in_bits = _get_type_size(base_type, ir)
    assert element_size_in_bits, (
        "TODO(bolms): Implement arrays of dynamically-sized elements.")
    assert element_size_in_bits % parent_addressable_unit == 0, (
        "Array elements must fall on byte boundaries.")
    # Element size expressed in the parent's addressable units.
    element_size = element_size_in_bits // parent_addressable_unit
    # Recurse to get the view type of a single element; the element's storage
    # is the parent buffer wrapped in an offset adapter.
    element_view_type, element_view_parameter_types, element_view_parameters = (
        _get_cpp_view_type_for_physical_type(
            base_type, element_size_in_bits, byte_order, ir,
            _offset_storage_adapter(buffer_type, element_size, 0),
            parent_addressable_unit, validator))
    return (
        code_template.format_template(
            _TEMPLATES.array_view_adapter,
            support_namespace=_SUPPORT_NAMESPACE,
            # TODO(bolms): The element size should be calculable from the field
            # size and array length.
            element_view_type=element_view_type,
            element_view_parameter_types="".join(
                ", " + p for p in element_view_parameter_types),
            element_size=element_size,
            addressable_unit_size=int(parent_addressable_unit),
            buffer_type=buffer_type),
        element_view_parameter_types,
        element_view_parameters
    )
  else:
    assert type_ir.HasField("atomic_type")
    reference = type_ir.atomic_type.reference
    referenced_type = ir_util.find_object(reference, ir)
    # A type with a smaller addressable unit than its parent needs a byte
    # order.
    if parent_addressable_unit > referenced_type.addressable_unit:
      assert byte_order, repr(type_ir)
    reader, parameter_types = _get_cpp_view_type_for_type_definition(
        referenced_type, size, ir, buffer_type, byte_order,
        parent_addressable_unit, validator)
    return reader, parameter_types, list(type_ir.atomic_type.runtime_parameter)
410
411
def _render_variable(variable, prefix=""):
  """Renders a variable reference (e.g., `foo` or `foo.bar.baz`) in C++ code.

  Arguments:
    variable: A non-empty sequence of name paths, one per step of the
      reference.
    prefix: Text to prepend to the final accessor name.

  Returns:
    The accessor calls for every step, joined with ".".
  """
  # A "variable" could be an immediate field or a subcomponent of an immediate
  # field.  Either way, the last component of each name is enough in C++; the
  # method does not need to be qualified with the type.
  accessors = [_cpp_field_name(path[-1]) + "()" for path in variable]
  accessors[-1] = prefix + accessors[-1]
  return ".".join(accessors)
422
423
def _render_enum_value(enum_type, ir):
  """Returns a C++ Maybe-wrapped rendering of a constant enum value.

  Arguments:
    enum_type: An object with name.canonical_name and value.
    ir: The IR in which to look up the enum's module.

  Returns:
    A C++ expression constructing a Maybe of the enum type from a static_cast
    of enum_type.value.
  """
  cpp_enum_type = _get_fully_qualified_name(enum_type.name.canonical_name, ir)
  wrapper = _maybe_type(cpp_enum_type)
  return "{}(static_cast</**/{}>({}))".format(
      wrapper, cpp_enum_type, enum_type.value)
428
429
def _builtin_function_name(function):
  """Returns the C++ operator name corresponding to an Emboss operator.

  Arguments:
    function: An ir_data.FunctionMapping value.

  Returns:
    The name of the corresponding C++ support function.

  Raises:
    KeyError: If `function` has no entry in the table below.
  """
  mapping = ir_data.FunctionMapping
  cpp_name_for = {
      mapping.ADDITION: "Sum",
      mapping.SUBTRACTION: "Difference",
      mapping.MULTIPLICATION: "Product",
      mapping.EQUALITY: "Equal",
      mapping.INEQUALITY: "NotEqual",
      mapping.AND: "And",
      mapping.OR: "Or",
      mapping.LESS: "LessThan",
      mapping.LESS_OR_EQUAL: "LessThanOrEqual",
      mapping.GREATER: "GreaterThan",
      mapping.GREATER_OR_EQUAL: "GreaterThanOrEqual",
      mapping.CHOICE: "Choice",
      mapping.MAXIMUM: "Maximum",
  }
  return cpp_name_for[function]
448
449
def _cpp_basic_type_for_expression_type(expression_type, ir):
  """Returns the C++ basic type (int32_t, bool, etc.) for an ExpressionType.

  Arguments:
    expression_type: The expression type to convert.
    ir: The IR in which to look up enumeration names.

  Returns:
    A C++ integer type for integers, "bool" for booleans, or the
    fully-qualified enum name for enumerations.
  """
  kind = expression_type.WhichOneof("type")
  if kind == "integer":
    bounds = expression_type.integer
    return _cpp_integer_type_for_range(
        int(bounds.minimum_value), int(bounds.maximum_value))
  if kind == "boolean":
    return "bool"
  if kind == "enumeration":
    return _get_fully_qualified_name(
        expression_type.enumeration.name.canonical_name, ir)
  assert False, "Unknown expression type " + expression_type.WhichOneof(
      "type")
464
465
def _cpp_basic_type_for_expression(expression, ir):
  """Returns the C++ basic type (int32_t, bool, etc.) for an Expression."""
  expression_type = expression.type
  return _cpp_basic_type_for_expression_type(expression_type, ir)
469
470
471def _cpp_integer_type_for_range(min_val, max_val):
472  """Returns the appropriate C++ integer type to hold min_val up to max_val."""
473  # The choice of int32_t, uint32_t, int64_t, then uint64_t is somewhat
474  # arbitrary here, and might not be perfectly ideal.  I (bolms@) have chosen
475  # this set of types to a) minimize the number of casts that occur in
476  # arithmetic expressions, and b) favor 32-bit arithmetic, which is mostly
477  # "cheapest" on current (2018) systems.  Signed integers are also preferred
478  # over unsigned so that the C++ compiler can take advantage of undefined
479  # overflow.
480  for size in (32, 64):
481    if min_val >= -(2**(size - 1)) and max_val <= 2**(size - 1) - 1:
482      return "::std::int{}_t".format(size)
483    elif min_val >= 0 and max_val <= 2**size - 1:
484      return "::std::uint{}_t".format(size)
485  return None
486
487
488def _cpp_integer_type_for_enum(max_bits, is_signed):
489  """Returns the appropriate C++ integer type to hold an enum."""
490  # This is used to determine the `X` in `enum class : X`.
491  #
492  # Unlike _cpp_integer_type_for_range, the type chosen here is used for actual
493  # storage.  Further, sizes smaller than 64 are explicitly chosen by a human
494  # author, so take the smallest size that can hold the given number of bits.
495  #
496  # Technically, the C++ standard allows some of these sizes of integer to not
497  # exist, and other sizes (say, int24_t) might exist, but in practice this set
498  # is almost always available.  If you're compiling for some exotic DSP that
499  # uses unusual int sizes, email [email protected].
500  for size in (8, 16, 32, 64):
501    if max_bits <= size:
502      return "::std::{}int{}_t".format("" if is_signed else "u", size)
503  assert False, f"Invalid value {max_bits} for maximum_bits"
504
505
def _render_builtin_operation(expression, ir, field_reader, subexpressions):
  """Renders a built-in operation (+, -, &&, etc.) into C++ code.

  Arguments:
      expression: The function-call expression to render.  Must not be an
          UPPER_BOUND or LOWER_BOUND call.
      ir: The IR in which to look up references.
      field_reader: An object with render_existence and render_field methods
          appropriate for the C++ context of the expression.
      subexpressions: A _SubexpressionStore in which to put subexpressions, or
          None if subexpressions should be inline.

  Returns:
      The C++ text of a call to the matching ::emboss::support function, or,
      for PRESENCE, whatever field_reader.render_existence returns.
  """
  assert expression.function.function not in (
      ir_data.FunctionMapping.UPPER_BOUND, ir_data.FunctionMapping.LOWER_BOUND), (
          "UPPER_BOUND and LOWER_BOUND should be constant.")
  # `$has`/PRESENCE is delegated entirely to the field reader.
  if expression.function.function == ir_data.FunctionMapping.PRESENCE:
    return field_reader.render_existence(expression.function.args[0],
                                         subexpressions)
  args = expression.function.args
  rendered_args = [
      _render_expression(arg, ir, field_reader, subexpressions).rendered
      for arg in args]
  # Collect the integer bounds, enum types, and boolean-ness of the result and
  # of every argument; these determine the intermediate type below.
  minimum_integers = []
  maximum_integers = []
  enum_types = set()
  have_boolean_types = False
  for subexpression in [expression] + list(args):
    if subexpression.type.WhichOneof("type") == "integer":
      minimum_integers.append(int(subexpression.type.integer.minimum_value))
      maximum_integers.append(int(subexpression.type.integer.maximum_value))
    elif subexpression.type.WhichOneof("type") == "enumeration":
      enum_types.add(_cpp_basic_type_for_expression(subexpression, ir))
    elif subexpression.type.WhichOneof("type") == "boolean":
      have_boolean_types = True
  # At present, all Emboss functions other than `$has` take and return one of
  # the following:
  #
  #     integers
  #     integers and booleans
  #     a single enum type
  #     a single enum type and booleans
  #     booleans
  #
  # Really, the intermediate type is only necessary for integers, but it
  # simplifies the C++ somewhat if the appropriate enum/boolean type is provided
  # as "IntermediateT" -- it means that, e.g., the choice ("?:") operator does
  # not have to have two versions, one of which casts (some of) its arguments to
  # IntermediateT, and one of which does not.
  #
  # This is not a particularly robust scheme, but it works for all of the Emboss
  # functions I (bolms@) have written and am considering (division, modulus,
  # exponentiation, logical negation, bit shifts, bitwise and/or/xor, $min,
  # $floor, $ceil, $has).
  if minimum_integers and not enum_types:
    # Integers (possibly mixed with booleans): one integer type wide enough for
    # the result and all integer arguments.
    intermediate_type = _cpp_integer_type_for_range(min(minimum_integers),
                                                    max(maximum_integers))
  elif len(enum_types) == 1 and not minimum_integers:
    intermediate_type = list(enum_types)[0]
  else:
    assert have_boolean_types
    assert not enum_types
    assert not minimum_integers
    intermediate_type = "bool"
  arg_types = [_cpp_basic_type_for_expression(arg, ir) for arg in args]
  result_type = _cpp_basic_type_for_expression(expression, ir)
  # Template arguments: intermediate type, result type, then argument types.
  function_variant = "</**/{}, {}, {}>".format(
      intermediate_type, result_type, ", ".join(arg_types))
  return "::emboss::support::{}{}({})".format(
      _builtin_function_name(expression.function.function),
      function_variant, ", ".join(rendered_args))
566
567
class _FieldRenderer:
  """Base class for rendering field reads.

  Holds the shared rendering logic; callers supply the C++ prefix that precedes
  the rendered field accessor.
  """

  def render_field_read_with_context(self, expression, ir, prefix,
                                     subexpressions):
    """Renders a checked read of the field referenced by `expression`.

    Arguments:
        expression: An expression holding a field_reference.
        ir: The IR in which to look up references.
        prefix: C++ text to prepend to the rendered field accessor.
        subexpressions: A _SubexpressionStore to which the accessor is added,
            or None to use the accessor inline.

    Returns:
        C++ text that yields a Maybe holding the field's value when the field
        is Ok(), and an empty Maybe otherwise.
    """
    reference_path = ir_util.hashable_form_of_field_reference(
        expression.field_reference)
    accessor = prefix + _render_variable(reference_path)
    view_expression = (
        accessor if subexpressions is None else subexpressions.add(accessor))
    cpp_type = _cpp_basic_type_for_expression(expression, ir)
    read_template = ("({0}.Ok()"
                     "    ? {1}(static_cast</**/{2}>({0}.UncheckedRead()))"
                     "    : {1}())")
    return read_template.format(
        view_expression, _maybe_type(cpp_type), cpp_type)

  def render_existence_with_context(self, expression, prefix, subexpressions):
    """Renders the `has_` accessor of the field referenced by `expression`.

    Arguments:
        expression: An expression holding a field_reference.
        prefix: C++ text to prepend to the rendered accessor.
        subexpressions: Unused.

    Returns:
        The prefix followed by the rendered `has_...()` accessor chain.
    """
    reference_path = ir_util.hashable_form_of_field_reference(
        expression.field_reference)
    has_accessor = _render_variable(reference_path, "has_")
    return "{1}{0}".format(has_accessor, prefix)
596
597
class _DirectFieldRenderer(_FieldRenderer):
  """Renderer for fields read from inside a structure's View type.

  Fields are accessed without any prefix.
  """

  def render_field(self, expression, ir, subexpressions):
    """Renders a read of the referenced field with an empty prefix."""
    no_prefix = ""
    return self.render_field_read_with_context(
        expression, ir, no_prefix, subexpressions)

  def render_existence(self, expression, subexpressions):
    """Renders the existence check of the referenced field, unprefixed."""
    no_prefix = ""
    return self.render_existence_with_context(
        expression, no_prefix, subexpressions)
607
608
class _VirtualViewFieldRenderer(_FieldRenderer):
  """Renderer for field reads from inside a virtual field's View.

  Fields are accessed through the `view_` member.
  """

  def render_field(self, expression, ir, subexpressions):
    """Renders a read of the referenced field through `view_.`."""
    view_prefix = "view_."
    return self.render_field_read_with_context(
        expression, ir, view_prefix, subexpressions)

  def render_existence(self, expression, subexpressions):
    """Renders the existence check of the referenced field via `view_.`."""
    view_prefix = "view_."
    return self.render_existence_with_context(
        expression, view_prefix, subexpressions)
619
620
621class _SubexpressionStore(object):
622  """Holder for subexpressions to be assigned to local variables."""
623
624  def __init__(self, prefix):
625    self._prefix = prefix
626    self._subexpr_to_name = {}
627    self._index_to_subexpr = []
628
629  def add(self, subexpr):
630    if subexpr not in self._subexpr_to_name:
631      self._index_to_subexpr.append(subexpr)
632      self._subexpr_to_name[subexpr] = (
633          self._prefix + str(len(self._index_to_subexpr)))
634    return self._subexpr_to_name[subexpr]
635
636  def subexprs(self):
637    return [(self._subexpr_to_name[subexpr], subexpr)
638            for subexpr in self._index_to_subexpr]
639
640
641_ExpressionResult = collections.namedtuple("ExpressionResult",
642                                           ["rendered", "is_constant"])
643
644
def _render_expression(expression, ir, field_reader=None, subexpressions=None):
  """Renders an expression into C++ code.

  Arguments:
      expression: The expression to render.
      ir: The IR in which to look up references.
      field_reader: An object with render_existence and render_field methods
          appropriate for the C++ context of the expression.  If None, a
          _DirectFieldRenderer is used.
      subexpressions: A _SubexpressionStore in which to put subexpressions, or
          None if subexpressions should be inline.

  Returns:
      An _ExpressionResult tuple of (rendered, is_constant), where rendered is
      C++ code that can be emitted, and is_constant is True if the expression is
      a compile-time constant suitable for use in a C++11 constexpr context,
      otherwise False.
  """
  if field_reader is None:
    field_reader = _DirectFieldRenderer()

  # If the expression is constant, there are no guarantees that subexpressions
  # will fit into C++ types, or that operator arguments and return types can fit
  # in the same type: expressions like `-0x8000_0000_0000_0000` and
  # `0x1_0000_0000_0000_0000 - 1` can appear.
  #
  # So constants are rendered directly from the expression's type information,
  # without descending into the expression itself.
  if expression.type.WhichOneof("type") == "integer":
    # A modulus of "infinity" marks an integer whose value is fully known.
    if expression.type.integer.modulus == "infinity":
      return _ExpressionResult(_render_integer_for_expression(int(
          expression.type.integer.modular_value)), True)
  elif expression.type.WhichOneof("type") == "boolean":
    if expression.type.boolean.HasField("value"):
      if expression.type.boolean.value:
        return _ExpressionResult(_maybe_type("bool") + "(true)", True)
      else:
        return _ExpressionResult(_maybe_type("bool") + "(false)", True)
  elif expression.type.WhichOneof("type") == "enumeration":
    if expression.type.enumeration.HasField("value"):
      return _ExpressionResult(
          _render_enum_value(expression.type.enumeration, ir), True)
  else:
    # There shouldn't be any "opaque" type expressions here.
    assert False, "Unhandled expression type {}".format(
        expression.type.WhichOneof("type"))

  result = None
  # Otherwise, render the operation.
  if expression.WhichOneof("expression") == "function":
    result = _render_builtin_operation(
        expression, ir, field_reader, subexpressions)
  elif expression.WhichOneof("expression") == "field_reference":
    result = field_reader.render_field(expression, ir, subexpressions)
  elif (expression.WhichOneof("expression") == "builtin_reference" and
        expression.builtin_reference.canonical_name.object_path[-1] ==
        "$logical_value"):
    # `$logical_value` is returned directly and is never added to the
    # subexpression store.
    return _ExpressionResult(
        _maybe_type("decltype(emboss_reserved_local_value)") +
        "(emboss_reserved_local_value)", False)

  # Any of the constant expression types should have been handled in the
  # previous section.
  assert result is not None, "Unable to render expression {}".format(
      str(expression))

  if subexpressions is None:
    return _ExpressionResult(result, False)
  else:
    # Register the rendered text and return the name assigned to it.
    return _ExpressionResult(subexpressions.add(result), False)
711
712
def _render_existence_test(field, ir, subexpressions=None):
  """Renders the existence condition of a field into C++ code.

  Arguments:
      field: The field IR whose existence_condition should be rendered.
      ir: The IR in which to look up references.
      subexpressions: A _SubexpressionStore in which to put subexpressions, or
          None if subexpressions should be inline.

  Returns:
      The result of _render_expression for field.existence_condition.
  """
  # Pass `subexpressions` by keyword: the third positional parameter of
  # _render_expression is `field_reader`, not `subexpressions`.
  return _render_expression(
      field.existence_condition, ir, subexpressions=subexpressions)
715
716
717def _alignment_of_location(location):
718  constraints = location.start.type.integer
719  if constraints.modulus == "infinity":
720    # The C++ templates use 0 as a sentinel value meaning infinity for
721    # alignment.
722    return 0, constraints.modular_value
723  else:
724    return constraints.modulus, constraints.modular_value
725
726
def _get_cpp_type_reader_of_field(field_ir, ir, buffer_type, validator,
                                  parent_addressable_unit):
  """Looks up the C++ view type used to read a physical field.

  Arguments:
    field_ir: The IR for the field.
    ir: The full IR for the module.
    buffer_type: The C++ storage type name; it is wrapped in an offset storage
      adapter built from the alignment of the field's location.
    validator: The validator to pass through to the view type lookup.
    parent_addressable_unit: The addressable unit of the enclosing structure;
      a constant location size is multiplied by it to obtain the field size.

  Returns:
    The result of _get_cpp_view_type_for_physical_type for the field's type.
  """
  field_type = field_ir.type
  if field_type.HasField("size_in_bits"):
    size = ir_util.constant_value(field_type.size_in_bits)
    assert size is not None
  elif ir_util.is_constant(field_ir.location.size):
    # TODO(bolms): Normalize the IR so that this clause is unnecessary.
    size = (ir_util.constant_value(field_ir.location.size) *
            parent_addressable_unit)
  else:
    # The size is not statically known.
    size = None

  order_attr = ir_util.get_attribute(field_ir.attribute, "byte_order")
  byte_order = order_attr.string_constant.text if order_attr else ""

  alignment, offset = _alignment_of_location(field_ir.location)
  storage_type = _offset_storage_adapter(buffer_type, alignment, offset)
  return _get_cpp_view_type_for_physical_type(
      field_type, size, byte_order, ir, storage_type,
      parent_addressable_unit, validator)
748
749
def _generate_structure_field_methods(enclosing_type_name, field_ir, ir,
                                      parent_addressable_unit):
  """Generates C++ methods for one field, dispatching on whether it is virtual.

  Physical fields are handled by _generate_structure_physical_field_methods
  (which also needs the parent's addressable unit); virtual fields are handled
  by _generate_structure_virtual_field_methods.
  """
  if not ir_util.field_is_virtual(field_ir):
    return _generate_structure_physical_field_methods(
        enclosing_type_name, field_ir, ir, parent_addressable_unit)
  return _generate_structure_virtual_field_methods(
      enclosing_type_name, field_ir, ir)
758
759
def _generate_custom_validator_expression_for(field_ir, ir):
  """Renders the `requires` attribute of a field as C++, if it has one.

  Arguments:
    field_ir: The IR for the field.
    ir: The full IR for the module.

  Returns:
    The rendered C++ text of the `requires` expression, or None when the field
    has no `requires` attribute.
  """
  requires_attr = ir_util.get_attribute(field_ir.attribute, "requires")
  if not requires_attr:
    return None

  class _ValidatorFieldReader(object):
    """A "FieldReader" that renders the current field as the local value."""

    def render_existence(self, expression, subexpressions):
      del expression  # Unused.
      assert False, "Shouldn't be here."

    def render_field(self, expression, ir, subexpressions):
      # The only field reference allowed here is the field being validated.
      path = expression.field_reference.path
      assert len(path) == 1
      assert path[0].canonical_name == field_ir.name.canonical_name
      cpp_type = _cpp_basic_type_for_expression(expression, ir)
      return _maybe_type(cpp_type) + "(emboss_reserved_local_value)"

  return _render_expression(
      requires_attr.expression, ir, _ValidatorFieldReader()).rendered
784
785
def _generate_validator_expression_for(field_ir, ir):
  """Returns a validator expression for the given field.

  Falls back to an always-true `Maybe<bool>` when the field has no custom
  validator expression.
  """
  custom_expression = _generate_custom_validator_expression_for(field_ir, ir)
  if custom_expression is not None:
    return custom_expression
  return "::emboss::support::Maybe<bool>(true)"
792
793
def _generate_structure_virtual_field_methods(enclosing_type_name, field_ir,
                                              ir):
  """Generates C++ code for methods for a single virtual field.

  Fields whose write method is an alias are delegated to
  _generate_field_indirection.  For all other virtual fields, the "const"
  templates are used when both the read transform and the existence condition
  render as constants; otherwise the general templates are used.

  Arguments:
    enclosing_type_name: The text name of the enclosing type.
    field_ir: The IR for the field to generate methods for.
    ir: The full IR for the module.

  Returns:
    A tuple of ("", declarations, definitions).  The declarations can be
    inserted into the class definition for the enclosing type's View.  Any
    definitions should be placed after the class definition.  These are
    separated to satisfy C++'s declaration-before-use requirements.
  """
  write_method_kind = field_ir.write_method.WhichOneof("method")
  if write_method_kind == "alias":
    return _generate_field_indirection(field_ir, enclosing_type_name, ir)

  read_transform = field_ir.read_transform
  read_subexpressions = _SubexpressionStore("emboss_reserved_local_subexpr_")
  read_value = _render_expression(
      read_transform, ir,
      field_reader=_VirtualViewFieldRenderer(),
      subexpressions=read_subexpressions)
  field_exists = _render_existence_test(field_ir, ir)
  logical_type = _cpp_basic_type_for_expression(read_transform, ir)

  if read_value.is_constant and field_exists.is_constant:
    # A constant read should not have needed any subexpressions.
    assert not read_subexpressions.subexprs()
    declaration_template, definition_template = (
        _TEMPLATES.structure_single_const_virtual_field_method_declarations,
        _TEMPLATES.structure_single_const_virtual_field_method_definitions)
  else:
    declaration_template, definition_template = (
        _TEMPLATES.structure_single_virtual_field_method_declarations,
        _TEMPLATES.structure_single_virtual_field_method_definitions)

  # Only fields with a "transform" write method get write methods.
  if write_method_kind == "transform":
    write_transform = field_ir.write_method.transform
    destination = _render_variable(
        ir_util.hashable_form_of_field_reference(write_transform.destination))
    transform = _render_expression(
        write_transform.function_body, ir,
        field_reader=_VirtualViewFieldRenderer()).rendered
    write_methods = code_template.format_template(
        _TEMPLATES.structure_single_virtual_field_write_methods,
        logical_type=logical_type,
        destination=destination,
        transform=transform)
  else:
    write_methods = ""

  # "$"-prefixed names are mapped to C++ names through _cpp_field_name.
  name = field_ir.name.canonical_name.object_path[-1]
  if name.startswith("$"):
    name = _cpp_field_name(field_ir.name.name.text)
    virtual_view_type_name = "EmbossReservedDollarVirtual{}View".format(name)
  else:
    camel_name = name_conversion.snake_to_camel(name)
    virtual_view_type_name = "EmbossReservedVirtual{}View".format(camel_name)

  assert logical_type, "Could not find appropriate C++ type for {}".format(
      field_ir.read_transform)

  value_kind = read_transform.type.WhichOneof("type")
  if value_kind == "integer":
    write_to_text_stream_function = "WriteIntegerViewToTextStream"
  elif value_kind == "boolean":
    write_to_text_stream_function = "WriteBooleanViewToTextStream"
  elif value_kind == "enumeration":
    write_to_text_stream_function = "WriteEnumViewToTextStream"
  else:
    assert False, "Unexpected read-only virtual field type {}".format(
        value_kind)

  value_is_ok = _generate_validator_expression_for(field_ir, ir)
  subexpression_lines = [
      "      const auto {} = {};\n".format(subexpr_name, subexpr)
      for subexpr_name, subexpr in read_subexpressions.subexprs()
  ]
  declaration = code_template.format_template(
      declaration_template,
      visibility=_visibility_for_field(field_ir),
      name=name,
      virtual_view_type_name=virtual_view_type_name,
      logical_type=logical_type,
      read_subexpressions="".join(subexpression_lines),
      read_value=read_value.rendered,
      write_to_text_stream_function=write_to_text_stream_function,
      parent_type=enclosing_type_name,
      write_methods=write_methods,
      value_is_ok=value_is_ok)
  definition = code_template.format_template(
      definition_template,
      name=name,
      virtual_view_type_name=virtual_view_type_name,
      logical_type=logical_type,
      read_value=read_value.rendered,
      parent_type=enclosing_type_name,
      field_exists=field_exists.rendered)
  return "", declaration, definition
891
892
def _generate_validator_type_for(enclosing_type_name, field_ir, ir):
  """Returns a validator type name and definition for the given field.

  Arguments:
    enclosing_type_name: The text name of the enclosing type; used both to
      qualify the validator name and as the namespace wrapping its definition.
    field_ir: The IR for the field.
    ir: The full IR for the module.

  Returns:
    A (type name, definition) pair.  Fields without a custom validator
    expression get ("::emboss::support::AllValuesAreOk", "").
  """
  validator_expression = _generate_custom_validator_expression_for(field_ir, ir)
  if validator_expression is None:
    return "::emboss::support::AllValuesAreOk", ""

  validator_name = "EmbossReservedValidatorFor{}".format(
      name_conversion.snake_to_camel(
          field_ir.name.canonical_name.object_path[-1]))
  qualified_name = "{}::{}".format(enclosing_type_name, validator_name)

  definition = _wrap_in_namespace(
      code_template.format_template(
          _TEMPLATES.structure_field_validator,
          name=validator_name,
          expression=validator_expression,
      ),
      [enclosing_type_name])
  return qualified_name, definition
913
914
def _generate_structure_physical_field_methods(enclosing_type_name, field_ir,
                                               ir, parent_addressable_unit):
  """Generates C++ code for methods for a single physical field.

  Arguments:
    enclosing_type_name: The text name of the enclosing type.
    field_ir: The IR for the field to generate methods for.
    ir: The full IR for the module.
    parent_addressable_unit: The addressable unit (BIT or BYTE) of the enclosing
        structure.

  Returns:
    A tuple of (validator declaration, declarations, definitions).  The
    validator declaration is the helper type produced by
    _generate_validator_type_for (possibly empty).  The declarations can be
    inserted into the class definition for the enclosing type's View.  Any
    definitions should be placed after the class definition.  These are
    separated to satisfy C++'s declaration-before-use requirements.
  """
  validator_type, validator_declaration = _generate_validator_type_for(
      enclosing_type_name, field_ir, ir)

  type_reader, unused_parameter_types, parameter_expressions = (
      _get_cpp_type_reader_of_field(field_ir, ir, "Storage", validator_type,
                                    parent_addressable_unit))

  field_name = field_ir.name.canonical_name.object_path[-1]

  # One store is shared by the parameter, offset, and size expressions; the
  # subexpressions added for the parameters are emitted separately from those
  # added afterwards for the offset and size.
  subexpressions = _SubexpressionStore("emboss_reserved_local_subexpr_")
  rendered_parameters = [
      _render_expression(parameter, ir, subexpressions=subexpressions).rendered
      for parameter in parameter_expressions
  ]
  parameter_values = "".join(
      "{}.ValueOrDefault(), ".format(rendered)
      for rendered in rendered_parameters)
  parameters_known = "".join(
      "{}.Known() && ".format(rendered) for rendered in rendered_parameters)

  parameter_subexprs = subexpressions.subexprs()
  parameter_subexpressions = "".join(
      "  const auto {} = {};\n".format(name, subexpr)
      for name, subexpr in parameter_subexprs)
  parameter_subexpr_count = len(parameter_subexprs)

  location = field_ir.location
  offset = _render_expression(
      location.start, ir, subexpressions=subexpressions).rendered
  size = _render_expression(
      location.size, ir, subexpressions=subexpressions).rendered
  size_and_offset_subexpressions = "".join(
      "    const auto {} = {};\n".format(name, subexpr)
      for name, subexpr in subexpressions.subexprs()[parameter_subexpr_count:])

  field_alignment, field_offset = _alignment_of_location(field_ir.location)
  declaration = code_template.format_template(
      _TEMPLATES.structure_single_field_method_declarations,
      type_reader=type_reader,
      visibility=_visibility_for_field(field_ir),
      name=field_name)
  definition = code_template.format_template(
      _TEMPLATES.structure_single_field_method_definitions,
      parent_type=enclosing_type_name,
      name=field_name,
      type_reader=type_reader,
      offset=offset,
      size=size,
      size_and_offset_subexpressions=size_and_offset_subexpressions,
      field_exists=_render_existence_test(field_ir, ir).rendered,
      alignment=field_alignment,
      parameters_known=parameters_known,
      parameter_values=parameter_values,
      parameter_subexpressions=parameter_subexpressions,
      static_offset=field_offset)
  return validator_declaration, declaration, definition
988
989
def _render_size_method(fields, ir):
  """Renders the Size methods of a struct or bits, using the correct templates.

  Arguments:
    fields: The list of fields in the struct or bits.  This is used to find the
      $size_in_bits or $size_in_bytes virtual field.
    ir: The IR to which fields belong.

  Returns:
    A string representation of the Size methods, suitable for inclusion in an
    Emboss View class.
  """
  # The SizeInBytes(), SizeInBits(), and SizeIsKnown() methods just forward to
  # the generated IntrinsicSizeIn$_units_$() method, which returns a virtual
  # field with Read() and Ok() methods.
  #
  # TODO(bolms): Remove these shims, rename IntrinsicSizeIn$_units_$ to
  # SizeIn$_units_$, and update all callers to the new API.
  size_field_names = ("$size_in_bits", "$size_in_bytes")
  for field in fields:
    field_name = field.name.name.text
    if field_name not in size_field_names:
      continue
    # The size is constexpr only when both the read_transform and the
    # existence_condition are constant.
    size_is_constant = (
        _render_expression(field.read_transform, ir).is_constant and
        _render_expression(field.existence_condition, ir).is_constant)
    if size_is_constant:
      template = _TEMPLATES.constant_structure_size_method
    else:
      template = _TEMPLATES.runtime_structure_size_method
    units = "Bits" if field_name == "$size_in_bits" else "Bytes"
    return code_template.format_template(template, units=units)
  assert False, "Expected a $size_in_bits or $size_in_bytes field."
1021
1022
1023def _visibility_for_field(field_ir):
1024  """Returns the C++ visibility for field_ir within its parent view."""
1025  # Generally, the Google style guide for hand-written C++ forbids having
1026  # multiple public: and private: sections, but trying to conform to that bit of
1027  # the style guide would make this file significantly more complex.
1028  #
1029  # Alias fields are generated as simple methods that forward directly to the
1030  # aliased field's method:
1031  #
1032  #     auto alias() const -> decltype(parent().child().aliased_subchild()) {
1033  #       return parent().child().aliased_subchild();
1034  #     }
1035  #
1036  # Figuring out the return type of `parent().child().aliased_subchild()` is
1037  # quite complex, since there are several levels of template indirection
1038  # involved.  It is much easier to just leave it up to the C++ compiler.
1039  #
1040  # Unfortunately, the C++ compiler will complain if `parent()` is not declared
1041  # before `alias()`.  If the `parent` field happens to be anonymous, the Google
1042  # style guide would put `parent()`'s declaration after `alias()`'s
1043  # declaration, which causes the C++ compiler to complain that `parent` is
1044  # unknown.
1045  #
1046  # The easy fix to this is just to declare `parent()` before `alias()`, and
1047  # explicitly mark `parent()` as `private` and `alias()` as `public`.
1048  #
1049  # Perhaps surprisingly, this limitation does not apply when `parent()`'s type
1050  # is not yet complete at the point where `alias()` is declared; I believe this
1051  # is because both `parent()` and `alias()` exist in a templated `class`, and
1052  # by the time `parent().child().aliased_subchild()` is actually resolved, the
1053  # compiler is instantiating the class and has the full definitions of all the
1054  # other classes available.
1055  if field_ir.name.is_anonymous:
1056    return "private"
1057  else:
1058    return "public"
1059
1060
def _generate_field_indirection(field_ir, parent_type_name, ir):
  """Renders a method which forwards to a field's view.

  Arguments:
    field_ir: The IR for the alias field; its write_method.alias names the
      field being forwarded to.
    parent_type_name: The text name of the enclosing type.
    ir: The full IR for the module.

  Returns:
    A tuple of ("", declaration, definition).
  """
  alias_name = field_ir.name.name.text
  alias_target = _render_variable(
      ir_util.hashable_form_of_field_reference(field_ir.write_method.alias))

  declaration = code_template.format_template(
      _TEMPLATES.structure_single_field_indirect_method_declarations,
      aliased_field=alias_target,
      visibility=_visibility_for_field(field_ir),
      parent_type=parent_type_name,
      name=alias_name)
  definition = code_template.format_template(
      _TEMPLATES.struct_single_field_indirect_method_definitions,
      parent_type=parent_type_name,
      name=alias_name,
      aliased_field=alias_target,
      field_exists=_render_existence_test(field_ir, ir).rendered)
  return "", declaration, definition
1078
1079
def _generate_subtype_definitions(type_ir, ir, config: Config):
  """Generates C++ code for subtypes of type_ir.

  Every subtype is generated with _generate_type_definition; the resulting
  pieces are joined with newlines and wrapped in a namespace named after
  type_ir.

  Returns:
    A tuple of (bodies, forward declarations, method definitions).  Note that
    this is not the (forward declaration, body, methods) order produced by
    _generate_type_definition.
  """
  type_name = type_ir.name.name.text

  def wrap(pieces):
    return _wrap_in_namespace("\n".join(pieces), [type_name])

  forward_declarations = []
  bodies = []
  method_definitions = []
  for subtype in type_ir.subtype:
    forward_declaration, body, methods = _generate_type_definition(
        subtype, ir, config)
    forward_declarations.append(forward_declaration)
    bodies.append(body)
    method_definitions.append(methods)

  wrapped_forward_declarations = wrap(forward_declarations)
  wrapped_bodies = wrap(bodies)
  wrapped_method_definitions = wrap(method_definitions)
  return (wrapped_bodies, wrapped_forward_declarations,
          wrapped_method_definitions)
1099
1100
1101def _cpp_field_name(name):
1102  """Returns the C++ name for the given field name."""
1103  if name.startswith("$"):
1104    dollar_field_names = {
1105        "$size_in_bits": "IntrinsicSizeInBits",
1106        "$size_in_bytes": "IntrinsicSizeInBytes",
1107        "$max_size_in_bits": "MaxSizeInBits",
1108        "$min_size_in_bits": "MinSizeInBits",
1109        "$max_size_in_bytes": "MaxSizeInBytes",
1110        "$min_size_in_bytes": "MinSizeInBytes",
1111    }
1112    return dollar_field_names[name]
1113  else:
1114    return name
1115
1116
def _generate_structure_definition(type_ir, ir, config: Config):
  """Generates C++ for an Emboss structure (struct or bits).

  Arguments:
    type_ir: The IR for the struct definition.
    ir: The full IR; used for type lookups.
    config: The generator configuration.  It is forwarded to subtype
      generation, and its include_enum_traits flag decides whether the text
      stream methods are generated for the view class.

  Returns:
    A tuple of: (forward declaration for classes, class bodies, method bodies),
    suitable for insertion into the appropriate places in the generated header.
  """
  subtype_bodies, subtype_forward_declarations, subtype_method_definitions = (
      _generate_subtype_definitions(type_ir, ir, config))
  type_name = type_ir.name.name.text
  field_helper_type_definitions = []
  field_method_declarations = []
  field_method_definitions = []
  virtual_field_type_definitions = []
  decode_field_clauses = []
  write_field_clauses = []
  ok_method_clauses = []
  equals_method_clauses = []
  unchecked_equals_method_clauses = []
  enum_using_statements = []
  parameter_fields = []
  constructor_parameters = []
  forwarded_parameters = []
  parameter_initializers = []
  parameter_copy_initializers = []
  units = {1: "Bits", 8: "Bytes"}[type_ir.addressable_unit]

  # Enums nested in this structure get a using statement in the view class.
  for subtype in type_ir.subtype:
    if subtype.HasField("enumeration"):
      enum_using_statements.append(
          code_template.format_template(
              _TEMPLATES.enum_using_statement,
              component=_get_fully_qualified_name(subtype.name.canonical_name,
                                                  ir),
              name=_get_unqualified_name(subtype.name.canonical_name)))

  # TODO(bolms): Reorder parameter fields to optimize packing in the view type.
  for parameter in type_ir.runtime_parameter:
    parameter_type = _cpp_basic_type_for_expression_type(parameter.type, ir)
    parameter_name = parameter.name.name.text
    parameter_fields.append("{} {}_;".format(parameter_type, parameter_name))
    constructor_parameters.append(
        "{} {}, ".format(parameter_type, parameter_name))
    forwarded_parameters.append("::std::forward</**/{}>({}),".format(
        parameter_type, parameter_name))
    parameter_initializers.append(", {0}_({0})".format(parameter_name))
    parameter_copy_initializers.append(
        ", {0}_(emboss_reserved_local_other.{0}_)".format(parameter_name))

    field_method_declarations.append(
        code_template.format_template(
            _TEMPLATES.structure_single_parameter_field_method_declarations,
            name=parameter_name,
            logical_type=parameter_type))
    # TODO(bolms): Should parameters appear in text format?
    equals_method_clauses.append(
        code_template.format_template(_TEMPLATES.equals_method_test,
                                      field=parameter_name + "()"))
    unchecked_equals_method_clauses.append(
        code_template.format_template(_TEMPLATES.unchecked_equals_method_test,
                                      field=parameter_name + "()"))
  if type_ir.runtime_parameter:
    # Parameterized views carry a flag recording whether their parameters
    # have been initialized.
    flag_name = "parameters_initialized_"
    parameter_copy_initializers.append(
        ", {0}(emboss_reserved_local_other.{0})".format(flag_name))
    parameters_initialized_flag = "bool {} = false;".format(flag_name)
    initialize_parameters_initialized_true = ", {}(true)".format(flag_name)
    parameter_checks = ["if (!{}) return false;".format(flag_name)]
  else:
    parameters_initialized_flag = ""
    initialize_parameters_initialized_true = ""
    parameter_checks = [""]

  for field_index in type_ir.structure.fields_in_dependency_order:
    field = type_ir.structure.field[field_index]
    helper_types, declaration, definition = (
        _generate_structure_field_methods(
            type_name, field, ir, type_ir.addressable_unit))
    field_helper_type_definitions.append(helper_types)
    field_method_definitions.append(definition)
    ok_method_clauses.append(
        code_template.format_template(
            _TEMPLATES.ok_method_test,
            field=_cpp_field_name(field.name.name.text) + "()"))
    if not ir_util.field_is_virtual(field):
      # Virtual fields do not participate in equality tests -- they are equal by
      # definition.
      equals_method_clauses.append(
          code_template.format_template(
              _TEMPLATES.equals_method_test, field=field.name.name.text + "()"))
      unchecked_equals_method_clauses.append(
          code_template.format_template(
              _TEMPLATES.unchecked_equals_method_test,
              field=field.name.name.text + "()"))
    field_method_declarations.append(declaration)
    if not field.name.is_anonymous and not ir_util.field_is_read_only(field):
      # Anonymous and read-only fields cannot be decoded from text format.
      decode_field_clauses.append(
          code_template.format_template(
              _TEMPLATES.decode_field,
              field_name=field.name.canonical_name.object_path[-1]))
    text_output_attr = ir_util.get_attribute(field.attribute, "text_output")
    # Compare the text of the string constant, not the constant object itself:
    # the object never equals the str "Emit", which made an explicit
    # `[text_output: "Emit"]` behave as if the field should not be written.
    if (not text_output_attr or
        text_output_attr.string_constant.text == "Emit"):
      if ir_util.field_is_read_only(field):
        write_field_template = _TEMPLATES.write_read_only_field_to_text_stream
      else:
        write_field_template = _TEMPLATES.write_field_to_text_stream
      write_field_clauses.append(
          code_template.format_template(
              write_field_template,
              field_name=field.name.canonical_name.object_path[-1]))

  # A structure-level `requires` attribute becomes an extra check in Ok().
  requires_attr = ir_util.get_attribute(type_ir.attribute, "requires")
  if requires_attr is not None:
    requires_clause = _render_expression(
        requires_attr.expression, ir, _DirectFieldRenderer()).rendered
    requires_check = ("    if (!({}).ValueOr(false))\n"
                      "      return false;").format(requires_clause)
  else:
    requires_check = ""

  if config.include_enum_traits:
    text_stream_methods = code_template.format_template(
        _TEMPLATES.struct_text_stream,
        decode_fields="\n".join(decode_field_clauses),
        write_fields="\n".join(write_field_clauses))
  else:
    text_stream_methods = ""

  class_forward_declarations = code_template.format_template(
      _TEMPLATES.structure_view_declaration,
      name=type_name)
  class_bodies = code_template.format_template(
      _TEMPLATES.structure_view_class,
      name=type_ir.name.canonical_name.object_path[-1],
      size_method=_render_size_method(type_ir.structure.field, ir),
      field_method_declarations="".join(field_method_declarations),
      field_ok_checks="\n".join(ok_method_clauses),
      parameter_ok_checks="\n".join(parameter_checks),
      requires_check=requires_check,
      equals_method_body="\n".join(equals_method_clauses),
      unchecked_equals_method_body="\n".join(unchecked_equals_method_clauses),
      enum_usings="\n".join(enum_using_statements),
      text_stream_methods=text_stream_methods,
      parameter_fields="\n".join(parameter_fields),
      constructor_parameters="".join(constructor_parameters),
      forwarded_parameters="".join(forwarded_parameters),
      parameter_initializers="\n".join(parameter_initializers),
      parameter_copy_initializers="\n".join(parameter_copy_initializers),
      parameters_initialized_flag=parameters_initialized_flag,
      initialize_parameters_initialized_true=(
          initialize_parameters_initialized_true),
      units=units)
  method_definitions = "\n".join(field_method_definitions)
  # Nothing is ever added to virtual_field_type_definitions in this function,
  # so this currently evaluates to the empty string.
  early_virtual_field_types = "\n".join(virtual_field_type_definitions)
  all_field_helper_type_definitions = "\n".join(field_helper_type_definitions)
  return (early_virtual_field_types + subtype_forward_declarations +
          class_forward_declarations,
          all_field_helper_type_definitions + subtype_bodies + class_bodies,
          subtype_method_definitions + method_definitions)
1282
1283
1284def _split_enum_case_values_into_spans(enum_case_value):
1285  """Yields spans containing each enum case in an enum_case attribute value.
1286
1287  Each span is of the form (start, end), which is the start and end position
1288  relative to the beginning of the enum_case_value string. To keep the grammar
1289  of this attribute simple, this only splits on delimiters and trims whitespace
1290  for each case.
1291
1292  Example: 'SHOUTY_CASE, kCamelCase' -> [(0, 11), (13, 23)]"""
1293  # Scan the string from left to right, finding commas and trimming whitespace.
1294  # This is essentially equivalent to (x.trim() fror x in str.split(','))
1295  # except that this yields spans within the string rather than the strings
1296  # themselves, and no span is yielded for a trailing comma.
1297  start, end = 0, len(enum_case_value)
1298  while start <= end:
1299    # Find a ',' delimiter to split on
1300    delimiter = enum_case_value.find(',', start, end)
1301    if delimiter < 0:
1302      delimiter = end
1303
1304    substr_start = start
1305    substr_end = delimiter
1306
1307    # Drop leading whitespace
1308    while (substr_start < substr_end and
1309           enum_case_value[substr_start].isspace()):
1310      substr_start += 1
1311    # Drop trailing whitespace
1312    while (substr_start < substr_end and
1313           enum_case_value[substr_end - 1].isspace()):
1314      substr_end -= 1
1315
1316    # Skip a trailing comma
1317    if substr_start == end and start != 0:
1318      break
1319
1320    yield substr_start, substr_end
1321    start = delimiter + 1
1322
1323
def _split_enum_case_values(enum_case_value):
  """Returns the text of every enum case named in an enum case value.

  Example: 'SHOUTY_CASE, kCamelCase' -> ['SHOUTY_CASE', 'kCamelCase']"""
  spans = _split_enum_case_values_into_spans(enum_case_value)
  return [enum_case_value[begin:finish] for begin, finish in spans]
1330
1331
def _get_enum_value_names(enum_value):
  """Determines one or more C++ names for an enum value from its attributes.

  The value's name is converted from SHOUTY_CASE into every case listed in its
  enum_case attribute; without that attribute, only SHOUTY_CASE is produced.
  """
  name = enum_value.name.name.text
  enum_case = ir_util.get_attribute(enum_value.attribute,
                                    attributes.Attribute.ENUM_CASE)
  if enum_case:
    target_cases = _split_enum_case_values(enum_case.string_constant.text)
  else:
    target_cases = ["SHOUTY_CASE"]
  return [name_conversion.convert_case("SHOUTY_CASE", target_case, name)
          for target_case in target_cases]
1341
1342
def _generate_enum_definition(type_ir, include_traits=True):
  """Generates C++ for an Emboss enum.

  Arguments:
    type_ir: The IR for the enum definition.
    include_traits: Whether to append the enum traits (name <-> value
      conversion and is-known cases) to the definition.

  Returns:
    A tuple of (declaration, definition, "").
  """
  enum_name = type_ir.name.name.text
  value_definitions = []
  from_name_cases = []
  to_name_cases = []
  is_known_cases = []
  seen_numeric_values = set()

  max_bits = ir_util.get_integer_attribute(type_ir.attribute, "maximum_bits")
  is_signed = ir_util.get_boolean_attribute(type_ir.attribute, "is_signed")
  enum_type = _cpp_integer_type_for_enum(max_bits, is_signed)

  for value in type_ir.enumeration.value:
    numeric_value = ir_util.constant_value(value.value)
    emboss_name = value.name.name.text

    for cpp_name in _get_enum_value_names(value):
      value_definitions.append(
          code_template.format_template(_TEMPLATES.enum_value,
                                        name=cpp_name,
                                        value=_render_integer(numeric_value)))
      if include_traits:
        from_name_cases.append(
            code_template.format_template(_TEMPLATES.enum_from_name_case,
                                          enum=enum_name,
                                          value=cpp_name,
                                          name=emboss_name))

        # Only the first name seen for a numeric value gets the value-to-name
        # and is-known cases.
        if numeric_value not in seen_numeric_values:
          to_name_cases.append(
              code_template.format_template(_TEMPLATES.name_from_enum_case,
                                            enum=enum_name,
                                            value=cpp_name,
                                            name=emboss_name))
          is_known_cases.append(
              code_template.format_template(_TEMPLATES.enum_is_known_case,
                                            enum=enum_name,
                                            name=cpp_name))
      seen_numeric_values.add(numeric_value)

  declaration = code_template.format_template(
      _TEMPLATES.enum_declaration,
      enum=enum_name,
      enum_type=enum_type)
  definition = code_template.format_template(
      _TEMPLATES.enum_definition,
      enum=enum_name,
      enum_type=enum_type,
      enum_values="".join(value_definitions))
  if include_traits:
    definition += code_template.format_template(
        _TEMPLATES.enum_traits,
        enum=enum_name,
        enum_from_name_cases="\n".join(from_name_cases),
        name_from_enum_cases="\n".join(to_name_cases),
        enum_is_known_cases="\n".join(is_known_cases))

  return (declaration, definition, "")
1400
1401
def _generate_type_definition(type_ir, ir, config: Config):
  """Generates C++ for an Emboss type.

  Dispatches on which of the `structure`, `enumeration` or `external` fields
  is set on `type_ir`.

  Arguments:
    type_ir: The type definition to generate code for.
    ir: The complete IR; forwarded to the structure generator.
    config: Generation options.  `config.include_enum_traits` is forwarded to
      the enum generator, and the whole object to the structure generator.

  Returns:
    A tuple of three strings: (declaration, definition, method definitions).
    External types produce three empty strings.

  Raises:
    AssertionError: If `type_ir` is not a structure, enumeration or external.
  """
  if type_ir.HasField("structure"):
    return _generate_structure_definition(type_ir, ir, config)
  if type_ir.HasField("enumeration"):
    return _generate_enum_definition(type_ir, config.include_enum_traits)
  if type_ir.HasField("external"):
    # TODO(bolms): This should probably generate an #include.
    return "", "", ""
  # TODO(bolms): provide error message instead of ICE
  # Raised explicitly instead of via `assert False`: assert statements are
  # removed under `python -O`, which would let this function fall through and
  # return None, making the caller fail later with an unrelated unpack error.
  raise AssertionError("Unknown type {}".format(type_ir))
1414
1415
1416def _generate_header_guard(file_path):
1417  # TODO(bolms): Make this configurable.
1418  header_path = file_path + ".h"
1419  uppercased_path = header_path.upper()
1420  no_punctuation_path = re.sub(r"[^A-Za-z0-9_]", "_", uppercased_path)
1421  suffixed_path = no_punctuation_path + "_"
1422  no_double_underscore_path = re.sub(r"__+", "_", suffixed_path)
1423  return no_double_underscore_path
1424
1425
def _add_missing_enum_case_attribute_on_enum_value(enum_value, defaults):
  """Gives `enum_value` the default `enum_case` attribute when it lacks one.

  Nothing happens if `enum_value` already carries an `enum_case` attribute, or
  if `defaults` has no entry for `enum_case`.

  Arguments:
    enum_value: The enum value whose `attribute` list may be extended.
    defaults: Mapping from attribute name to the default attribute to apply.
  """
  existing = ir_util.get_attribute(enum_value.attribute,
                                   attributes.Attribute.ENUM_CASE)
  if existing is not None:
    # An explicit attribute always wins over a default.
    return
  if attributes.Attribute.ENUM_CASE not in defaults:
    return
  enum_value.attribute.extend([defaults[attributes.Attribute.ENUM_CASE]])
1432
1433
def _propagate_defaults(ir, targets, ancestors, add_fn):
  """Propagates default values to target nodes.

  Performs one top-down traversal of `ir`.  For every type in `ancestors`,
  attribute_util.gather_default_attributes is registered as the incidental
  action; `add_fn` is the action run on every node whose type is in `targets`.
  The traversal is started with an empty `defaults` parameter.

  Arguments:
    ir: The IR to traverse.
    targets: A list of target IR types to add attributes to.
    ancestors: Ancestor types which may contain the default values.
    add_fn: Function to add the attribute. May use any parameter available in
      fast_traverse_ir_top_down actions as well as `defaults` containing the
      default attributes set by ancestors.

  Returns:
    None
  """
  # Every ancestor type shares the same default-gathering action.
  gather_actions = dict.fromkeys(
      ancestors, attribute_util.gather_default_attributes)
  traverse_ir.fast_traverse_ir_top_down(
      ir, targets, add_fn,
      incidental_actions=gather_actions,
      parameters={"defaults": {}})
1456
1457
def _offset_source_location_column(source_location, offset):
  """Returns a copy of `source_location` with its columns shifted.

  Both new columns are computed from the *start* column of `source_location`;
  every other property is taken over unchanged from the copy.

  Arguments:
    source_location: The location to base the result on.  It is copied, not
      modified.
    offset: A (start, end) pair of offsets.  The result's start column is
      `source_location.start.column + offset[0]` and its end column is
      `source_location.start.column + offset[1]`.

  Returns:
    The new, shifted source location.
  """
  start_offset, end_offset = offset[0], offset[1]
  shifted = ir_data_utils.copy(source_location)
  shifted.start.column = source_location.start.column + start_offset
  # The end column is also relative to the original *start* column.
  shifted.end.column = source_location.start.column + end_offset
  return shifted
1473
1474
def _verify_namespace_attribute(attr, source_file_name, errors):
  """Verify that `namespace` values are acceptable C++ namespaces.

  Attributes other than `namespace` are ignored.  A value which does not match
  _NS_RE gets exactly one error, chosen by whether it matches _NS_EMPTY_RE,
  _NS_GLOBAL_RE, or neither.  A value which does match gets one error for each
  of its components found in _CPP_RESERVED_WORDS.

  Arguments:
    attr: The attribute to check.
    source_file_name: File name to attach to any reported error.
    errors: List to which errors are appended, each wrapped in its own list.
  """
  if attr.name.text != attributes.Attribute.NAMESPACE:
    return
  namespace_value = ir_data_utils.reader(attr).value.string_constant

  def report(message):
    errors.append([error.error(
        source_file_name, namespace_value.source_location, message)])

  if re.match(_NS_RE, namespace_value.text):
    # Well-formed; only the individual components remain to be checked.
    for word in _get_namespace_components(namespace_value.text):
      if word in _CPP_RESERVED_WORDS:
        report(
            f'Reserved word "{word}" is not allowed as a namespace component.')
    return

  if re.match(_NS_EMPTY_RE, namespace_value.text):
    report('Empty namespace value is not allowed.')
  elif re.match(_NS_GLOBAL_RE, namespace_value.text):
    report('Global namespace is not allowed.')
  else:
    report(
        'Invalid namespace, must be a valid C++ namespace, such as "abc", '
        '"abc::def", or "::abc::def::ghi" (ISO/IEC 14882:2017 '
        'enclosing-namespace-specifier).')
1501
1502
def _verify_enum_case_attribute(attr, source_file_name, errors):
  """Verify that `enum_case` values are supported.

  Attributes other than `enum_case` are ignored.  The attribute text is split
  into (start, end) spans by _split_enum_case_values_into_spans, and each span
  is checked in order.  At most one error is reported per span: empty span,
  duplicate of an earlier case, or case not in _SUPPORTED_ENUM_CASES.  Each
  error points at the columns of the offending span.

  Arguments:
    attr: The attribute to check.
    source_file_name: File name to attach to any reported error.
    errors: List to which errors are appended, each wrapped in its own list.
  """
  if attr.name.text != attributes.Attribute.ENUM_CASE:
    return

  supported_list = ', '.join(_SUPPORTED_ENUM_CASES)
  enum_case_value = attr.value.string_constant
  already_seen = set()

  spans = _split_enum_case_values_into_spans(enum_case_value.text)
  for start, end in spans:
    location = _offset_source_location_column(
        enum_case_value.source_location, (start, end))
    case = enum_case_value.text[start:end]

    if start == end:
      message = 'Empty enum case (or excess comma).'
    elif case in already_seen:
      message = f'Duplicate enum case "{case}".'
    else:
      # First occurrence: remember it even if it turns out to be unsupported,
      # so that a repeat is reported as a duplicate.
      already_seen.add(case)
      if case in _SUPPORTED_ENUM_CASES:
        continue
      message = (f'Unsupported enum case "{case}", '
                 f'supported cases are: {supported_list}.')

    errors.append([error.error(source_file_name, location, message)])
1536
1537
def _verify_attribute_values(ir):
  """Verify backend attribute values.

  Traverses every ir_data.Attribute in `ir` twice: first with the `namespace`
  verifier, then with the `enum_case` verifier.

  Arguments:
    ir: The IR whose attributes should be checked.

  Returns:
    The list of errors collected by both verifiers; empty if none were found.
  """
  collected = []
  verifiers = (_verify_namespace_attribute, _verify_enum_case_attribute)
  for verifier in verifiers:
    traverse_ir.fast_traverse_ir_top_down(
        ir, [ir_data.Attribute], verifier,
        parameters={"errors": collected})
  return collected
1550
1551
def _propagate_defaults_and_verify_attributes(ir):
  """Verify attributes and ensure defaults are set when not overridden.

  Verification happens in two stages: the generic attribute check for the
  "cpp" back end, then the back-end specific value checks.  The first stage
  that reports anything ends the process.  Defaults for `enum_case` are only
  propagated onto enum values when both stages pass.

  Arguments:
    ir: The IR to verify and update.

  Returns:
    A list of errors if there are errors present, or an empty list if
    verification completed successfully.
  """
  errors = attribute_util.check_attributes_in_ir(
      ir,
      back_end="cpp",
      types=attributes.TYPES,
      module_attributes=attributes.Scope.MODULE,
      struct_attributes=attributes.Scope.STRUCT,
      bits_attributes=attributes.Scope.BITS,
      enum_attributes=attributes.Scope.ENUM,
      enum_value_attributes=attributes.Scope.ENUM_VALUE)
  if not errors:
    # Values are only inspected once the generic attribute check has passed.
    errors = _verify_attribute_values(ir)
  if errors:
    return errors

  # Ensure defaults are set on EnumValues for `enum_case`.
  _propagate_defaults(
      ir,
      targets=[ir_data.EnumValue],
      ancestors=[ir_data.Module, ir_data.TypeDefinition],
      add_fn=_add_missing_enum_case_attribute_on_enum_value)

  return []
1579
1580
def generate_header(ir, config=Config()):
  """Generates a C++ header from an Emboss module.

  Code is generated for the types of `ir.module[0]` only.

  Arguments:
    ir: An EmbossIr of the module.
    config: Generation options, forwarded to the per-type generators and to
      the include computation.  Defaults to `Config()`.

  Returns:
    A tuple of (header, errors), where `header` is either a string containing
    the text of a C++ header which implements Views for the types in the Emboss
    module, or None, and `errors` is a possibly-empty list of error messages to
    display to the user.
  """
  errors = _propagate_defaults_and_verify_attributes(ir)
  if errors:
    return None, errors

  module = ir.module[0]
  # One (declaration, definition, methods) triple per type, in module order.
  generated = [
      _generate_type_definition(type_definition, ir, config)
      for type_definition in module.type
  ]
  body = code_template.format_template(
      _TEMPLATES.body,
      type_declarations="".join(decl for decl, _, _ in generated),
      type_definitions="".join(defn for _, defn, _ in generated),
      method_definitions="".join(methods for _, _, methods in generated))
  namespaced_body = _wrap_in_namespace(body, _get_module_namespace(module))
  includes = _get_includes(module, config)
  header_guard = _generate_header_guard(module.source_file_name)
  header = code_template.format_template(
      _TEMPLATES.outline,
      includes=includes,
      body=namespaced_body,
      header_guard=header_guard)
  return header, []
1617