xref: /aosp_15_r20/external/minijail/tools/generate_constants_json.py (revision 4b9c6d91573e8b3a96609339b46361b5476dd0f9)
1#!/usr/bin/env python3
2# -*- coding: utf-8 -*-
3#
4# Copyright (C) 2019 The Android Open Source Project
5#
6# Licensed under the Apache License, Version 2.0 (the "License");
7# you may not use this file except in compliance with the License.
8# You may obtain a copy of the License at
9#
10#      http://www.apache.org/licenses/LICENSE-2.0
11#
12# Unless required by applicable law or agreed to in writing, software
13# distributed under the License is distributed on an "AS IS" BASIS,
14# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15# See the License for the specific language governing permissions and
16# limitations under the License.
17"""Helper tool to generate cross-compiled syscall and constant tables to JSON.
18
19This script takes the LLVM IR of libconstants.gen.c and libsyscalls.gen.c and
20generates the `constants.json` file with that. LLVM IR files are moderately
21architecture-neutral (at least for this case).
22"""
23
24import argparse
25import collections
26import json
27import re
28import sys
29
# Matches the definition of a NUL-terminated string global, for example
#
#   @.str.5 = private unnamed_addr constant [5 x i8] c"read\00", align 1
#
# capturing the global's name (@.str.5) and its text (read). The captured text
# cannot contain a double quote or a backslash.
_STRING_CONSTANT_RE = re.compile(r'(@[a-zA-Z0-9.]+) = .*c"([^"\\]+)\\00".*')
# Matches one `%struct.constant_entry { ... }` / `%struct.syscall_entry { ... }`
# initializer and captures everything between the braces.
_TABLE_ENTRY_RE = re.compile(
    r'%struct\.(?:constant|syscall)_entry\s*{\s*([^}]+)\s*}')
# The contents of a table entry look something like
#
#  i8* getelementptr inbounds ([5 x i8], [5 x i8]* @.str.5, i32 0, i32 0), i32 5
#
# for arm-v7a. What we are interested in are the @.str.x (or `null`, for the
# end-of-table marker) and the very last number.
_TABLE_ENTRY_CONTENTS = re.compile(r'.*?(null|@[a-zA-Z0-9.]+).* (-?\d+)')

# When testing clang-r458909, we found a new constant_entry pattern:
#   %struct.constant_entry { ptr @.str.894, i32 ptrtoint (ptr @.str.895 to i32) },
# For the same constant, clang-r458507 generates:
#   %struct.constant_entry { i8* getelementptr inbounds
#    ([19 x i8], [19 x i8]* @.str.894, i32 0, i32 0),
#    i32 ptrtoint ([9 x i8]* @.str.895 to i32) },
# This is for a char* constant defined in linux-x86/libconstants.gen.c:
#   { "FS_KEY_DESC_PREFIX", (unsigned long) FS_KEY_DESC_PREFIX },
# where FS_KEY_DESC_PREFIX is defined as the char* "fscrypt:".
# The output for that constant in constants.json used to be:
#   "FS_KEY_DESC_PREFIX": 0,
# but that value does not seem to be useful or accurate.
# So here we define a pattern to ignore such pointer constants. It is only
# consulted for entries that _TABLE_ENTRY_CONTENTS failed to match.
_IGNORED_ENTRY_CONTENTS = re.compile(r'.*? ptrto.* \(.*\)')

# Result of parsing one LLVM IR file: the kind of table found and its entries.
ParseResults = collections.namedtuple('ParseResults', ['table_name',
                                                       'table_entries'])

HELP_EPILOG = """Generate LLVM IR: clang -S -emit-llvm libconstants.gen.c libsyscalls.gen.c
"""


def parse_llvm_ir(ir):
    """Parses a single LLVM IR file.

    Args:
        ir: An iterable of text lines (for instance an open text file)
            containing the LLVM IR.

    Returns:
        A ParseResults. `table_name` is 'syscalls' or 'constants' depending on
        which table the input defines, or '' if it defines neither.
        `table_entries` is an OrderedDict mapping each entry's name to its
        integer value, in table order.

    Raises:
        ValueError: If a table entry is neither parseable nor one of the
            explicitly ignored pointer-valued entries.
        KeyError: If a table entry refers to a string constant that is not
            defined anywhere in the input.
    """
    string_constants = {}
    # (string constant name, value) pairs in table order. Names are resolved
    # only after the whole input has been read, so that the result does not
    # depend on the string constants being defined before the table.
    pending_entries = []
    table_name = ''
    for line in ir:
        string_constant_match = _STRING_CONSTANT_RE.match(line)
        if string_constant_match:
            symbol, text = string_constant_match.groups()
            string_constants[symbol] = text
            continue

        if '@syscall_table' in line:
            table_name = 'syscalls'
        elif '@constant_table' in line:
            table_name = 'constants'
        else:
            continue

        for entry in _TABLE_ENTRY_RE.findall(line):
            contents = _TABLE_ENTRY_CONTENTS.match(entry)
            if not contents:
                if _IGNORED_ENTRY_CONTENTS.match(entry):
                    # Pointer-valued constant; it has no meaningful number.
                    continue
                raise ValueError('Failed to parse table entry %r' % entry)
            name, value = contents.groups()
            if name == 'null':
                # This is the end-of-table marker.
                break
            pending_entries.append((name, int(value)))

    table_entries = collections.OrderedDict()
    for name, value in pending_entries:
        try:
            entry_name = string_constants[name]
        except KeyError:
            raise KeyError(
                'Table entry references undefined string constant %r' %
                name) from None
        table_entries[entry_name] = value

    return ParseResults(table_name=table_name, table_entries=table_entries)
93
94
def main(argv=None):
    """Main entrypoint.

    Parses every LLVM IR file given on the command line, merges the resulting
    tables into one dictionary keyed by table name, adds the top-level
    `arch_nr`, `bits` and `arch_name` fields and writes the result as JSON to
    the `--output` file.

    Args:
        argv: The command-line arguments, without the program name. Defaults to
            sys.argv[1:].

    Returns:
        0 on success.

    Raises:
        KeyError: If no input provided a 'constants' table, or that table lacks
            MINIJAIL_ARCH_NR / MINIJAIL_ARCH_BITS.
        ValueError: If MINIJAIL_ARCH_NR is not one of the known architectures.
    """

    if argv is None:
        argv = sys.argv[1:]

    parser = argparse.ArgumentParser(description=__doc__, epilog=HELP_EPILOG)
    parser.add_argument('--output',
                        help='The path of the generated constants.json file.',
                        type=argparse.FileType('w'),
                        required=True)
    parser.add_argument(
        'llvm_ir_files',
        help='An LLVM IR file with one of the {constants,syscall} table.',
        metavar='llvm_ir_file',
        nargs='+',
        type=argparse.FileType('r'))
    opts = parser.parse_args(argv)

    # argparse.FileType opens every file eagerly while parsing the arguments,
    # so closing them is our job. The standard streams (which FileType hands
    # out for '-') are left open.
    owned_files = [
        stream for stream in [opts.output] + opts.llvm_ir_files
        if stream is not sys.stdin and stream is not sys.stdout
    ]
    try:
        constants_json = {}
        for ir in opts.llvm_ir_files:
            parse_results = parse_llvm_ir(ir)
            constants_json[
                parse_results.table_name] = parse_results.table_entries

        # Populate the top-level fields.
        constants = constants_json['constants']
        arch_nr = constants['MINIJAIL_ARCH_NR']
        constants_json['arch_nr'] = arch_nr
        constants_json['bits'] = constants['MINIJAIL_ARCH_BITS']

        # It is a bit more complicated to generate the arch_name, since the
        # constants can only output numeric values. Use a hardcoded mapping
        # instead.
        arch_names = {
            0xC000003E: 'x86_64',
            0x40000003: 'x86',
            0xC00000B7: 'arm64',
            0x40000028: 'arm',
            0xC00000F3: 'riscv64',
        }
        if arch_nr not in arch_names:
            raise ValueError('Unknown architecture: 0x%08X' % arch_nr)
        constants_json['arch_name'] = arch_names[arch_nr]

        json.dump(constants_json, opts.output, indent='  ')
    finally:
        for stream in owned_files:
            stream.close()
    return 0
141
142
if __name__ == '__main__':
    # Run as a script: hand over the command-line arguments (minus the program
    # name) and use main()'s return value as the process exit status.
    exit_status = main(sys.argv[1:])
    sys.exit(exit_status)
145