xref: /aosp_15_r20/external/minijail/tools/generate_constants_json.py (revision 4b9c6d91573e8b3a96609339b46361b5476dd0f9)
1*4b9c6d91SCole Faust#!/usr/bin/env python3
2*4b9c6d91SCole Faust# -*- coding: utf-8 -*-
3*4b9c6d91SCole Faust#
4*4b9c6d91SCole Faust# Copyright (C) 2019 The Android Open Source Project
5*4b9c6d91SCole Faust#
6*4b9c6d91SCole Faust# Licensed under the Apache License, Version 2.0 (the "License");
7*4b9c6d91SCole Faust# you may not use this file except in compliance with the License.
8*4b9c6d91SCole Faust# You may obtain a copy of the License at
9*4b9c6d91SCole Faust#
10*4b9c6d91SCole Faust#      http://www.apache.org/licenses/LICENSE-2.0
11*4b9c6d91SCole Faust#
12*4b9c6d91SCole Faust# Unless required by applicable law or agreed to in writing, software
13*4b9c6d91SCole Faust# distributed under the License is distributed on an "AS IS" BASIS,
14*4b9c6d91SCole Faust# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15*4b9c6d91SCole Faust# See the License for the specific language governing permissions and
16*4b9c6d91SCole Faust# limitations under the License.
17*4b9c6d91SCole Faust"""Helper tool to generate cross-compiled syscall and constant tables to JSON.
18*4b9c6d91SCole Faust
19*4b9c6d91SCole FaustThis script takes the LLVM IR of libconstants.gen.c and libsyscalls.gen.c and
20*4b9c6d91SCole Faustgenerates the `constants.json` file with that. LLVM IR files are moderately
21*4b9c6d91SCole Faustarchitecture-neutral (at least for this case).
22*4b9c6d91SCole Faust"""
23*4b9c6d91SCole Faust
24*4b9c6d91SCole Faustimport argparse
25*4b9c6d91SCole Faustimport collections
26*4b9c6d91SCole Faustimport json
27*4b9c6d91SCole Faustimport re
28*4b9c6d91SCole Faustimport sys
29*4b9c6d91SCole Faust
# Matches the definition of a NUL-terminated string global. Group 1 is the
# global's symbol (e.g. @.str.5), group 2 is the text of the string.
_STRING_CONSTANT_RE = re.compile(
    r'(@[a-zA-Z0-9.]+) = .*c"([^"\\]+)\\00".*')

# Matches one initializer of the constant/syscall table and captures
# everything between its braces.
_TABLE_ENTRY_RE = re.compile(
    r'%struct.(?:constant|syscall)_entry\s*{\s*([^}]+)\s*}')

# The contents of a table entry look something like this on arm-v7a:
#
#  i8* getelementptr inbounds ([5 x i8], [5 x i8]* @.str.5, i32 0, i32 0), i32 5
#
# Only two pieces matter: the first string symbol (@.str.x, or `null` for the
# end-of-table marker) and the very last number.
_TABLE_ENTRY_CONTENTS = re.compile(r'.*?(null|@[a-zA-Z0-9.]+).* (-?\d+)')

# Entries whose value is a pointer rather than a number need special care.
# linux-x86/libconstants.gen.c contains a char* constant:
#   { "FS_KEY_DESC_PREFIX", (unsigned long) FS_KEY_DESC_PREFIX },
# where FS_KEY_DESC_PREFIX is defined as the char* "fscrypt:".
#
# clang-r458507 emits that entry as:
#   %struct.constant_entry { i8* getelementptr inbounds
#    ([19 x i8], [19 x i8]* @.str.894, i32 0, i32 0),
#    i32 ptrtoint ([9 x i8]* @.str.895 to i32) },
# which ends up in constants.json as:
#   "FS_KEY_DESC_PREFIX": 0,
# a value that does not seem to be useful or accurate.
#
# clang-r458909 emits a new pattern for the same constant:
#   %struct.constant_entry { ptr @.str.894, i32 ptrtoint (ptr @.str.895 to i32) },
# which _TABLE_ENTRY_CONTENTS does not match. The pattern below recognizes such
# pointer constants so that they can be ignored instead of reported as errors:
_IGNORED_ENTRY_CONTENTS = re.compile(r'.*? ptrto.* \(.*\)')

# Result of parsing one LLVM IR file: which table it held and its entries.
ParseResults = collections.namedtuple(
    'ParseResults', ['table_name', 'table_entries'])

# Shown at the end of --help: how to produce the input files for this tool.
HELP_EPILOG = """Generate LLVM IR: clang -S -emit-llvm libconstants.gen.c libsyscalls.gen.c
"""
61*4b9c6d91SCole Faust
62*4b9c6d91SCole Faust
def parse_llvm_ir(ir):
    """Parses a single LLVM IR file.

    Args:
        ir: An iterable of LLVM IR lines, such as an open text file.

    Returns:
        A ParseResults whose table_name is 'syscalls' or 'constants' (or ''
        when no table line was seen) and whose table_entries maps each entry
        name to its integer value, in the order the entries were found.

    Raises:
        ValueError: If a table entry matches neither the expected entry
            pattern nor the ignored pointer-constant pattern.
        KeyError: If a table entry refers to a string symbol that has not
            been defined on an earlier line.
    """
    strings_by_symbol = collections.OrderedDict()
    entries = collections.OrderedDict()
    table_name = ''

    for ir_line in ir:
        # String definitions are collected first so that table entries can
        # later be resolved from their @.str.N symbol to the actual name.
        string_match = _STRING_CONSTANT_RE.match(ir_line)
        if string_match is not None:
            symbol, text = string_match.groups()
            strings_by_symbol[symbol] = text
            continue

        is_syscall_table = '@syscall_table' in ir_line
        if not is_syscall_table and '@constant_table' not in ir_line:
            continue
        table_name = 'syscalls' if is_syscall_table else 'constants'

        for raw_entry in _TABLE_ENTRY_RE.findall(ir_line):
            parsed = _TABLE_ENTRY_CONTENTS.match(raw_entry)
            if parsed is None:
                if _IGNORED_ENTRY_CONTENTS.match(raw_entry) is None:
                    raise ValueError(
                        'Failed to parse table entry %r' % raw_entry)
                # Pointer-valued constant: deliberately left out.
                continue

            symbol, number = parsed.groups()
            if symbol == 'null':
                # This is the end-of-table marker.
                break
            entries[strings_by_symbol[symbol]] = int(number)

    return ParseResults(table_name=table_name, table_entries=entries)
93*4b9c6d91SCole Faust
94*4b9c6d91SCole Faust
def main(argv=None):
    """Main entrypoint.

    Parses every given LLVM IR file, merges the resulting tables into a single
    dictionary, adds the top-level `arch_nr`, `bits` and `arch_name` fields and
    writes the result as JSON to the --output file.

    Args:
        argv: The command-line arguments, without the program name. When None,
            sys.argv[1:] is used.

    Returns:
        0 on success.

    Raises:
        ValueError: If the MINIJAIL_ARCH_NR constant is not one of the
            architectures known to this tool.
    """

    if argv is None:
        argv = sys.argv[1:]

    parser = argparse.ArgumentParser(description=__doc__, epilog=HELP_EPILOG)
    parser.add_argument('--output',
                        help='The path of the generated constants.json file.',
                        type=argparse.FileType('w'),
                        required=True)
    parser.add_argument(
        'llvm_ir_files',
        help='An LLVM IR file with one of the {constants,syscall} table.',
        metavar='llvm_ir_file',
        nargs='+',
        type=argparse.FileType('r'))
    opts = parser.parse_args(argv)

    # It is a bit more complicated to generate the arch_name, since the
    # constants can only output numeric values. Use a hardcoded mapping instead.
    arch_names = {
        0xC000003E: 'x86_64',
        0x40000003: 'x86',
        0xC00000B7: 'arm64',
        0x40000028: 'arm',
        0xC00000F3: 'riscv64',
    }

    try:
        constants_json = {}
        for ir in opts.llvm_ir_files:
            parse_results = parse_llvm_ir(ir)
            constants_json[parse_results.table_name] = (
                parse_results.table_entries)

        # Populate the top-level fields.
        constants = constants_json['constants']
        constants_json['arch_nr'] = constants['MINIJAIL_ARCH_NR']
        constants_json['bits'] = constants['MINIJAIL_ARCH_BITS']

        arch_name = arch_names.get(constants_json['arch_nr'])
        if arch_name is None:
            raise ValueError('Unknown architecture: 0x%08X' %
                             constants_json['arch_nr'])
        constants_json['arch_name'] = arch_name

        json.dump(constants_json, opts.output, indent='  ')
    finally:
        # argparse.FileType opens the files but never closes them. Close them
        # here so the output is flushed even when main() is called from other
        # code. '-' maps to the standard streams, which must stay open.
        for handle in opts.llvm_ir_files + [opts.output]:
            if handle is not sys.stdin and handle is not sys.stdout:
                handle.close()
    return 0
141*4b9c6d91SCole Faust
142*4b9c6d91SCole Faust
if __name__ == '__main__':
    # main() falls back to sys.argv[1:] when it is called without arguments.
    sys.exit(main())
145