#!/usr/bin/env python3
# -*- coding: utf-8 -*-
#
# Copyright (C) 2019 The Android Open Source Project
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Helper tool to generate cross-compiled syscall and constant tables to JSON.

This script takes the LLVM IR of libconstants.gen.c and libsyscalls.gen.c and
generates the `constants.json` file with that. LLVM IR files are moderately
architecture-neutral (at least for this case).
"""

import argparse
import collections
import json
import re
import sys

# Matches a string constant definition and captures (symbol, string contents).
_STRING_CONSTANT_RE = re.compile(r'(@[a-zA-Z0-9.]+) = .*c"([^"\\]+)\\00".*')
# Captures the text between the braces of every table entry on a line.
_TABLE_ENTRY_RE = re.compile(
    r'%struct.(?:constant|syscall)_entry\s*{\s*([^}]+)\s*}')
# This looks something like
#
#  i8* getelementptr inbounds ([5 x i8], [5 x i8]* @.str.5, i32 0, i32 0), i32 5
#
# For arm-v7a. What we are interested in are the @.str.x and the very last
# number.
_TABLE_ENTRY_CONTENTS = re.compile(r'.*?(null|@[a-zA-Z0-9.]+).* (-?\d+)')

# When testing clang-r458909, we found a new constant_entry pattern:
#   %struct.constant_entry { ptr @.str.894, i32 ptrtoint (ptr @.str.895 to i32) },
# For the same constant, current clang-r458507 generates:
#   %struct.constant_entry { i8* getelementptr inbounds
#     ([19 x i8], [19 x i8]* @.str.894, i32 0, i32 0),
#     i32 ptrtoint ([9 x i8]* @.str.895 to i32) },
# This is for a char* constant defined in linux-x86/libconstants.gen.c:
#   { "FS_KEY_DESC_PREFIX", (unsigned long) FS_KEY_DESC_PREFIX },
# and FS_KEY_DESC_PREFIX is defined as a char* "fscrypt:"
# Current output for that constant in constants.json is:
#   "FS_KEY_DESC_PREFIX": 0,
# but that value does not seem to be useful or accurate.
# So here we define a pattern to ignore such pointer constants:
_IGNORED_ENTRY_CONTENTS = re.compile(r'.*? ptrto.* \(.*\)')

# It is a bit more complicated to generate the arch_name, since the
# constants can only output numeric values. Use a hardcoded mapping from the
# MINIJAIL_ARCH_NR value instead.
_ARCH_NAMES = {
    0xC000003E: 'x86_64',
    0x40000003: 'x86',
    0xC00000B7: 'arm64',
    0x40000028: 'arm',
    0xC00000F3: 'riscv64',
}

ParseResults = collections.namedtuple('ParseResults', ['table_name',
                                                       'table_entries'])

HELP_EPILOG = """Generate LLVM IR: clang -S -emit-llvm libconstants.gen.c libsyscalls.gen.c
"""


def parse_llvm_ir(ir):
    """Parses a single LLVM IR file.

    Args:
        ir: An iterable of text lines (e.g. an open text file) holding the
            LLVM IR. String constants are collected as lines are read, so a
            string constant has to be defined on a line that precedes the
            table line that references it.

    Returns:
        A ParseResults whose table_name is 'syscalls' or 'constants' (or ''
        when no table line was seen) and whose table_entries is an
        OrderedDict mapping each entry name to its integer value, in table
        order.

    Raises:
        ValueError: A table entry matches neither the expected entry layout
            nor the ignored pointer-constant layout.
        KeyError: A table entry references a string constant that has not
            been defined on an earlier line.
    """
    string_constants = collections.OrderedDict()
    table_entries = collections.OrderedDict()
    table_name = ''
    for line in ir:
        string_constant_match = _STRING_CONSTANT_RE.match(line)
        if string_constant_match:
            symbol, text = string_constant_match.groups()
            string_constants[symbol] = text
            continue

        if '@syscall_table' in line:
            table_name = 'syscalls'
        elif '@constant_table' in line:
            table_name = 'constants'
        else:
            continue

        for entry in _TABLE_ENTRY_RE.findall(line):
            contents = _TABLE_ENTRY_CONTENTS.match(entry)
            if not contents:
                if _IGNORED_ENTRY_CONTENTS.match(entry):
                    # Pointer-valued constant; it has no numeric value.
                    continue
                raise ValueError('Failed to parse table entry %r' % entry)
            name, value = contents.groups()
            if name == 'null':
                # This is the end-of-table marker.
                break
            table_entries[string_constants[name]] = int(value)

    return ParseResults(table_name=table_name, table_entries=table_entries)


def _close_unless_std_stream(handle):
    """Closes `handle` unless it is the process' stdin or stdout."""
    # argparse.FileType hands back sys.stdin / sys.stdout for '-'; those
    # belong to the interpreter and must stay open for the caller.
    if handle is sys.stdin or handle is sys.stdout:
        return
    handle.close()


def main(argv=None):
    """Main entrypoint.

    Args:
        argv: The command-line arguments, without the program name. Defaults
            to sys.argv[1:].

    Returns:
        0 on success.

    Raises:
        ValueError: A table entry could not be parsed, or the
            MINIJAIL_ARCH_NR constant is not a known architecture.
        KeyError: No constants table was parsed, or it lacks
            MINIJAIL_ARCH_NR / MINIJAIL_ARCH_BITS.
    """

    if argv is None:
        argv = sys.argv[1:]

    parser = argparse.ArgumentParser(description=__doc__, epilog=HELP_EPILOG)
    parser.add_argument('--output',
                        help='The path of the generated constants.json file.',
                        type=argparse.FileType('w'),
                        required=True)
    parser.add_argument(
        'llvm_ir_files',
        help='An LLVM IR file with one of the {constants,syscall} table.',
        metavar='llvm_ir_file',
        nargs='+',
        type=argparse.FileType('r'))
    opts = parser.parse_args(argv)

    # argparse.FileType opens every file while parsing the arguments and
    # never closes them, so release them here on success and on failure.
    try:
        constants_json = {}
        for ir in opts.llvm_ir_files:
            parse_results = parse_llvm_ir(ir)
            constants_json[parse_results.table_name] = (
                parse_results.table_entries)

        # Populate the top-level fields.
        constants = constants_json['constants']
        arch_nr = constants['MINIJAIL_ARCH_NR']
        constants_json['arch_nr'] = arch_nr
        constants_json['bits'] = constants['MINIJAIL_ARCH_BITS']

        if arch_nr not in _ARCH_NAMES:
            raise ValueError('Unknown architecture: 0x%08X' % arch_nr)
        constants_json['arch_name'] = _ARCH_NAMES[arch_nr]

        json.dump(constants_json, opts.output, indent=' ')
    finally:
        for handle in opts.llvm_ir_files:
            _close_unless_std_stream(handle)
        _close_unless_std_stream(opts.output)
    return 0


if __name__ == '__main__':
    sys.exit(main(sys.argv[1:]))