#!/usr/bin/env python3
# -*- coding: utf-8 -*-
#
# Copyright (C) 2019 The Android Open Source Project
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Helper tool to generate cross-compiled syscall and constant tables to JSON.

This script takes the LLVM IR of libconstants.gen.c and libsyscalls.gen.c and
generates the `constants.json` file with that. LLVM IR files are moderately
architecture-neutral (at least for this case).
"""

import argparse
import collections
import json
import re
import sys

# Matches the definition of a NUL-terminated string constant. Group 1 is the
# symbol (e.g. `@.str.5`) and group 2 is the string contents.
_STRING_CONSTANT_RE = re.compile(r'(@[a-zA-Z0-9.]+) = .*c"([^"\\]+)\\00".*')
# Matches one `{ ... }` initializer of a constant/syscall table entry. Group 1
# is the text between the braces.
_TABLE_ENTRY_RE = re.compile(
    r'%struct.(?:constant|syscall)_entry\s*{\s*([^}]+)\s*}')
# This looks something like
#
#   i8* getelementptr inbounds ([5 x i8], [5 x i8]* @.str.5, i32 0, i32 0), i32 5
#
# For arm-v7a. What we are interested in are the @.str.x and the very last
# number.
_TABLE_ENTRY_CONTENTS = re.compile(r'.*?(null|@[a-zA-Z0-9.]+).* (-?\d+)')

# When testing clang-r458909, we found a new constant_entry pattern:
#   %struct.constant_entry { ptr @.str.894, i32 ptrtoint (ptr @.str.895 to i32) },
# For the same constant, current clang-r458507 generates:
#   %struct.constant_entry { i8* getelementptr inbounds
#     ([19 x i8], [19 x i8]* @.str.894, i32 0, i32 0),
#     i32 ptrtoint ([9 x i8]* @.str.895 to i32) },
# This is for a char* constant defined in linux-x86/libconstants.gen.c:
#   { "FS_KEY_DESC_PREFIX", (unsigned long) FS_KEY_DESC_PREFIX },
# and FS_KEY_DESC_PREFIX is defined as a char* "fscrypt:"
# Current output for that constant in constants.json is:
#   "FS_KEY_DESC_PREFIX": 0,
# but that value does not seem to be useful or accurate.
# So here we define a pattern to ignore such pointer constants:
_IGNORED_ENTRY_CONTENTS = re.compile(r'.*? ptrto.* \(.*\)')

# Result of parsing one LLVM IR file: the name of the table that was found
# ('syscalls', 'constants', or '' if there was none) and its name -> value
# entries.
ParseResults = collections.namedtuple('ParseResults', ['table_name',
                                                       'table_entries'])

HELP_EPILOG = """Generate LLVM IR: clang -S -emit-llvm libconstants.gen.c libsyscalls.gen.c
"""

# The constants can only carry numeric values, so the architecture name is
# derived from MINIJAIL_ARCH_NR through this hardcoded mapping.
_ARCH_NAMES = {
    0xC000003E: 'x86_64',
    0x40000003: 'x86',
    0xC00000B7: 'arm64',
    0x40000028: 'arm',
    0xC00000F3: 'riscv64',
}


def parse_llvm_ir(ir):
    """Parses a single LLVM IR file.

    Args:
        ir: An iterable of text lines (for instance an open text file).

    Returns:
        A ParseResults tuple. `table_name` is 'syscalls' or 'constants'
        depending on which table was seen, or '' when neither table appears in
        the input. `table_entries` maps each entry name to its integer value,
        in the order the entries were found.

    Raises:
        ValueError: A table entry has a shape that is neither recognized nor
            explicitly ignored.
        KeyError: A table entry refers to a string constant that was not
            defined earlier in the input.
    """
    string_constants = collections.OrderedDict()
    table_entries = collections.OrderedDict()
    table_name = ''
    for line in ir:
        string_constant_match = _STRING_CONSTANT_RE.match(line)
        if string_constant_match:
            symbol, text = string_constant_match.groups()
            string_constants[symbol] = text
            continue

        if '@syscall_table' in line:
            table_name = 'syscalls'
        elif '@constant_table' in line:
            table_name = 'constants'
        else:
            continue

        for entry in _TABLE_ENTRY_RE.findall(line):
            groups = _TABLE_ENTRY_CONTENTS.match(entry)
            if not groups:
                # Pointer-valued constants carry no usable number; skip them.
                if _IGNORED_ENTRY_CONTENTS.match(entry):
                    continue
                raise ValueError('Failed to parse table entry %r' % entry)
            name, value = groups.groups()
            if name == 'null':
                # This is the end-of-table marker.
                break
            table_entries[string_constants[name]] = int(value)

    return ParseResults(table_name=table_name, table_entries=table_entries)


def _close_stream(stream):
    """Closes `stream` unless it is one of the process-wide standard streams.

    argparse.FileType hands back sys.stdin / sys.stdout for '-'; those are not
    owned by this script and must stay open for in-process callers.
    """
    if stream is sys.stdin or stream is sys.stdout:
        return
    stream.close()


def main(argv=None):
    """Main entrypoint.

    Args:
        argv: The command-line arguments, without the program name. Defaults
            to sys.argv[1:].

    Returns:
        0 on success.

    Raises:
        ValueError: An input file contains no syscall/constant table, or the
            architecture number is not a known one.
        KeyError: The constants table, MINIJAIL_ARCH_NR or MINIJAIL_ARCH_BITS
            is missing from the inputs.
    """

    if argv is None:
        argv = sys.argv[1:]

    parser = argparse.ArgumentParser(description=__doc__, epilog=HELP_EPILOG)
    parser.add_argument('--output',
                        help='The path of the generated constants.json file.',
                        type=argparse.FileType('w'),
                        required=True)
    parser.add_argument(
        'llvm_ir_files',
        help='An LLVM IR file with one of the {constants,syscall} table.',
        metavar='llvm_ir_file',
        nargs='+',
        type=argparse.FileType('r'))
    opts = parser.parse_args(argv)

    # argparse opened every file for us; make sure they are all closed (and
    # the output flushed) no matter how we leave this function.
    try:
        constants_json = {}
        for ir in opts.llvm_ir_files:
            parse_results = parse_llvm_ir(ir)
            if not parse_results.table_name:
                # Without this check the output would gain a bogus "" key.
                raise ValueError('No syscall or constant table found in %s' %
                                 getattr(ir, 'name', '<input>'))
            constants_json[
                parse_results.table_name] = parse_results.table_entries

        # Populate the top-level fields.
        arch_nr = constants_json['constants']['MINIJAIL_ARCH_NR']
        constants_json['arch_nr'] = arch_nr
        constants_json['bits'] = constants_json['constants'][
            'MINIJAIL_ARCH_BITS']

        if arch_nr not in _ARCH_NAMES:
            raise ValueError('Unknown architecture: 0x%08X' % arch_nr)
        constants_json['arch_name'] = _ARCH_NAMES[arch_nr]

        json.dump(constants_json, opts.output, indent=' ')
    finally:
        for stream in opts.llvm_ir_files + [opts.output]:
            _close_stream(stream)
    return 0


if __name__ == '__main__':
    sys.exit(main(sys.argv[1:]))