1# Copyright (C) 2018 The Android Open Source Project 2# 3# Licensed under the Apache License, Version 2.0 (the "License"); 4# you may not use this file except in compliance with the License. 5# You may obtain a copy of the License at 6# 7# http://www.apache.org/licenses/LICENSE-2.0 8# 9# Unless required by applicable law or agreed to in writing, software 10# distributed under the License is distributed on an "AS IS" BASIS, 11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12# See the License for the specific language governing permissions and 13# limitations under the License. 14# 15 16"""Utility for ICU4C code generation""" 17 18from __future__ import absolute_import 19from __future__ import division 20from __future__ import print_function 21 22import logging 23import os 24import site 25import sys 26import textwrap 27from collections import deque 28 29import jinja2 30 31THIS_DIR = os.path.dirname(os.path.realpath(__file__)) 32ANDROID_TOP = os.path.realpath(os.path.join(THIS_DIR, '../../../..')) 33 34JINJA_ENV = jinja2.Environment(loader=jinja2.FileSystemLoader( 35 os.path.join(THIS_DIR, 'jinja_templates'))) 36JINJA_ENV.trim_blocks = True 37JINJA_ENV.lstrip_blocks = True 38 39def generate_shim(functions, includes, suffix, template_file): 40 """Generates the library source file from the given functions.""" 41 data = { 42 'functions': functions, 43 'icu_headers': includes, 44 'suffix': suffix, 45 } 46 return JINJA_ENV.get_template(template_file).render(data) 47 48def generate_symbol_txt(shim_functions, extra_function_names, template_file): 49 """Generates the symbol txt file from the given functions.""" 50 data = { 51 # Each shim_function is given a suffix. 52 'shim_functions' : shim_functions, 53 # Each extra function name is included as given. 54 'extra_function_names': extra_function_names, 55 } 56 return JINJA_ENV.get_template(template_file).render(data) 57 58def get_jinja_env(): 59 """Return a jinja2 environment""" 60 return JINJA_ENV 61 62def get_allowlisted_apis(allowlist_file): 63 """Return all allowlisted API in allowlist_file""" 64 allowlisted_apis = set() 65 with open(os.path.join(THIS_DIR, allowlist_file), 'r') as file: 66 for line in file: 67 line = line.strip() 68 if line and not line.startswith("#"): 69 allowlisted_apis.add(line) 70 return allowlisted_apis 71 72def android_path(*args): 73 """Returns the absolute path to a directory within the Android tree.""" 74 return os.path.join(ANDROID_TOP, *args) 75 76 77def get_clang_path(): 78 """Find the latest clang version and return the full path""" 79 base_path = android_path('prebuilts/clang/host/linux-x86/') 80 files = [f for f in os.listdir(base_path) if f.startswith('clang-r')] 81 # TODO: Don't use sort() because it assumes the same number of digits in the version name 82 files.sort(reverse=True) 83 selected = files[0] 84 print("Using clang version %s" % selected) 85 path = os.path.join(base_path, selected) 86 return path 87 88 89def get_clang_lib_path(clang_path): 90 """Return the libclang.so path""" 91 base_path = os.path.join(clang_path, 'lib') 92 files = [f for f in os.listdir(base_path) if f.startswith('libclang.so')] 93 return os.path.join(base_path, files[0]) 94 95 96def get_clang_header_dir(clang_path): 97 """Return the path to clang header directory""" 98 base_path = os.path.join(clang_path, 'lib/clang/') 99 files = os.listdir(base_path) 100 return os.path.join(base_path, files[0], 'include/') 101 102 103CLANG_PATH = get_clang_path() 104CLANG_LIB_PATH = get_clang_lib_path(CLANG_PATH) 105CLANG_HEADER_PATH = get_clang_header_dir(CLANG_PATH) 106 107site.addsitedir(os.path.join(CLANG_PATH, 'lib/python3/site-packages/')) 108import clang.cindex # pylint: disable=import-error,wrong-import-position 109 110 111class Function: 112 """A visible function found in an ICU header.""" 113 114 def __init__(self, name, result_type, params, is_variadic, module): 115 self.name = name 116 self.result_type = result_type 117 self.params = params 118 self.is_variadic = is_variadic 119 self.va_list_insert_position = -1 120 121 # callee will be used in dlsym and may be identical to others for 122 # functions with variable argument lists. 123 self.callee = self.name 124 if self.is_variadic: 125 self.last_param = self.params[-1][1] 126 self.handle = 'handle_' + module 127 self.return_void = self.result_type == 'void' 128 129 @property 130 def param_str(self): 131 """Returns a string usable as a parameter list in a function decl.""" 132 params = [] 133 for param_type, param_name in self.params: 134 if '[' in param_type: 135 # `int foo[42]` will be a param_type of `int [42]` and a 136 # param_name of `foo`. We need to put these back in the right 137 # order. 138 param_name += param_type[param_type.find('['):] 139 param_type = param_type[:param_type.find('[')] 140 params.append('{} {}'.format(param_type, param_name)) 141 if self.is_variadic: 142 params.append('...') 143 return ', '.join(params) 144 145 @property 146 def arg_str(self): 147 """Returns a string usable as an argument list in a function call.""" 148 args = [] 149 for _, param_name in self.params: 150 args.append(param_name) 151 if self.is_variadic: 152 if self.va_list_insert_position >= 0: 153 args.insert(self.va_list_insert_position, 'args') 154 else: 155 raise ValueError(textwrap.dedent("""\ 156 {}({}) is variadic, but has no valid \ 157 inserted position""".format( 158 self.name, 159 self.param_str))) 160 return ', '.join(args) 161 162 def set_variadic_callee(self, callee, inserted_position): 163 """Set variadic callee with callee name and inserted position""" 164 if self.is_variadic: 165 self.callee = callee 166 self.va_list_insert_position = inserted_position 167 168 169def logger(): 170 """Returns the module level logger.""" 171 return logging.getLogger(__name__) 172 173 174class DeclaredFunctionsParser: 175 """Parser to get declared functions from ICU4C headers. """ 176 177 def __init__(self, decl_filters, allowlisted_decl_filter): 178 """ 179 Args: 180 decl_filters: A list of filters for declared functions. 181 allowlisted_decl_filter: A list of allowlisting filters for declared functions. 182 If the function is allowlisted here, the function will not filtered by the filter added 183 in decl_filters 184 """ 185 self.decl_filters = decl_filters 186 self.allowlisted_decl_filters = allowlisted_decl_filter 187 self.va_functions_mapping = {} 188 self.ignored_include_dependency = {} 189 190 # properties to store the parsing result 191 self.all_headers = [] 192 self.all_header_paths_to_copy = set() 193 self.all_declared_functions = [] 194 self.seen_functions = set() 195 self.all_header_to_function_names = {} 196 197 # Configures libclang to load in our environment 198 # Set up LD_LIBRARY_PATH to include libclang.so, libLLVM.so, etc. Note 199 # that setting LD_LIBRARY_PATH with os.putenv() sometimes doesn't help. 200 clang.cindex.Config.set_library_file(CLANG_LIB_PATH) 201 202 def set_va_functions_mapping(self, mapping): 203 """Set mapping from a variable argument function to an implementation. 204 205 Functions w/ variable argument lists (...) need special care to call 206 their corresponding v- versions that accept a va_list argument. Note that 207 although '...' will always appear as the last parameter, its v- version 208 may put the va_list arg in a different place. Hence we provide an index 209 to indicate the position. 210 e.g. 'umsg_format': ('umsg_vformat', 3) means in the wrapper function of 211 'umsg_format', it will call 'umsg_vformat' instead, with the va_list arg 212 inserted as the 3rd argument.""" 213 self.va_functions_mapping = mapping 214 215 def set_ignored_include_dependency(self, mapping): 216 """ 217 A sample mapping is { "ulocdata.h" : [ "uloc.h", "ures.h" ] }. 218 The include dependencies will explicitly be ignored when producing header_paths_to_copy. 219 """ 220 self.ignored_include_dependency = mapping 221 222 @property 223 def header_includes(self): 224 """Return all headers declaring the functions returned in get_all_declared_functions. 225 226 If all functions in the header are filtered, the header is not included in here.""" 227 return [DeclaredFunctionsParser.short_header_path(header) for header in self.all_headers] 228 229 @property 230 def header_paths_to_copy(self): 231 """Return all headers needed to be copied""" 232 return self.all_header_paths_to_copy 233 234 @property 235 def declared_functions(self): 236 """Return all declared functions after filtering""" 237 return self.all_declared_functions 238 239 @property 240 def header_to_function_names(self): 241 """Return the mapping from the header file name to a list of function names in the file""" 242 return self.all_header_to_function_names 243 244 @staticmethod 245 def get_cflags(): 246 """Returns the cflags that should be used for parsing.""" 247 clang_flags = [ 248 '-x', 249 'c', 250 '-std=c99', 251 '-DU_DISABLE_RENAMING=1', 252 '-DU_SHOW_CPLUSPLUS_API=0', 253 '-DU_HIDE_DRAFT_API', 254 '-DU_HIDE_DEPRECATED_API', 255 '-DU_HIDE_INTERNAL_API', 256 '-DANDROID_LINK_SHARED_ICU4C', 257 ] 258 259 include_dirs = [ 260 CLANG_HEADER_PATH, 261 android_path('bionic/libc/include'), 262 android_path('external/icu/android_icu4c/include'), 263 android_path('external/icu/icu4c/source/common'), 264 android_path('external/icu/icu4c/source/i18n'), 265 ] 266 267 for include_dir in include_dirs: 268 clang_flags.append('-I' + include_dir) 269 return clang_flags 270 271 @staticmethod 272 def get_all_cpp_headers(): 273 """Return all C++ header names in icu4c/source/test/hdrtst/cxxfiles.txt""" 274 cpp_headers = [] 275 with open(android_path('external/icu/tools/icu4c_srcgen/cxxfiles.txt'), 'r') as file: 276 for line in file: 277 line = line.strip() 278 if not line.startswith("#"): 279 cpp_headers.append(line) 280 return cpp_headers 281 282 def parse(self): 283 """Parse the headers and collect the declared functions after filtering 284 and the headers containing the functions.""" 285 index = clang.cindex.Index.create() 286 287 icu_modules = ( 288 'common', 289 'i18n', 290 ) 291 header_dependencies = {} 292 for module in icu_modules: 293 path = android_path(android_path('external/icu/icu4c/source', module, 'unicode')) 294 files = [os.path.join(path, f) 295 for f in os.listdir(path) if f.endswith('.h')] 296 297 for file_path in files: 298 base_header_name = os.path.basename(file_path) 299 # Ignore C++ headers. 300 if base_header_name in DeclaredFunctionsParser.get_all_cpp_headers(): 301 continue 302 303 tunit = index.parse(file_path, DeclaredFunctionsParser.get_cflags()) 304 DeclaredFunctionsParser.handle_diagnostics(tunit) 305 header_dependencies[file_path] = [file_inclusion.include.name for file_inclusion 306 in tunit.get_includes()] 307 visible_functions = self.get_visible_functions( 308 tunit.cursor, module, file_path) 309 self.all_header_to_function_names[base_header_name] = \ 310 [f.name for f in visible_functions] 311 for function in visible_functions: 312 self.seen_functions.add(function.name) 313 self.all_declared_functions.append(function) 314 if visible_functions: 315 self.all_headers.append(file_path) 316 317 # Sort to produce an deterministic output 318 self.all_declared_functions = sorted(self.all_declared_functions, key=lambda f: f.name) 319 self.all_headers = sorted(self.all_headers) 320 321 # Build the headers required for using your restricted API set, and put the set into 322 # all_header_files_to_copy. 323 # header_dependencies is a map from icu4c header file path to a list of included headers. 324 # The key must be a ICU4C header, but the value could contain non-ICU4C headers, e.g. 325 # { 326 # ".../icu4c/source/common/unicode/utype.h": [ 327 # ".../icu4c/source/common/unicode/uversion.h", 328 # ".../bionic/libc/include/ctype.h", 329 # ], 330 # ... 331 # } 332 file_queue = deque() 333 file_processed = set() 334 for header in self.all_headers: 335 file_queue.appendleft(header) 336 self.all_header_paths_to_copy.add(header) 337 while file_queue: 338 file = file_queue.pop() 339 file_basename = os.path.basename(file) 340 if file in file_processed: 341 continue 342 file_processed.add(file) 343 for header in header_dependencies[file]: 344 header_basename = os.path.basename(header) 345 # Skip this header if this dependency is explicitly ignored 346 if file_basename in self.ignored_include_dependency and \ 347 header_basename in self.ignored_include_dependency[file_basename]: 348 continue 349 if header in header_dependencies: # Do not care non-icu4c headers 350 self.all_header_paths_to_copy.add(header) 351 file_queue.appendleft(header) 352 353 @staticmethod 354 def handle_diagnostics(tunit): 355 """Prints compiler diagnostics to stdout. Exits if errors occurred.""" 356 errors = 0 357 for diag in tunit.diagnostics: 358 if diag.severity == clang.cindex.Diagnostic.Fatal: 359 level = logging.CRITICAL 360 errors += 1 361 elif diag.severity == clang.cindex.Diagnostic.Error: 362 level = logging.ERROR 363 errors += 1 364 elif diag.severity == clang.cindex.Diagnostic.Warning: 365 level = logging.WARNING 366 elif diag.severity == clang.cindex.Diagnostic.Note: 367 level = logging.INFO 368 logger().log( 369 level, '%s:%s:%s %s', diag.location.file, diag.location.line, 370 diag.location.column, diag.spelling) 371 if errors: 372 sys.exit('Errors occurred during parsing. Exiting.') 373 374 def get_visible_functions(self, cursor, module, file_name): 375 """Returns a list of all visible functions in a header file.""" 376 functions = [] 377 for child in cursor.get_children(): 378 if self.should_process_decl(child, file_name): 379 functions.append(self.from_cursor(child, module)) 380 return functions 381 382 def should_process_decl(self, decl, file_name): 383 """Returns True if this function needs to be processed.""" 384 if decl.kind != clang.cindex.CursorKind.FUNCTION_DECL: 385 return False 386 if decl.location.file.name != file_name: 387 return False 388 if decl.spelling in self.seen_functions: 389 return False 390 if not DeclaredFunctionsParser.is_function_visible(decl): 391 return False 392 for allowlisted_decl_filter in self.allowlisted_decl_filters: 393 if allowlisted_decl_filter(decl): 394 return True 395 for decl_filter in self.decl_filters: 396 if not decl_filter(decl): 397 return False 398 return True 399 400 @staticmethod 401 def is_function_visible(decl): 402 """Returns True if the function has default visibility.""" 403 visible = False 404 vis_attrs = DeclaredFunctionsParser.get_children_by_kind( 405 decl, clang.cindex.CursorKind.VISIBILITY_ATTR) 406 for child in vis_attrs: 407 visible = child.spelling == 'default' 408 return visible 409 410 @staticmethod 411 def get_children_by_kind(cursor, kind): 412 """Returns a generator of cursor's children of a specific kind.""" 413 for child in cursor.get_children(): 414 if child.kind == kind: 415 yield child 416 417 @staticmethod 418 def short_header_path(name): 419 """Trim the given file name to 'unicode/xyz.h'.""" 420 return name[name.rfind('unicode/'):] 421 422 def from_cursor(self, cursor, module): 423 """Creates a Function object from the decl at the cursor.""" 424 if cursor.type.kind != clang.cindex.TypeKind.FUNCTIONPROTO: 425 raise ValueError(textwrap.dedent("""\ 426 {}'s type kind is {}, expected TypeKind.FUNCTIONPROTO. 427 {} Line {} Column {}""".format( 428 cursor.spelling, 429 cursor.type.kind, 430 cursor.location.file, 431 cursor.location.line, 432 cursor.location.column))) 433 434 name = cursor.spelling 435 result_type = cursor.result_type.spelling 436 is_variadic = cursor.type.is_function_variadic() 437 params = [] 438 for arg in cursor.get_arguments(): 439 params.append((arg.type.spelling, arg.spelling)) 440 function = Function(name, result_type, params, is_variadic, module) 441 # For variadic function, set the callee and va_list position 442 if function.is_variadic and function.name in self.va_functions_mapping: 443 va_func = self.va_functions_mapping[function.name] 444 function.set_variadic_callee(va_func[0], va_func[1]) 445 return function 446 447 448class StableDeclarationFilter: 449 """Return true if it's @stable API""" 450 def __call__(self, decl): 451 """Returns True if the given decl has a doxygen stable tag.""" 452 if not decl.raw_comment: 453 return False 454 if '@stable' in decl.raw_comment: 455 return True 456 return False 457 458 459class AllowlistedDeclarationFilter: 460 """A filter for allowlisting function declarations.""" 461 def __init__(self, allowlisted_function_names): 462 self.allowlisted_function_names = allowlisted_function_names 463 464 def __call__(self, decl): 465 """Returns True if the given decl is allowlisted""" 466 return decl.spelling in self.allowlisted_function_names 467 468 469class BlocklistedlistedDeclarationFilter: 470 """A filter for blocklisting function declarations.""" 471 def __init__(self, blocklisted_function_names): 472 self.blocklisted_function_names = blocklisted_function_names 473 474 def __call__(self, decl): 475 """Returns True if the given decl is nor blocklisted""" 476 return decl.spelling not in self.blocklisted_function_names 477 478 479# Functions w/ variable argument lists (...) need special care to call 480# their corresponding v- versions that accept a va_list argument. Note that 481# although '...' will always appear as the last parameter, its v- version 482# may put the va_list arg in a different place. Hence we provide an index 483# to indicate the position. 484# 485# e.g. 'umsg_format': ('umsg_vformat', 3) means in the wrapper function of 486# 'umsg_format', it will call 'umsg_vformat' instead, with the va_list arg 487# inserted as the 3rd argument. 488 489# We need to insert the va_list (named args) at the position 490# indicated by the KNOWN_VA_FUNCTIONS map. 491KNOWN_VA_FUNCTIONS = { 492 'u_formatMessage': ('u_vformatMessage', 5), 493 'u_parseMessage': ('u_vparseMessage', 5), 494 'u_formatMessageWithError': ('u_vformatMessageWithError', 6), 495 'u_parseMessageWithError': ('u_vparseMessageWithError', 5), 496 'umsg_format': ('umsg_vformat', 3), 497 'umsg_parse': ('umsg_vparse', 4), 498 'utrace_format': ('utrace_vformat', 4), 499} 500 501# The following functions are not @stable 502ALLOWLISTED_FUNCTION_NAMES = ( 503 # Not intended to be called directly, but are used by @stable macros. 504 'utf8_nextCharSafeBody', 505 'utf8_appendCharSafeBody', 506 'utf8_prevCharSafeBody', 507 'utf8_back1SafeBody', 508) 509