xref: /aosp_15_r20/external/icu/tools/icu4c_srcgen/genutil.py (revision 0e209d3975ff4a8c132096b14b0e9364a753506e)
1# Copyright (C) 2018 The Android Open Source Project
2#
3# Licensed under the Apache License, Version 2.0 (the "License");
4# you may not use this file except in compliance with the License.
5# You may obtain a copy of the License at
6#
7#            http://www.apache.org/licenses/LICENSE-2.0
8#
9# Unless required by applicable law or agreed to in writing, software
10# distributed under the License is distributed on an "AS IS" BASIS,
11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12# See the License for the specific language governing permissions and
13# limitations under the License.
14#
15
16"""Utility for ICU4C code generation"""
17
18from __future__ import absolute_import
19from __future__ import division
20from __future__ import print_function
21
22import logging
23import os
24import site
25import sys
26import textwrap
27from collections import deque
28
29import jinja2
30
# Directory holding this script (symlinks resolved).
THIS_DIR = os.path.dirname(os.path.realpath(__file__))
# Four directory levels above THIS_DIR, normalised to a real path.
ANDROID_TOP = os.path.realpath(os.path.join(THIS_DIR, '../../../..'))

# Shared jinja2 environment: templates are loaded from the 'jinja_templates'
# directory next to this script, with block trimming/left-stripping enabled.
JINJA_ENV = jinja2.Environment(
    loader=jinja2.FileSystemLoader(os.path.join(THIS_DIR, 'jinja_templates')),
    trim_blocks=True,
    lstrip_blocks=True,
)
38
def generate_shim(functions, includes, suffix, template_file):
    """Render the library source file for the given functions.

    Args:
        functions: Value exposed to the template as 'functions'.
        includes: Value exposed to the template as 'icu_headers'.
        suffix: Value exposed to the template as 'suffix'.
        template_file: Name of the template looked up in JINJA_ENV.

    Returns:
        The rendered template text.
    """
    context = dict(functions=functions, icu_headers=includes, suffix=suffix)
    template = JINJA_ENV.get_template(template_file)
    return template.render(context)
47
def generate_symbol_txt(shim_functions, extra_function_names, template_file):
    """Render the symbol txt file for the given functions.

    Args:
        shim_functions: Functions exposed to the template as 'shim_functions';
            each shim function is given a suffix.
        extra_function_names: Names exposed to the template as
            'extra_function_names'; each is included as given.
        template_file: Name of the template looked up in JINJA_ENV.

    Returns:
        The rendered template text.
    """
    context = dict(
        shim_functions=shim_functions,
        extra_function_names=extra_function_names,
    )
    template = JINJA_ENV.get_template(template_file)
    return template.render(context)
57
def get_jinja_env():
    """Return the module-wide jinja2 environment (JINJA_ENV)."""
    environment = JINJA_ENV
    return environment
61
def get_allowlisted_apis(allowlist_file):
    """Return the set of API names listed in allowlist_file.

    The file is resolved relative to THIS_DIR. Each line is stripped of
    surrounding whitespace; blank lines and lines starting with '#' are
    skipped.
    """
    allowlist_path = os.path.join(THIS_DIR, allowlist_file)
    with open(allowlist_path, 'r') as stream:
        stripped_lines = (raw_line.strip() for raw_line in stream)
        return {
            entry for entry in stripped_lines
            if entry and not entry.startswith("#")
        }
71
def android_path(*args):
    """Join the given path components onto ANDROID_TOP and return the result."""
    components = (ANDROID_TOP,) + args
    return os.path.join(*components)
75
76
def get_clang_path():
    """Find the latest clang version and return the full path.

    Lists the entries of prebuilts/clang/host/linux-x86/ whose names start
    with 'clang-r' and picks the one with the highest version. Runs of digits
    in the name are compared numerically, so e.g. 'clang-r1000000' ranks above
    'clang-r999999' (a plain string sort would rank it below).

    Returns:
        The path of the selected clang directory.

    Raises:
        IndexError: If no 'clang-r*' entry exists.
    """
    # `re` is not imported at the top of this file, so import it locally.
    import re

    def version_key(name):
        # re.split with a capturing group alternates text / digits / text...,
        # so the odd indices are always the digit runs; convert those to int.
        # 'clang-r450784d' -> ['clang-r', 450784, 'd']
        parts = re.split(r'([0-9]+)', name)
        parts[1::2] = [int(digits) for digits in parts[1::2]]
        return parts

    base_path = android_path('prebuilts/clang/host/linux-x86/')
    files = [f for f in os.listdir(base_path) if f.startswith('clang-r')]
    if not files:
        raise IndexError('No clang-r* directory found in %s' % base_path)
    files.sort(key=version_key, reverse=True)
    selected = files[0]
    print("Using clang version %s" % selected)
    return os.path.join(base_path, selected)
87
88
def get_clang_lib_path(clang_path):
    """Return the path of a 'libclang.so*' file inside <clang_path>/lib.

    The first matching entry in os.listdir() order is used; an IndexError is
    raised if there is none.
    """
    lib_dir = os.path.join(clang_path, 'lib')
    candidates = []
    for entry in os.listdir(lib_dir):
        if entry.startswith('libclang.so'):
            candidates.append(entry)
    first_match = candidates[0]
    return os.path.join(lib_dir, first_match)
94
95
def get_clang_header_dir(clang_path):
    """Return the 'include/' directory under <clang_path>/lib/clang/<entry>/.

    <entry> is the first name returned by os.listdir() for lib/clang/.
    """
    versions_dir = os.path.join(clang_path, 'lib/clang/')
    entries = os.listdir(versions_dir)
    first_entry = entries[0]
    return os.path.join(versions_dir, first_entry, 'include/')
101
102
# Resolve the clang locations once, at import time. Note that this lists
# directories on disk and prints the selected clang version as a side effect
# of importing this module.
CLANG_PATH = get_clang_path()
CLANG_LIB_PATH = get_clang_lib_path(CLANG_PATH)
CLANG_HEADER_PATH = get_clang_header_dir(CLANG_PATH)

# Register the site-packages directory inside the selected clang directory
# before importing clang.cindex; presumably that is where the Python bindings
# live, which is why this import cannot sit at the top of the file.
site.addsitedir(os.path.join(CLANG_PATH, 'lib/python3/site-packages/'))
import clang.cindex  # pylint: disable=import-error,wrong-import-position
109
110
class Function:
    """A visible function found in an ICU header.

    Attributes:
        name: The function name.
        result_type: Spelling of the return type.
        params: List of (param_type, param_name) string pairs.
        is_variadic: True if the function takes a variable argument list.
        va_list_insert_position: Index at which 'args' is inserted into the
            argument list of a variadic function, or -1 if not configured.
        callee: Name used in dlsym; differs from `name` only after
            set_variadic_callee() has been applied to a variadic function.
        last_param: Name of the last named parameter (only set when variadic).
        handle: 'handle_' followed by the module name.
        return_void: True if result_type is exactly 'void'.
    """

    def __init__(self, name, result_type, params, is_variadic, module):
        self.name = name
        self.result_type = result_type
        self.params = params
        self.is_variadic = is_variadic
        self.va_list_insert_position = -1

        # callee will be used in dlsym and may be identical to others for
        # functions with variable argument lists.
        self.callee = self.name
        if self.is_variadic:
            self.last_param = self.params[-1][1]
        self.handle = 'handle_' + module
        self.return_void = self.result_type == 'void'

    @property
    def param_str(self):
        """Returns a string usable as a parameter list in a function decl."""
        params = []
        for param_type, param_name in self.params:
            bracket = param_type.find('[')
            if bracket >= 0:
                # `int foo[42]` will be a param_type of `int [42]` and a
                # param_name of `foo`. We need to put these back in the right
                # order. The element type is right-stripped so that the
                # result is `int foo[42]` rather than `int  foo[42]`.
                param_name += param_type[bracket:]
                param_type = param_type[:bracket].rstrip()
            params.append('{} {}'.format(param_type, param_name))
        if self.is_variadic:
            params.append('...')
        return ', '.join(params)

    @property
    def arg_str(self):
        """Returns a string usable as an argument list in a function call.

        For a variadic function, 'args' (the va_list) is inserted at index
        va_list_insert_position.

        Raises:
            ValueError: If the function is variadic but no insert position
                has been configured via set_variadic_callee().
        """
        args = [param_name for _, param_name in self.params]
        if self.is_variadic:
            if self.va_list_insert_position < 0:
                # Built as a single-line literal: a backslash continuation
                # inside a triple-quoted string would keep the indentation of
                # the following line in the middle of the message.
                raise ValueError(
                    '{}({}) is variadic, but has no valid '
                    'inserted position'.format(self.name, self.param_str))
            args.insert(self.va_list_insert_position, 'args')
        return ', '.join(args)

    def set_variadic_callee(self, callee, inserted_position):
        """Set variadic callee with callee name and inserted position.

        Has no effect on a non-variadic function.
        """
        if self.is_variadic:
            self.callee = callee
            self.va_list_insert_position = inserted_position
167
168
def logger():
    """Return the logger named after this module."""
    module_logger = logging.getLogger(__name__)
    return module_logger
172
173
class DeclaredFunctionsParser:
    """Parser to get declared functions from ICU4C headers.

    Typical use: construct with the filters, optionally call
    set_va_functions_mapping() / set_ignored_include_dependency(), call
    parse(), then read the result properties (header_includes,
    header_paths_to_copy, declared_functions, header_to_function_names).
    """

    def __init__(self, decl_filters, allowlisted_decl_filter):
        """
        Args:
            decl_filters: A list of filters for declared functions.
            allowlisted_decl_filter: A list of allowlisting filters for declared functions.
            If the function is allowlisted here, the function will not be filtered by the
            filters added in decl_filters
        """
        self.decl_filters = decl_filters
        self.allowlisted_decl_filters = allowlisted_decl_filter
        self.va_functions_mapping = {}
        self.ignored_include_dependency = {}

        # properties to store the parsing result
        self.all_headers = []
        self.all_header_paths_to_copy = set()
        self.all_declared_functions = []
        self.seen_functions = set()
        self.all_header_to_function_names = {}

        # Configures libclang to load in our environment
        # Set up LD_LIBRARY_PATH to include libclang.so, libLLVM.so, etc.  Note
        # that setting LD_LIBRARY_PATH with os.putenv() sometimes doesn't help.
        clang.cindex.Config.set_library_file(CLANG_LIB_PATH)

    def set_va_functions_mapping(self, mapping):
        """Set mapping from a variable argument function to an implementation.

        Functions w/ variable argument lists (...) need special care to call
        their corresponding v- versions that accept a va_list argument. Note that
        although '...' will always appear as the last parameter, its v- version
        may put the va_list arg in a different place. Hence we provide an index
        to indicate the position.
        e.g. 'umsg_format': ('umsg_vformat', 3) means in the wrapper function of
        'umsg_format', it will call 'umsg_vformat' instead, with the va_list arg
        inserted at index 3 (0-based) of the argument list."""
        self.va_functions_mapping = mapping

    def set_ignored_include_dependency(self, mapping):
        """
        A sample mapping is { "ulocdata.h" : [ "uloc.h", "ures.h" ] }.
        The include dependencies will explicitly be ignored when producing header_paths_to_copy.
        """
        self.ignored_include_dependency = mapping

    @property
    def header_includes(self):
        """Return all headers declaring the functions returned in declared_functions.

        If all functions in the header are filtered, the header is not included in here."""
        return [DeclaredFunctionsParser.short_header_path(header) for header in self.all_headers]

    @property
    def header_paths_to_copy(self):
        """Return all headers needed to be copied"""
        return self.all_header_paths_to_copy

    @property
    def declared_functions(self):
        """Return all declared functions after filtering"""
        return self.all_declared_functions

    @property
    def header_to_function_names(self):
        """Return the mapping from the header file name to a list of function names in the file"""
        return self.all_header_to_function_names

    @staticmethod
    def get_cflags():
        """Returns the cflags that should be used for parsing."""
        clang_flags = [
            '-x',
            'c',
            '-std=c99',
            '-DU_DISABLE_RENAMING=1',
            '-DU_SHOW_CPLUSPLUS_API=0',
            '-DU_HIDE_DRAFT_API',
            '-DU_HIDE_DEPRECATED_API',
            '-DU_HIDE_INTERNAL_API',
            '-DANDROID_LINK_SHARED_ICU4C',
        ]

        include_dirs = [
            CLANG_HEADER_PATH,
            android_path('bionic/libc/include'),
            android_path('external/icu/android_icu4c/include'),
            android_path('external/icu/icu4c/source/common'),
            android_path('external/icu/icu4c/source/i18n'),
        ]

        clang_flags.extend('-I' + include_dir for include_dir in include_dirs)
        return clang_flags

    @staticmethod
    def get_all_cpp_headers():
        """Return all C++ header names listed in external/icu/tools/icu4c_srcgen/cxxfiles.txt.

        Blank lines and lines starting with '#' are skipped."""
        cpp_headers = []
        with open(android_path('external/icu/tools/icu4c_srcgen/cxxfiles.txt'), 'r') as file:
            for line in file:
                line = line.strip()
                if line and not line.startswith("#"):
                    cpp_headers.append(line)
        return cpp_headers

    def parse(self):
        """Parse the headers and collect the declared functions after filtering
        and the headers containing the functions."""
        index = clang.cindex.Index.create()

        icu_modules = (
            'common',
            'i18n',
        )
        # Loop-invariant inputs: read the C++ header list and build the flags
        # once instead of once per header.
        cpp_headers = set(DeclaredFunctionsParser.get_all_cpp_headers())
        cflags = DeclaredFunctionsParser.get_cflags()

        header_dependencies = {}
        for module in icu_modules:
            path = android_path('external/icu/icu4c/source', module, 'unicode')
            # os.listdir() order is arbitrary; sort so headers are always
            # visited in the same order.
            files = [os.path.join(path, f)
                     for f in sorted(os.listdir(path)) if f.endswith('.h')]

            for file_path in files:
                base_header_name = os.path.basename(file_path)
                # Ignore C++ headers.
                if base_header_name in cpp_headers:
                    continue

                tunit = index.parse(file_path, cflags)
                DeclaredFunctionsParser.handle_diagnostics(tunit)
                header_dependencies[file_path] = [file_inclusion.include.name for file_inclusion
                                                  in tunit.get_includes()]
                visible_functions = self.get_visible_functions(
                    tunit.cursor, module, file_path)
                self.all_header_to_function_names[base_header_name] = \
                    [f.name for f in visible_functions]
                for function in visible_functions:
                    self.seen_functions.add(function.name)
                    self.all_declared_functions.append(function)
                if visible_functions:
                    self.all_headers.append(file_path)

        # Sort to produce a deterministic output
        self.all_declared_functions = sorted(self.all_declared_functions, key=lambda f: f.name)
        self.all_headers = sorted(self.all_headers)

        self._collect_header_paths_to_copy(header_dependencies)

    def _collect_header_paths_to_copy(self, header_dependencies):
        """Add all_headers and their transitive ICU4C includes to all_header_paths_to_copy.

        header_dependencies is a map from icu4c header file path to a list of included headers.
        The key must be a ICU4C header, but the value could contain non-ICU4C headers, e.g.
        {
          ".../icu4c/source/common/unicode/utype.h": [
             ".../icu4c/source/common/unicode/uversion.h",
             ".../bionic/libc/include/ctype.h",
           ],
           ...
        }
        """
        file_queue = deque()
        file_processed = set()
        for header in self.all_headers:
            file_queue.appendleft(header)
            self.all_header_paths_to_copy.add(header)
        while file_queue:
            file = file_queue.pop()
            if file in file_processed:
                continue
            file_processed.add(file)
            # Dependencies explicitly ignored for this particular header.
            ignored = self.ignored_include_dependency.get(os.path.basename(file), ())
            for header in header_dependencies[file]:
                # Skip this header if this dependency is explicitly ignored
                if os.path.basename(header) in ignored:
                    continue
                if header in header_dependencies:  # Non-icu4c headers are not followed
                    self.all_header_paths_to_copy.add(header)
                    file_queue.appendleft(header)

    @staticmethod
    def handle_diagnostics(tunit):
        """Logs compiler diagnostics via the module logger. Exits if errors occurred."""
        errors = 0
        for diag in tunit.diagnostics:
            if diag.severity == clang.cindex.Diagnostic.Fatal:
                level = logging.CRITICAL
                errors += 1
            elif diag.severity == clang.cindex.Diagnostic.Error:
                level = logging.ERROR
                errors += 1
            elif diag.severity == clang.cindex.Diagnostic.Warning:
                level = logging.WARNING
            elif diag.severity == clang.cindex.Diagnostic.Note:
                level = logging.INFO
            else:
                # Any other severity: without this branch `level` would be
                # unbound (or stale from the previous diagnostic).
                level = logging.DEBUG
            logger().log(
                level, '%s:%s:%s %s', diag.location.file, diag.location.line,
                diag.location.column, diag.spelling)
        if errors:
            sys.exit('Errors occurred during parsing. Exiting.')

    def get_visible_functions(self, cursor, module, file_name):
        """Returns a list of all visible functions in a header file."""
        return [self.from_cursor(child, module)
                for child in cursor.get_children()
                if self.should_process_decl(child, file_name)]

    def should_process_decl(self, decl, file_name):
        """Returns True if this function needs to be processed.

        A decl is processed when it is a function declaration located in
        file_name, has not been seen before, has default visibility, and is
        either accepted by any allowlisting filter or by every decl filter."""
        if decl.kind != clang.cindex.CursorKind.FUNCTION_DECL:
            return False
        # A declaration without a source file cannot belong to file_name.
        decl_file = decl.location.file
        if decl_file is None or decl_file.name != file_name:
            return False
        if decl.spelling in self.seen_functions:
            return False
        if not DeclaredFunctionsParser.is_function_visible(decl):
            return False
        # Allowlisting takes precedence over the regular filters.
        if any(allow(decl) for allow in self.allowlisted_decl_filters):
            return True
        return all(decl_filter(decl) for decl_filter in self.decl_filters)

    @staticmethod
    def is_function_visible(decl):
        """Returns True if the function has default visibility.

        If there are several visibility attributes the last one decides; with
        none at all the function is treated as not visible."""
        visible = False
        vis_attrs = DeclaredFunctionsParser.get_children_by_kind(
            decl, clang.cindex.CursorKind.VISIBILITY_ATTR)
        for child in vis_attrs:
            visible = child.spelling == 'default'
        return visible

    @staticmethod
    def get_children_by_kind(cursor, kind):
        """Returns a generator of cursor's children of a specific kind."""
        for child in cursor.get_children():
            if child.kind == kind:
                yield child

    @staticmethod
    def short_header_path(name):
        """Trim the given file name to 'unicode/xyz.h'.

        The name is returned unchanged if it does not contain 'unicode/'."""
        start = name.rfind('unicode/')
        # rfind() returns -1 when absent; slicing from -1 would keep only the
        # last character of the name.
        if start < 0:
            return name
        return name[start:]

    def from_cursor(self, cursor, module):
        """Creates a Function object from the decl at the cursor.

        Raises:
            ValueError: If the cursor's type kind is not FUNCTIONPROTO.
        """
        if cursor.type.kind != clang.cindex.TypeKind.FUNCTIONPROTO:
            raise ValueError(textwrap.dedent("""\
                {}'s type kind is {}, expected TypeKind.FUNCTIONPROTO.
                {} Line {} Column {}""".format(
                    cursor.spelling,
                    cursor.type.kind,
                    cursor.location.file,
                    cursor.location.line,
                    cursor.location.column)))

        name = cursor.spelling
        result_type = cursor.result_type.spelling
        is_variadic = cursor.type.is_function_variadic()
        params = [(arg.type.spelling, arg.spelling) for arg in cursor.get_arguments()]
        function = Function(name, result_type, params, is_variadic, module)
        # For variadic function, set the callee and va_list position
        if function.is_variadic and function.name in self.va_functions_mapping:
            va_func = self.va_functions_mapping[function.name]
            function.set_variadic_callee(va_func[0], va_func[1])
        return function
446
447
class StableDeclarationFilter:
    """Declaration filter that accepts only @stable API."""
    def __call__(self, decl):
        """Returns True if the decl's raw comment contains the '@stable' tag.

        A decl with a missing or empty raw comment is rejected."""
        comment = decl.raw_comment
        return bool(comment) and '@stable' in comment
457
458
class AllowlistedDeclarationFilter:
    """Declaration filter that accepts decls whose name is on an allowlist."""
    def __init__(self, allowlisted_function_names):
        self.allowlisted_function_names = allowlisted_function_names

    def __call__(self, decl):
        """Returns True if decl.spelling is one of the allowlisted names."""
        allowed_names = self.allowlisted_function_names
        function_name = decl.spelling
        return function_name in allowed_names
467
468
class BlocklistedlistedDeclarationFilter:
    """Declaration filter that rejects decls whose name is on a blocklist."""
    def __init__(self, blocklisted_function_names):
        self.blocklisted_function_names = blocklisted_function_names

    def __call__(self, decl):
        """Returns True if the given decl is not blocklisted"""
        blocked_names = self.blocklisted_function_names
        function_name = decl.spelling
        return not function_name in blocked_names
477
478
479# Functions w/ variable argument lists (...) need special care to call
480# their corresponding v- versions that accept a va_list argument. Note that
481# although '...' will always appear as the last parameter, its v- version
482# may put the va_list arg in a different place. Hence we provide an index
483# to indicate the position.
484#
# e.g. 'umsg_format': ('umsg_vformat', 3) means in the wrapper function of
# 'umsg_format', it will call 'umsg_vformat' instead, with the va_list arg
# inserted at index 3 (0-based) of the argument list, i.e. as the 4th argument.
488
489# We need to insert the va_list (named args) at the position
490# indicated by the KNOWN_VA_FUNCTIONS map.
# Maps the name of a variadic function to a (callee, position) tuple: the
# name of the function to call in its place and the 0-based index at which
# the va_list argument is inserted into the argument list.
KNOWN_VA_FUNCTIONS = {
    'u_formatMessage': ('u_vformatMessage', 5),
    'u_parseMessage': ('u_vparseMessage', 5),
    'u_formatMessageWithError': ('u_vformatMessageWithError', 6),
    'u_parseMessageWithError': ('u_vparseMessageWithError', 5),
    'umsg_format': ('umsg_vformat', 3),
    'umsg_parse': ('umsg_vparse', 4),
    'utrace_format': ('utrace_vformat', 4),
}
500
# The following functions are not @stable, but are allowlisted by name.
# (Presumably this tuple is handed to AllowlistedDeclarationFilter by the
# scripts using this module - verify against the callers.)
ALLOWLISTED_FUNCTION_NAMES = (
    # Not intended to be called directly, but are used by @stable macros.
    'utf8_nextCharSafeBody',
    'utf8_appendCharSafeBody',
    'utf8_prevCharSafeBody',
    'utf8_back1SafeBody',
)
509