xref: /aosp_15_r20/external/icu/tools/icu4c_srcgen/generate_ndk.py (revision 0e209d3975ff4a8c132096b14b0e9364a753506e)
1#!/usr/bin/env -S python3 -B
2#
3# Copyright (C) 2018 The Android Open Source Project
4#
5# Licensed under the Apache License, Version 2.0 (the "License");
6# you may not use this file except in compliance with the License.
7# You may obtain a copy of the License at
8#
9#            http://www.apache.org/licenses/LICENSE-2.0
10#
11# Unless required by applicable law or agreed to in writing, software
12# distributed under the License is distributed on an "AS IS" BASIS,
13# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14# See the License for the specific language governing permissions and
15# limitations under the License.
16#
17"""Generate ICU stable C API wrapper source.
18
19
20This script parses all the header files specified by the ICU module names. For
21each function in the allowlist, it generates the NDK headers, and shim functions
22to shim.cpp, which in turn calls the real implementation at runtime.
23The tool relies on libclang to parse header files.
24
25Reference to ICU4C stable C APIs:
26http://icu-project.org/apiref/icu4c/files.html
27"""
28from __future__ import absolute_import
29from __future__ import print_function
30
31import logging
32import os
33import re
34import shutil
35import subprocess
36from pathlib import Path
37from typing import Dict
38
39from genutil import (
40    android_path,
41    generate_shim,
42    get_jinja_env,
43    get_allowlisted_apis,
44    AllowlistedDeclarationFilter,
45    DeclaredFunctionsParser,
46    StableDeclarationFilter,
47    THIS_DIR,
48)
49
# Symbol suffix handed to generate_shim(); the NDK shim uses none.
SYMBOL_SUFFIX = ''

# Temporary marker prepended to allowlisted declarations so that they survive
# the pass that deletes every remaining stable declaration.
SECRET_PROCESSING_TOKEN = "@@@SECRET@@@"

# --- Regex fragments -------------------------------------------------------
# A "/** ... */" doc comment, optional trailing spaces and the newline after it.
DOC_BLOCK_COMMENT = r"\/\*\*(?:\*(?!\/)|[^*])*\*\/[ ]*\n"
# Any run of characters other than ')', '^' and ';', followed by a ')'.
TILL_CLOSE_PARENTHESIS = r"[^)^;]*\)"
# Macros that introduce a stable C API declaration.
STABLE_MACRO = r"(?:U_STABLE|U_CAPI)"

# --- Whole-declaration patterns (doc comment + macro + prototype + ';') ----
# Group 1 of each pattern is the declaration without its terminating ';'.
STABLE_FUNCTION_DECLARATION = (
    rf"^({DOC_BLOCK_COMMENT}{STABLE_MACRO}{TILL_CLOSE_PARENTHESIS});$"
)
NONSTABLE_FUNCTION_DECLARATION = (
    rf"^({DOC_BLOCK_COMMENT}(U_INTERNAL|U_DEPRECATED|U_DRAFT){TILL_CLOSE_PARENTHESIS});$"
)

# Compiled with MULTILINE so '^' and '$' anchor at every line of a header.
REGEX_STABLE_FUNCTION_DECLARATION = re.compile(
    STABLE_FUNCTION_DECLARATION, re.MULTILINE)
REGEX_NONSTABLE_FUNCTION_DECLARATION = re.compile(
    NONSTABLE_FUNCTION_DECLARATION, re.MULTILINE)

# API level (as read from the export list) -> text placed inside
# __INTRODUCED_IN(...) in the generated headers.
API_LEVEL_MACRO_MAP = {
    '31': '31',
    'T': '__ANDROID_API_T__',
}
70
def get_allowlisted_regex_string(decl_names):
    """Return a regex string capturing the C declarations of the named functions.

    The pattern matches a doc block comment, a stable-API macro, and a
    prototype whose function name is exactly one of ``decl_names``, up to the
    terminating ``;`` at the end of a line.

    Group 1 is the whole declaration without the trailing ``;``; group 2 is
    the matched function name. The string has no leading ``^`` so callers can
    prepend their own prefix, and it must be compiled with ``re.MULTILINE``
    for the trailing ``$`` to anchor at line ends.

    Args:
        decl_names: iterable of C function names (plain identifiers).

    Returns:
        The regex as a string.
    """
    # Escape every name so it is matched literally, and require a word
    # boundary in front of it so that e.g. "u_foo" cannot match the tail of
    # "xu_foo(". The name is always followed by "\(", which bounds its end.
    tag = "|".join(re.escape(name) for name in decl_names)
    return (r"(" + DOC_BLOCK_COMMENT + STABLE_MACRO + r"[^(]*\b(" + tag + r")"
            + r"\(" + TILL_CLOSE_PARENTHESIS + r");$")
76
def get_replacement_adding_api_level_macro(api_level: str):
    """Build the ``re.sub`` replacement that tags a declaration with its API level.

    The replacement re-emits capture group 1 (the declaration), appends the
    NDK ``__INTRODUCED_IN(<api_level>)`` annotation and the terminating ``;``,
    and ends with an empty line.
    """
    return rf"\1 __INTRODUCED_IN({api_level});\n\n"
81
def modify_func_declarations(src_path: str, dst_path: str,
    exported_decl_api_map: Dict[str, str]):
    """Rewrite the function declarations of one header and save the result.

    Reads ``src_path``, deletes every function declaration that is not a key
    of ``exported_decl_api_map``, appends ``__INTRODUCED_IN(...)`` for the
    mapped API level to each declaration that is kept, and writes the text to
    ``dst_path``.

    Args:
        src_path: header file to read.
        dst_path: file to write the processed header to.
        exported_decl_api_map: function name -> API level; every level must be
            a key of API_LEVEL_MACRO_MAP.

    Raises:
        KeyError: if an API level is missing from API_LEVEL_MACRO_MAP.
    """
    decl_names = list(exported_decl_api_map.keys())
    keep_regex = re.compile(
        '^' + get_allowlisted_regex_string(decl_names), re.MULTILINE)

    with open(src_path, "r") as src_file:
        text = src_file.read()

    # Step 1: drop the U_INTERNAL / U_DEPRECATED / U_DRAFT declarations.
    text = REGEX_NONSTABLE_FUNCTION_DECLARATION.sub('', text)

    # Step 2: shield the allowlisted declarations behind the secret token ...
    if decl_names:
        text = keep_regex.sub(SECRET_PROCESSING_TOKEN + r"\1;", text)
    # Step 3: ... so that this pass only deletes the stable declarations
    # that are not in the allowlist.
    text = REGEX_STABLE_FUNCTION_DECLARATION.sub('', text)

    # Step 4: per API level, strip the token again and append the annotation.
    for api_level in set(exported_decl_api_map.values()):
        names_at_level = [name for name, level in exported_decl_api_map.items()
                          if level == api_level]
        macro = API_LEVEL_MACRO_MAP[api_level]
        tokenized_regex = re.compile(
            '^' + SECRET_PROCESSING_TOKEN
            + get_allowlisted_regex_string(names_at_level),
            re.MULTILINE)
        text = tokenized_regex.sub(
            get_replacement_adding_api_level_macro(macro), text)

    with open(dst_path, "w") as out:
        out.write(text)
121
def remove_ignored_includes(file_path, include_list):
    """Remove ``#include "unicode/<name>"`` lines from a header, in place.

    Only lines that consist exactly of ``#include "unicode/<name>"`` with
    ``<name>`` listed in ``include_list`` are removed; the file is left
    untouched when the list is empty.

    Args:
        file_path: path of the header file to rewrite.
        include_list: header file names (e.g. ``"uset.h"``) to drop.
    """
    # Do nothing if the list is empty
    if not include_list:
        return

    # Escape the names: they contain '.', which would otherwise match any
    # character (e.g. "uset.h" would also match "usetxh").
    tag = "|".join(re.escape(name) for name in include_list)

    with open(file_path, "r") as file:
        content = file.read()

    regex = re.compile(r'^#include "unicode/(?:' + tag + r')"\n', re.MULTILINE)
    content = regex.sub('', content)

    with open(file_path, "w") as out:
        out.write(content)
142
def copy_header_only_files():
    """Copy the required header-only files into the NDK headers folder.

    The relative paths of the files are read from
    libicu_required_header_only_files.txt. Lines that are blank or start with
    '#' (after trimming surrounding whitespace) are skipped. While copying,
    every U_SHOW_CPLUSPLUS_API is renamed to LIBICU_U_SHOW_CPLUSPLUS_API.

    Raises:
        subprocess.CalledProcessError: if sed fails on one of the files.
    """
    base_src_path = android_path('external/icu/icu4c/source/')
    base_dest_path = android_path('external/icu/libicu/ndk_headers/unicode/')
    list_path = android_path(
        'external/icu/tools/icu4c_srcgen/libicu_required_header_only_files.txt')
    with open(list_path, 'r') as in_file:
        # Strip first so that indented comments and whitespace-only lines are
        # recognised; an empty entry would otherwise resolve to the source
        # directory itself and make sed fail.
        entries = (line.strip() for line in in_file)
        header_only_files = [
            base_src_path + entry for entry in entries
            if entry and not entry.startswith('#')
        ]

    for src_path in header_only_files:
        dest_path = base_dest_path + os.path.basename(src_path)
        cmd = ['sed',
               "s/U_SHOW_CPLUSPLUS_API/LIBICU_U_SHOW_CPLUSPLUS_API/g",
               src_path
               ]

        # sed prints the rewritten header to stdout, which is the new file.
        with open(dest_path, "w") as destfile:
            subprocess.check_call(cmd, stdout=destfile)
162
def copy_cts_headers():
    """Recreate cts_headers/ from the ICU4C sources for building cintltst as CTS.

    Any existing cts_headers folder is deleted first. uconfig_local.h and the
    *.h files of common/ and i18n/ go to the top of the folder; the *.h files
    of common/unicode/ and i18n/unicode/ go to its unicode/ subfolder.
    """
    dst_folder = android_path('external/icu/libicu/cts_headers')
    unicode_folder = os.path.join(dst_folder, 'unicode')

    # Start from an empty directory tree.
    if os.path.exists(dst_folder):
        shutil.rmtree(dst_folder)
    os.mkdir(dst_folder)
    os.mkdir(unicode_folder)

    shutil.copyfile(android_path('external/icu/android_icu4c/include/uconfig_local.h'),
                    android_path('external/icu/libicu/cts_headers/uconfig_local.h'))

    for subfolder in ('common', 'common/unicode', 'i18n', 'i18n/unicode'):
        src_dir = android_path('external/icu/icu4c/source', subfolder)
        # Headers from a ".../unicode" source folder keep their unicode/ prefix.
        target_dir = unicode_folder if subfolder.endswith('unicode') else dst_folder

        for entry in os.listdir(src_dir):
            if not entry.endswith('.h'):
                continue
            shutil.copyfile(os.path.join(src_dir, entry),
                            os.path.join(target_dir, entry))
192
def get_rename_macro_regex(decl_names):
    """Return a compiled regex matching the ``#define`` lines of the given names.

    A match is a whole line of the form ``#define <name> <anything>`` where
    ``<name>`` is one of ``decl_names``; group 1 holds the complete line.
    """
    alternatives = "|".join(decl_names)
    pattern = rf"^(#define (?:{alternatives}) .*)$"
    return re.compile(pattern, re.MULTILINE)
197
def generate_cts_headers(decl_names):
    """Generate the headers used to compile cintltst as CTS.

    Copies the headers with copy_cts_headers() and then comments out, in the
    copied unicode/urename.h, every ``#define`` that renames one of
    ``decl_names``, so the CTS build refers to the NDK functions by their
    plain names.
    """
    copy_cts_headers()

    urename_path = android_path('external/icu/libicu/cts_headers/unicode/urename.h')
    with open(urename_path, "r") as file:
        original = file.read()

    # Turn each matching "#define ..." line into "// #define ...".
    commented_out = get_rename_macro_regex(decl_names).sub(r"// \1", original)

    with open(urename_path, "w") as out:
        out.write(commented_out)
212
# Header name -> "unicode/..." includes that are ignored by the parser and
# stripped from the generated NDK copy of that header.
IGNORED_INCLUDE_DEPENDENCY = {
    "ubrk.h": ["parseerr.h"],
    "ucol.h": ["uiter.h", "unorm.h", "uset.h"],
    "ulocdata.h": ["ures.h", "uset.h"],
    "unorm2.h": ["uset.h"],
    "ustring.h": ["uiter.h"],
    "utrans.h": ["uset.h"],
}

# Headers that do not get the @addtogroup annotation.
IGNORED_HEADER_FOR_DOXYGEN_GROUPING = {
    "ubidi.h",  # ubidi.h has the @{ ... @} group block already.
    "uconfig.h",  # pre-defined config that NDK users shouldn't change
    "platform.h",  # pre-defined variable not to be changed by the NDK users
    "utf_old.h",  # deprecated UTF macros
    "uvernum.h",  # ICU version information not useful for version-independent usage in NDK
    "urename.h",  # Renaming symbols, but not used in NDK
}

# This map should mirror the mapping in external/icu/icu4c/source/Doxyfile.in.
# It is needed because the NDK doesn't allow a per-module Doxyfile,
# apart from the shared frameworks/native/docs/Doxyfile.
# Each key is replaced textually by its value in the generated headers.
DOXYGEN_ALIASES = {
    "@memo":
        '\\par Note:\n',
    "@draft":
        '\\xrefitem draft "Draft" "Draft List" This API may be changed in the future versions and was introduced in',
    "@stable":
        '\\xrefitem stable "Stable" "Stable List"',
    "@deprecated":
        '\\xrefitem deprecated "Deprecated" "Deprecated List"',
    "@obsolete":
        '\\xrefitem obsolete "Obsolete" "Obsolete List"',
    "@system":
        '\\xrefitem system "System" "System List" Do not use unless you know what you are doing.',
    "@internal":
        '\\xrefitem internal "Internal"  "Internal List"  Do not use. This API is for internal use only.',
}
245
def add_ndk_required_doxygen_grouping():
    """Add the @addtogroup annotation to the header files for NDK API docs.

    For every *.h file in libicu/ndk_headers/unicode:
      1. replace each key of DOXYGEN_ALIASES by its expansion;
      2. unless the header is listed in IGNORED_HEADER_FOR_DOXYGEN_GROUPING,
         insert "* @addtogroup icu4c ICU4C" and "* @{" in front of the first
         "* \\file" comment line, and append the closing "/** @} */" at the
         end of the file.

    A warning is printed for a header that contains no "addtogroup" text
    after step 2, and no closing bracket is appended to it.

    Raises:
        subprocess.CalledProcessError: if sed fails.
    """
    path = android_path('external/icu/libicu/ndk_headers/unicode')
    files = Path(path).glob("*.h")

    # sed script (the "0,/re/" address is a GNU extension): within the range
    # ending at the first line matching "<indent>* \file", substitute that
    # match by the two annotation lines followed by the original line, each
    # carrying the same indent. Written as a raw string so that the
    # backslashes reach sed unchanged without relying on Python keeping
    # invalid escape sequences such as "\(".
    sed_script = r'0,/^\( *\)\(\* *\\file\)/s//\1* @addtogroup icu4c ICU4C\n\1* @{\n\1\2/'

    for src_path in files:
        header_content = src_path.read_text()

        for old, new in DOXYGEN_ALIASES.items():
            header_content = header_content.replace(old, new)

        src_path.write_text(header_content)

        if os.path.basename(src_path) in IGNORED_HEADER_FOR_DOXYGEN_GROUPING:
            continue

        cmd_add_addtogroup_annotation = ['sed',
               '-i',
               sed_script,
               src_path
               ]

        subprocess.check_call(cmd_add_addtogroup_annotation)

        # Next iteration if the above sed regex doesn't add the text
        if not has_string_in_file(src_path, 'addtogroup'):
            basename = os.path.basename(src_path)
            print(f'Warning: unicode/{basename} has no "\\file" annotation')
            continue

        # Add the closing bracket for @addtogroup
        with open(src_path, 'a') as header_file:
            header_file.write('\n/** @} */ // addtogroup\n')
279
def has_string_in_file(path, s):
    """Tell whether the text ``s`` occurs anywhere in the file at ``path``.

    The whole file is read as text; returns True on a match, False otherwise.
    """
    with open(path, 'r') as file:
        content = file.read()
    return s in content
284
def get_exported_symbol_map(export_file : str) -> Dict[str, str]:
    """Return a dictionary mapping each symbol name to its API level.

    ``export_file`` is resolved relative to THIS_DIR. Each line that is
    neither blank nor starting with '#' must have the form
    ``<symbol>,<api level>``; whitespace around either field is ignored and
    any field after the second one is not used.

    Args:
        export_file: name of the export list file.

    Returns:
        Dict from symbol name to API level, both as strings.

    Raises:
        ValueError: if a data line contains no ',' separator.
    """
    result_map = {}
    with open(os.path.join(THIS_DIR, export_file), 'r') as file:
        for line in file:
            line = line.strip()
            if not line or line.startswith("#"):
                continue
            splits = line.split(',')
            if len(splits) < 2:
                raise ValueError(f'line "{line}" has no , separator')
            # Strip the fields so that "name, 31" yields the level "31"
            # instead of " 31", which is not a key of API_LEVEL_MACRO_MAP.
            result_map[splits[0].strip()] = splits[1].strip()

    return result_map
299
300
def main():
    """Parse the ICU4C headers and generate the shim libicu.

    Writes libicu/src/shim.cpp and libicu/libicu.map.txt, regenerates the NDK
    headers under libicu/ndk_headers/unicode and the CTS headers, adds the
    doxygen grouping, and finally runs doc_patches/apply_patches.sh.
    """
    logging.basicConfig(level=logging.DEBUG)

    # The export list decides which functions are exposed and at which level.
    exported_symbol_map = get_exported_symbol_map('libicu_export.txt')
    allowlisted_apis = set(exported_symbol_map.keys())

    parser = DeclaredFunctionsParser(
        [StableDeclarationFilter(), AllowlistedDeclarationFilter(allowlisted_apis)],
        [])
    parser.set_ignored_include_dependency(IGNORED_INCLUDE_DEPENDENCY)
    parser.parse()

    includes = parser.header_includes
    declared_functions = parser.declared_functions
    header_to_function_names = parser.header_to_function_names

    # The shim has the allowlisted functions only
    shim_functions = [func for func in declared_functions
                      if func.name in allowlisted_apis]

    # Always start from an empty NDK headers folder.
    headers_folder = android_path('external/icu/libicu/ndk_headers/unicode')
    if os.path.exists(headers_folder):
        shutil.rmtree(headers_folder)
    os.mkdir(headers_folder)

    shim_source = generate_shim(shim_functions, includes, SYMBOL_SUFFIX,
                                'libicu_shim.cpp.j2')
    with open(android_path('external/icu/libicu/src/shim.cpp'), 'w') as out_file:
        out_file.write(shim_source)

    with open(android_path('external/icu/libicu/libicu.map.txt'), 'w') as out_file:
        map_template = get_jinja_env().get_template('libicu.map.txt.j2')
        out_file.write(map_template.render({
            'exported_symbol_map' : exported_symbol_map,
        }))

    # Process the C headers and put them into the ndk folder.
    for src_path in parser.header_paths_to_copy:
        basename = os.path.basename(src_path)
        dst_path = os.path.join(headers_folder, basename)
        # Only the exported symbols declared by this very header.
        symbols_in_header = {
            name: level for name, level in exported_symbol_map.items()
            if name in header_to_function_names[basename]}
        modify_func_declarations(src_path, dst_path, symbols_in_header)

        # Remove #include lines from the header files.
        ignored_includes = IGNORED_INCLUDE_DEPENDENCY.get(basename)
        if ignored_includes is not None:
            remove_ignored_includes(dst_path, ignored_includes)

    copy_header_only_files()

    generate_cts_headers(allowlisted_apis)

    add_ndk_required_doxygen_grouping()

    # Apply documentation patches by the following shell script
    patch_script = android_path(
        'external/icu/tools/icu4c_srcgen/doc_patches/apply_patches.sh')
    subprocess.check_call([patch_script])

    print("Done. See the generated headers at libicu/ndk_headers/.")

if __name__ == '__main__':
    main()
362