xref: /aosp_15_r20/external/icu/tools/icuutil.py (revision 0e209d3975ff4a8c132096b14b0e9364a753506e)
1# Copyright 2017 The Android Open Source Project
2#
3# Licensed under the Apache License, Version 2.0 (the "License");
4# you may not use this file except in compliance with the License.
5# You may obtain a copy of the License at
6#
7#      http://www.apache.org/licenses/LICENSE-2.0
8#
9# Unless required by applicable law or agreed to in writing, software
10# distributed under the License is distributed on an "AS IS" BASIS,
11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12# See the License for the specific language governing permissions and
13# limitations under the License.
14
15"""Utility methods associated with ICU source and builds."""
16
17from __future__ import print_function
18
19import filecmp
20import glob
21import os
22import pathlib
23import shutil
24import subprocess
25import sys
26
27import i18nutil
28import ziputil
29
30
31# See https://github.com/unicode-org/icu/blob/main/docs/userguide/icu_data/buildtool.md
32# for the documentation.
33ICU_DATA_FILTERS = """{
34  "featureFilters": {
35    "misc": {
36      "excludelist": [
37        "metaZones",
38        "timezoneTypes",
39        "windowsZones",
40        "zoneinfo64"
41      ]
42    },
43    "brkitr_adaboost": {
44      "includelist": [
45        "jaml"
46      ]
47    }
48  }
49}
50"""
51
# Data filter JSON handed to PrepareIcuBuild() by _MakeIcuDataFilesOnce().
# Its only feature filter restricts "brkitr_adaboost" to "jaml"; unlike
# ICU_DATA_FILTERS it has no "misc" exclude list.
ICU_MLDATA_FILTERS = """{
  "featureFilters": {
    "brkitr_adaboost": {
      "includelist": [
        "jaml"
      ]
    }
  }
}
"""
62
63
def cldrDir():
  """Returns the resolved path of external/cldr under the Android root.

  The Android root comes from i18nutil.GetAndroidRootOrDie() and the
  resulting directory is validated with i18nutil.CheckDirExists().
  """
  cldr_path = os.path.realpath(
      '%s/external/cldr' % i18nutil.GetAndroidRootOrDie())
  i18nutil.CheckDirExists(cldr_path, 'external/cldr')
  return cldr_path
70
71
def icuDir():
  """Returns the resolved path of external/icu under the Android root.

  The Android root comes from i18nutil.GetAndroidRootOrDie() and the
  resulting directory is validated with i18nutil.CheckDirExists().
  """
  icu_path = os.path.realpath(
      '%s/external/icu' % i18nutil.GetAndroidRootOrDie())
  i18nutil.CheckDirExists(icu_path, 'external/icu')
  return icu_path
78
79
def icu4cDir():
  """Returns the resolved path of icu4c/source inside the ICU directory.

  The ICU directory comes from icuDir(); the result is validated with
  i18nutil.CheckDirExists().
  """
  icu_root = icuDir()
  source_path = os.path.realpath('%s/icu4c/source' % icu_root)
  i18nutil.CheckDirExists(source_path, 'external/icu/icu4c/source')
  return source_path
85
86
def icu4jDir():
  """Returns the resolved path of icu4j inside the ICU directory.

  The ICU directory comes from icuDir(); the result is validated with
  i18nutil.CheckDirExists().
  """
  icu_root = icuDir()
  icu4j_path = os.path.realpath('%s/icu4j' % icu_root)
  i18nutil.CheckDirExists(icu4j_path, 'external/icu/icu4j')
  return icu4j_path
92
93
def datFile(icu_build_dir):
  """Returns the location of the ICU .dat file in the specified ICU build dir.

  Globs for <icu_build_dir>/data/out/tmp/icudt??l.dat and requires exactly
  one match.

  Args:
    icu_build_dir: path of the ICU build directory to search.

  Returns:
    The path of the single matching .dat file.

  Raises:
    SystemExit: with status 1, after printing an error, when zero or more
      than one file matches the pattern.
  """
  dat_file_pattern = '%s/data/out/tmp/icudt??l.dat' % icu_build_dir
  dat_files = glob.glob(dat_file_pattern)
  if len(dat_files) != 1:
    print('ERROR: Unexpectedly found %d .dat files (%s). Halting.' % (len(dat_files), dat_files))
    sys.exit(1)
  return dat_files[0]
103
104
def PrepareIcuBuild(icu_build_dir, data_filters_json=None):
  """Sets up an ICU build in the specified directory.

  Creates the directory if needed and runs "runConfigureICU Linux" from
  inside it. The caller's working directory is restored afterwards, even
  when configuration fails.

  Args:
    icu_build_dir: directory to configure the build in. The filter file path
      is derived from this value after changing into the directory, so it is
      presumably expected to be an absolute path -- confirm against callers.
    data_filters_json: optional ICU data filter JSON string. When not None it
      is written to icu4c_data_filters.json inside icu_build_dir and passed to
      the configure step through ICU_DATA_FILTER_FILE.

  Raises:
    subprocess.CalledProcessError: if runConfigureICU exits with a non-zero
      status.
  """
  # Keep track of the original cwd so we can go back to it at the end.
  original_working_dir = os.getcwd()

  # Create a directory to run 'make' from.
  os.makedirs(icu_build_dir, exist_ok=True)
  os.chdir(icu_build_dir)
  try:
    # Build the ICU tools.
    print('Configuring ICU tools...')
    cmd = ['env']
    if data_filters_json is not None:
      json_file_path = os.path.join(icu_build_dir, "icu4c_data_filters.json")
      print("json path: %s" % json_file_path)
      writeFileContent(json_file_path, data_filters_json)
      cmd.append('ICU_DATA_FILTER_FILE=%s' % json_file_path)

    cmd += ['ICU_DATA_BUILDTOOL_OPTS=--include_uni_core_data',
            '%s/runConfigureICU' % icu4cDir(),
            'Linux']
    subprocess.check_call(cmd)
  finally:
    # Never leave the process stranded inside the build directory.
    os.chdir(original_working_dir)
133
def writeFileContent(file_path, file_content):
  """Writes the string file_content to file_path, replacing existing content."""
  with open(file_path, "w") as out_stream:
    out_stream.write(file_content)
138
def MakeTzDataFiles(icu_build_dir, iana_tar_file):
  """Builds and runs the ICU tools in ${icu_build_dir}/tools/tzcode.

  The tools are run against the specified IANA tzdata .tar.gz.
  The resulting zoneinfo64.txt is copied into the src directories.

  Args:
    icu_build_dir: ICU build directory containing tools/tzcode.
    iana_tar_file: path of the IANA tzdata archive; it is copied into the
      tzcode working directory under its own base name.

  Raises:
    subprocess.CalledProcessError: if the make invocation fails.
  """
  tzcode_working_dir = '%s/tools/tzcode' % icu_build_dir

  # Fix missing files.
  # The tz2icu tool only picks up icuregions and icuzones if they are in the CWD
  for icu_data_file in ['icuregions', 'icuzones']:
    icu_data_file_source = '%s/tools/tzcode/%s' % (icu4cDir(), icu_data_file)
    icu_data_file_symlink = '%s/%s' % (tzcode_working_dir, icu_data_file)
    # Replace an entry left behind by an earlier run; os.symlink() raises
    # FileExistsError when the link name is already taken.
    if os.path.lexists(icu_data_file_symlink):
      os.remove(icu_data_file_symlink)
    os.symlink(icu_data_file_source, icu_data_file_symlink)

  iana_tar_filename = os.path.basename(iana_tar_file)
  working_iana_tar_file = '%s/%s' % (tzcode_working_dir, iana_tar_filename)
  shutil.copyfile(iana_tar_file, working_iana_tar_file)

  print('Making ICU tz data files...')
  # The Makefile assumes the existence of the bin directory.
  # exist_ok keeps a re-run in the same build directory from failing.
  os.makedirs('%s/bin' % icu_build_dir, exist_ok=True)

  # -j1 is needed because the build is not parallelizable. http://b/109641429
  subprocess.check_call(['make', '-j1', '-C', tzcode_working_dir])

  # Copy the source file to its ultimate destination.
  zoneinfo_file = '%s/zoneinfo64.txt' % tzcode_working_dir
  icu_txt_data_dir = '%s/data/misc' % icu4cDir()
  print('Copying zoneinfo64.txt to %s ...' % icu_txt_data_dir)
  shutil.copy(zoneinfo_file, icu_txt_data_dir)
170
171
def MakeAndCopyIcuDataFiles(icu_build_dir, copy_icu4c_dat_file_only=False):
  """Builds the ICU .dat and .jar files using the current src data.

  The files are copied back into the expected locations in the src tree.
  The caller's working directory is restored on every exit path, including
  the early return and failures.

  This is a low-level method.
  Please check :func:`GenerateIcuDataFiles()` for caveats.

  Args:
    icu_build_dir: ICU build directory in which 'make' is run.
    copy_icu4c_dat_file_only: when True, stop after copying the ICU4C .dat
      file into icu4c stubdata; the ICU4J jars, the extracted ICU4J data and
      the test data are not regenerated or copied.

  Raises:
    subprocess.CalledProcessError: if a make or script invocation fails.
  """
  # Keep track of the original cwd so we can go back to it at the end.
  original_working_dir = os.getcwd()

  os.chdir(icu_build_dir)
  try:
    # Regenerate the .dat file.
    subprocess.check_call(['make', '-j32'])

    # Copy the .dat file to its ultimate destination.
    icu_dat_data_dir = '%s/stubdata' % icu4cDir()
    dat_file = datFile(icu_build_dir)

    print('Copying %s to %s ...' % (dat_file, icu_dat_data_dir))
    shutil.copy(dat_file, icu_dat_data_dir)

    if copy_icu4c_dat_file_only:
      return

    # Generate the ICU4J .jar files
    subprocess.check_call(['make', '-j32', 'icu4j-data'])

    # Generate the test data in icu4c/source/test/testdata/out
    subprocess.check_call(['make', '-j32', 'tests'])

    # Copy the ICU4J .jar files to their ultimate destination.
    # This relies on the cwd still being icu_build_dir.
    CopyIcu4jDataFiles()

    os.chdir(icu4jDir())
    # os.path.basename(dat_file) is like icudt??l.dat
    icu4j_data_ver = os.path.basename(dat_file)[:-5] + "b"
    subprocess.check_call(['env', 'ICU_DATA_VER=' + icu4j_data_ver, './extract-data-files.sh'])
    os.chdir(icu_build_dir)

    testdata_out_dir = '%s/test/testdata/out' % icu4cDir()
    print('Copying test data to %s ' % testdata_out_dir)
    if os.path.exists(testdata_out_dir):
      shutil.rmtree(testdata_out_dir)
    # Relative source path: resolved against icu_build_dir (the cwd).
    shutil.copytree('test/testdata/out', testdata_out_dir)
  finally:
    # Switch back to the original working cwd.
    os.chdir(original_working_dir)
220
def CopyIcu4jDataFiles():
  """Copies the built ICU4J .jar files into icu4j/main/shared/data.

  The jars are looked up with the relative pattern data/out/icu4j/*.jar, i.e.
  relative to the current working directory. Exactly three jars are expected;
  otherwise an error is printed and the process exits with status 1. A jar
  that ziputil.ZipCompare() reports as matching the destination copy is left
  alone.
  """
  dest_dir = '%s/main/shared/data' % icu4jDir()
  os.makedirs(dest_dir, exist_ok=True)

  built_jars = glob.glob('data/out/icu4j/*.jar')
  jar_count = len(built_jars)
  if jar_count != 3:
    print('ERROR: Unexpectedly found %d .jar files (%s). Halting.' % (jar_count, built_jars))
    sys.exit(1)

  for built_jar in built_jars:
    dest_jar = os.path.join(dest_dir, os.path.basename(built_jar))
    if not ziputil.ZipCompare(built_jar, dest_jar):
      print('Copying %s to %s ...' % (built_jar, dest_dir))
      shutil.copy(built_jar, dest_dir)
      continue
    print('Ignoring %s which is identical to %s ...' % (built_jar, dest_jar))
236
def MakeAndCopyIcuTzFiles(icu_build_dir, res_dest_dir):
  """Makes .res files containing just time zone data.

  They provide time zone data only: some strings like translated
  time zone names will be missing, but rules will be correct.

  The caller's working directory is restored on every exit path.

  Args:
    icu_build_dir: ICU build directory in which 'make' is run.
    res_dest_dir: directory the time zone .res files are copied into. The
      copy happens while the cwd is icu_build_dir, so a relative path is
      resolved against the build directory.

  Raises:
    subprocess.CalledProcessError: if the make invocation fails.
    SystemExit: with status 1 if the .dat file name does not end with .dat.
  """
  # Keep track of the original cwd so we can go back to it at the end.
  original_working_dir = os.getcwd()

  os.chdir(icu_build_dir)
  try:
    # Regenerate the .res files.
    subprocess.check_call(['make', '-j32'])

    # The list of ICU resources needed for time zone data overlays.
    tz_res_names = [
        'metaZones.res',
        'timezoneTypes.res',
        'windowsZones.res',
        'zoneinfo64.res',
    ]

    dat_file = datFile(icu_build_dir)
    icu_package_dat = os.path.basename(dat_file)
    if not icu_package_dat.endswith('.dat'):
      print('%s does not end with .dat' % icu_package_dat)
      sys.exit(1)
    # Strip the '.dat' suffix to get the package name, e.g. icudt55l.
    icu_package = icu_package_dat[:-4]

    # Copy all the .res files we need from, e.g. ./data/out/build/icudt55l, to the
    # destination directory.
    res_src_dir = '%s/data/out/build/%s' % (icu_build_dir, icu_package)
    for tz_res_name in tz_res_names:
      shutil.copy('%s/%s' % (res_src_dir, tz_res_name), res_dest_dir)
  finally:
    # Switch back to the original working cwd.
    os.chdir(original_working_dir)
274
def GenerateIcuDataFiles():
  """Generates the ICU data files with two consecutive builds.

  The first pass, _MakeIcuDataFilesOnce(), builds ICU and returns its build
  directory. The second pass, _MakeIcuDataFilesWithoutTimeZoneFiles(), runs
  another build in that same directory.
  """
  _MakeIcuDataFilesWithoutTimeZoneFiles(_MakeIcuDataFilesOnce())
283
def _MakeIcuDataFilesOnce():
  """Runs a single ICU build and copies the .dat and .jar files into place.

  Calls i18nutil.SwitchToNewTemporaryDirectory(), then configures the build
  in the 'icu' subdirectory of the resulting cwd using ICU_MLDATA_FILTERS.
  It is unlikely that this needs to be called from outside this script.

  This is a low-level method.
  Please check :func:`GenerateIcuDataFiles()` for caveats.

  Returns:
    The path of the ICU build directory that was used.
  """
  i18nutil.SwitchToNewTemporaryDirectory()
  build_dir = '%s/icu' % os.getcwd()

  PrepareIcuBuild(build_dir, data_filters_json=ICU_MLDATA_FILTERS)
  MakeAndCopyIcuDataFiles(build_dir)
  return build_dir
300
def _MakeIcuDataFilesWithoutTimeZoneFiles(icu_build_dir):
  """Rebuilds the .dat file without the timezone .res files.

  Dropping the timezone .res files saves ~200 KB of file size.
  TODO (b/206956042): Move this to the first build when
  https://unicode-org.atlassian.net/browse/ICU-21769 is fixed.
  Until then a second build is needed to produce a .dat file without the
  timezone files.
  """
  # GNUmake only rebuilds the .lst file if it has been removed by hand first.
  stale_list_file = pathlib.Path(icu_build_dir) / 'data/out/tmp/icudata.lst'
  stale_list_file.unlink(missing_ok=True)

  PrepareIcuBuild(icu_build_dir, data_filters_json=ICU_DATA_FILTERS)

  # Only the ICU4C .dat file is copied: the ICU4J data and other files are
  # skipped because the data files may be incomplete to be consumed for a
  # host tool. The ICU4J implementation on device doesn't use the ICU4J data
  # files, e.g. ./icu4j/main/shared/data/icudata.jar
  MakeAndCopyIcuDataFiles(icu_build_dir, copy_icu4c_dat_file_only=True)
317
def CopyLicenseFiles(target_dir):
  """Copies the LICENSE file from the ICU directory into target_dir."""
  icu_license_path = '%s/LICENSE' % icuDir()
  progress_message = 'Copying %s to %s ...' % (icu_license_path, target_dir)
  print(progress_message)
  shutil.copy(icu_license_path, target_dir)
324
325