xref: /aosp_15_r20/external/cronet/base/win/embedded_i18n/create_string_rc.py (revision 6777b5387eb2ff775bb5750e3f5d96f37fb7352b)
1#!/usr/bin/env python3
2# Copyright 2012 The Chromium Authors
3# Use of this source code is governed by a BSD-style license that can be
4# found in the LICENSE file.
5
6"""Generates .h and .rc files for strings extracted from a .grd file.
7
8This script generates an rc file and header (NAME.{rc,h}) to be included in
9a build target. The rc file includes translations for strings pulled from the
10given .grd file(s) and their corresponding localized .xtb files.
11
12To specify strings that will be extracted, the script pointed to by the
13argument "extract-datafile" should contain one or both of the following global
14variables:
15
16STRING_IDS is a list of strings IDs we want to import from the .grd files and
17include in the generated RC file. These strings are universal for all brands.
18
19MODE_SPECIFIC_STRINGS: is a dictionary of strings for which there are brand
20specific values. This mapping provides brand- and mode-specific string ids for a
21given input id as described here:
22
23{
24  resource_id_1: {  # A resource ID for use with GetLocalizedString.
25    brand_1: [  # 'google_chrome', for example.
26      string_id_1,  # Strings listed in order of the brand's modes, as
27      string_id_2,  # specified in install_static::InstallConstantIndex.
28      ...
29      string_id_N,
30    ],
31    brand_2: [  # 'chromium', for example.
32      ...
33    ],
34  },
35  resource_id_2:  ...
36}
37
38Note: MODE_SPECIFIC_STRINGS cannot be specified if STRING_IDS is not specified.
39
40"""
41
42# The generated header file includes IDs for each string, but also has values to
43# allow getting a string based on a language offset.  For example, the header
44# file looks like this:
45#
46# #define IDS_L10N_OFFSET_AR 0
47# #define IDS_L10N_OFFSET_BG 1
48# #define IDS_L10N_OFFSET_CA 2
49# ...
50# #define IDS_L10N_OFFSET_ZH_TW 41
51#
52# #define IDS_MY_STRING_AR 1600
53# #define IDS_MY_STRING_BG 1601
54# ...
55# #define IDS_MY_STRING_BASE IDS_MY_STRING_AR
56#
# This allows us to look up the ID for a string by adding IDS_MY_STRING_BASE
# and the IDS_L10N_OFFSET_* value for the language we are interested in.
59#
60
61from __future__ import print_function
62
63import argparse
64import collections
65import glob
66import io
67import os
68import sys
69from xml import sax
70
# Directory containing this script; the tool directories below are located
# relative to it.
BASEDIR = os.path.dirname(os.path.abspath(__file__))
# Splice the grit and python tool directories into the module search path at
# positions 1 and 2 (in that order) so their packages can be imported.
sys.path[1:1] = [
    os.path.join(BASEDIR, '../../../tools/grit'),
    os.path.join(BASEDIR, '../../../tools/python'),
]
74
75from grit.extern import tclib
76
class GrdHandler(sax.handler.ContentHandler):
  """Extracts selected strings from a .grd file.

  Attributes:
    messages: A dict mapping string identifiers to a dict holding the message
      text (key "text") and its transconsole id (key "tc_id").
    referenced_xtb_files: A list of the paths of all .xtb files named by
      <file> elements located directly inside a <translations> element.
  """

  def __init__(self, string_id_set):
    """Constructs a handler that reads selected strings from a .grd file.

    The dict attribute |messages| is populated with the strings that are read.

    Args:
      string_id_set: An optional set of message identifiers to extract; all
        messages are extracted if it is empty.
    """
    sax.handler.ContentHandler.__init__(self)
    self.messages = collections.defaultdict(dict)
    self.referenced_xtb_files = []
    self.__id_set = string_id_set
    # Name of the message being extracted, or None when not inside a message
    # that was selected for extraction.
    self.__message_name = None
    # Names of the currently open elements, outermost first.
    self.__element_stack = []
    # Pieces of the message text, including text nested in <ph> elements.
    self.__text_scraps = []

    # Contains the text in the format required by transconsole to generate the
    # corresponding TC fingerprint: placeholder names stand in for the
    # placeholder contents.
    self.__tc_text_scraps = []

    # Receiver for character data; only set while extracting a message.
    self.__characters_callback = None

  def startElement(self, name, attrs):
    """Tracks the open element and dispatches on the elements of interest."""
    self.__element_stack.append(name)
    if name == 'message':
      self.__OnOpenMessage(attrs.getValue('name'))
    elif name == 'ph':
      self.__OnOpenPlaceholder(attrs.getValue('name'))
    elif name == 'file':
      # Only <file> elements directly under <translations> are considered. A
      # <file> without a parent (i.e. a <file> document element) is ignored
      # rather than indexing past the start of the stack.
      stack = self.__element_stack
      if len(stack) >= 2 and stack[-2] == 'translations':
        self.__OnAddXtbFile(attrs.getValue('path'))

  def endElement(self, name):
    """Pops the closed element and finalizes a message when one ends."""
    popped = self.__element_stack.pop()
    assert popped == name
    if name == 'message':
      self.__OnCloseMessage()

  def characters(self, content):
    """Forwards character data to the active callback, if any."""
    if self.__characters_callback:
      self.__characters_callback(self.__element_stack[-1], content)

  def __IsExtractingMessage(self):
    """Returns True if a message is currently being extracted."""
    return self.__message_name is not None

  def __OnOpenMessage(self, message_name):
    """Invoked at the start of a <message> with message's name."""
    assert not self.__IsExtractingMessage()
    # An empty id set selects every message.
    wanted = not self.__id_set or message_name in self.__id_set
    self.__message_name = message_name if wanted else None
    if self.__message_name:
      self.__characters_callback = self.__OnMessageText

  def __OnOpenPlaceholder(self, ph_name):
    """Invoked at the start of a <ph> with the `name` attribute."""
    if self.__IsExtractingMessage():
      # TC uses the `name` attribute as part of the fingerprint
      # generation.
      self.__tc_text_scraps.append(ph_name)

  def __OnMessageText(self, containing_element, message_text):
    """Invoked to handle a block of text for a message.

    Text directly inside <message> or <ph> contributes to the message text;
    only text directly inside <message> contributes to the TC text. Text in
    any other element is dropped.
    """
    if message_text and containing_element in ('message', 'ph'):
      self.__text_scraps.append(message_text)
      if containing_element == 'message':
        self.__tc_text_scraps.append(message_text)

  def __OnCloseMessage(self):
    """Invoked at the end of a message; records it and resets the state."""
    if self.__IsExtractingMessage():
      message = self.messages[self.__message_name]
      message["text"] = ''.join(self.__text_scraps).strip()

      # Generate the message ID for each source string to correlate it with its
      # TC translations in the .xtb files.
      message["tc_id"] = tclib.GenerateMessageId(
          ''.join(self.__tc_text_scraps).strip())

      self.__message_name = None
      self.__text_scraps = []
      self.__tc_text_scraps = []
      self.__characters_callback = None

  def __OnAddXtbFile(self, xtb_file_path):
    """Records |xtb_file_path| if it has an .xtb extension (any case)."""
    if os.path.splitext(xtb_file_path)[1].lower() == '.xtb':
      self.referenced_xtb_files.append(xtb_file_path)
178
class XtbHandler(sax.handler.ContentHandler):
  """Extracts selected translations from an .xtb file.

  Every parsed document resets and then fills in |lang| and |translations|, so
  a single instance can be used to read the same set of translations from
  several .xtb files one after another.

  Attributes:
    translations: A mapping of string identifiers to their translated text.
    lang: The language of the parsed .xtb file, upper-cased and with '-'
      replaced by '_'.
  """

  def __init__(self, translation_ids):
    """Constructs an instance to parse the given strings from an .xtb file.

    Args:
      translation_ids: a mapping of translation ids to the list of string
        identifiers that should receive the corresponding translation.
    """
    sax.handler.ContentHandler.__init__(self)
    self.lang = None
    self.translations = None
    self.__translation_ids = translation_ids
    # Names of the currently open elements, outermost first.
    self.__open_elements = []
    # String identifiers for the translation being read, or None when no
    # translation is open.
    self.__target_ids = None
    # Pieces of the translation text collected so far.
    self.__pieces = []
    # Number of `ph` tags seen in the translation being read.
    self.__placeholders_seen = 0
    # True while character data should be collected.
    self.__collect_text = False

  def startDocument(self):
    """Clears the results of any previously parsed document."""
    self.lang = ''
    self.translations = {}

  def startElement(self, name, attrs):
    """Handles the document element, <translation> and <ph> tags."""
    self.__open_elements.append(name)
    # The document element is translationbundle; it carries the language.
    if len(self.__open_elements) == 1:
      assert name == 'translationbundle'
      self.lang = attrs.getValue('lang').replace('-', '_').upper()
    if name == 'translation':
      self.__BeginTranslation(attrs.getValue('id'))
    elif name == 'ph':
      self.__AddPlaceholder()

  def endElement(self, name):
    """Closes the innermost element, storing a finished translation."""
    closed = self.__open_elements.pop()
    assert closed == name
    if name == 'translation':
      self.__FinishTranslation()

  def characters(self, content):
    """Collects text that sits directly inside a wanted <translation>."""
    if not self.__collect_text:
      return
    if content and self.__open_elements[-1] == 'translation':
      self.__pieces.append(content)

  def __InTranslation(self):
    """Returns `True` while a translation is being read."""
    return self.__target_ids is not None

  def __BeginTranslation(self, translation_id):
    """Looks up who wants |translation_id| and starts collecting if anyone."""
    assert not self.__InTranslation()
    self.__target_ids = self.__translation_ids.get(translation_id)
    if self.__target_ids:
      self.__collect_text = True

  def __AddPlaceholder(self):
    """Substitutes a positional `$N` marker for a `ph` tag."""
    if not self.__InTranslation():
      return
    # The .xtb files hold `ph` tags where the strings need placeholders, so
    # each tag becomes `$1`, `$2`, ... in order of appearance.
    self.__placeholders_seen += 1
    self.__pieces.append('$%d' % self.__placeholders_seen)

  def __FinishTranslation(self):
    """Stores the collected text under every wanted id and resets state."""
    if not self.__InTranslation():
      return
    text = ''.join(self.__pieces).strip()
    self.translations.update(dict.fromkeys(self.__target_ids, text))
    self.__target_ids = None
    self.__pieces = []
    self.__placeholders_seen = 0
    self.__collect_text = False
269
270
class StringRcMaker(object):
  """Makes .h and .rc files containing strings and translations."""

  def __init__(self, inputs, expected_xtb_input_files, header_file, rc_file,
               brand, first_resource_id, string_ids_to_extract,
               mode_specific_strings):
    """Constructs a maker.

    Args:
      inputs: An iterable of (grd_file, xtb_dir) pairs containing the source
        data.
      expected_xtb_input_files: A list of xtb files that are expected to exist
        in the inputs folders. If there is a discrepancy between what exists
        and what is expected the script will fail. The list is not modified.
      header_file: The location of the header file to write containing all the
        defined string IDs.
      rc_file: The location of the rc file to write containing all the string
        resources.
      brand: The brand to check against when extracting mode-specific strings.
      first_resource_id: The starting ID for the generated string resources.
      string_ids_to_extract: The IDs of strings we want to import from the .grd
        files and include in the generated RC file. These strings are universal
        for all brands.
      mode_specific_strings: A dictionary of strings that have conditional
        values based on the brand's install mode. Refer to the documentation at
        the top of this file for more information on the format of the
        dictionary.

    Raises:
      RuntimeError: if an entry of |mode_specific_strings| declares no strings
        for |brand|.
    """
    # Materialize the pairs so that a one-shot iterable (such as the result of
    # zip()) can be walked each time MakeFiles is called.
    self.inputs = list(inputs)
    # Sort a copy rather than reordering the caller's list in place.
    self.expected_xtb_input_files = sorted(expected_xtb_input_files)
    self.header_file = header_file
    self.rc_file = rc_file
    self.brand = brand
    self.first_resource_id = first_resource_id
    self.string_id_set = set(string_ids_to_extract)
    self.mode_specific_strings = mode_specific_strings
    self.__AddModeSpecificStringIds()

  def MakeFiles(self):
    """Reads all inputs, then writes the .rc file and the header file."""
    translated_strings = self.__ReadSourceAndTranslatedStrings()
    self.__WriteRCFile(translated_strings)
    self.__WriteHeaderFile(translated_strings)

  class __TranslationData(object):
    """A container of information about a single translation."""

    def __init__(self, resource_id_str, language, translation):
      self.resource_id_str = resource_id_str
      self.language = language
      self.translation = translation

    def __lt__(self, other):
      """Allow __TranslationDatas to be sorted by id then by language."""
      return (self.resource_id_str, self.language) < (other.resource_id_str,
                                                      other.language)

  def __BrandStrings(self, string_id, brands):
    """Returns the current brand's mode-specific string ids for |string_id|.

    Args:
      string_id: The resource id whose mode-specific strings are wanted; only
        used in the error message.
      brands: The mapping of brand name to list of string ids for |string_id|.

    Raises:
      RuntimeError: if |brands| has no (or an empty) entry for the brand.
    """
    brand_strings = brands.get(self.brand)
    if not brand_strings:
      raise RuntimeError(
          'No strings declared for brand \'%s\' in MODE_SPECIFIC_STRINGS for '
          'message %s' % (self.brand, string_id))
    return brand_strings

  def __AddModeSpecificStringIds(self):
    """Adds the mode-specific strings for all of the current brand's install
    modes to self.string_id_set."""
    for string_id, brands in self.mode_specific_strings.items():
      self.string_id_set.update(self.__BrandStrings(string_id, brands))

  def __ReadSourceAndTranslatedStrings(self):
    """Reads the source strings and translations from all inputs.

    Exits the process with status 1 if the .xtb files found on disk differ
    from self.expected_xtb_input_files.

    Returns:
      A sorted list of __TranslationData instances.
    """
    translated_strings = []
    all_xtb_files = []
    for grd_file, xtb_dir in self.inputs:
      # Get the name of the grd file sans extension.
      source_name = os.path.splitext(os.path.basename(grd_file))[0]
      # Compute a glob for the translation files.
      xtb_pattern = os.path.join(os.path.dirname(grd_file), xtb_dir,
                                 '%s*.xtb' % source_name)
      local_xtb_files = [x.replace('\\', '/') for x in glob.glob(xtb_pattern)]
      all_xtb_files.extend(local_xtb_files)
      translated_strings.extend(
          self.__ReadSourceAndTranslationsFrom(grd_file, local_xtb_files))
    translated_strings.sort()
    all_xtb_files.sort()

    if self.expected_xtb_input_files != all_xtb_files:
      # Sorted so that the diagnostics are stable from run to run.
      extra = sorted(set(all_xtb_files) - set(self.expected_xtb_input_files))
      missing = sorted(set(self.expected_xtb_input_files) - set(all_xtb_files))
      error = ('Asserted file list does not match.\n'
               '\n'
               'Expected input files:\n'
               '{}\n'
               'Actual input files:\n'
               '{}\n'
               'Missing input files:\n'
               '{}\n'
               'Extra input files:\n'
               '{}\n')
      print(error.format('\n'.join(self.expected_xtb_input_files),
                         '\n'.join(all_xtb_files), '\n'.join(missing),
                         '\n'.join(extra)))
      sys.exit(1)
    return translated_strings

  def __ReadSourceAndTranslationsFrom(self, grd_file, xtb_files):
    """Reads source strings and translations for a .grd file.

    Reads the source strings and all available translations for the messages
    identified by self.string_id_set (or all the messages if self.string_id_set
    is empty). The source string is used where translations are missing.

    Exits the process with status 1 if the .xtb files referenced by the .grd
    file and the files in |xtb_files| do not match.

    Args:
      grd_file: Path to a .grd file.
      xtb_files: List of paths to .xtb files.

    Returns:
      An unsorted list of __TranslationData instances.
    """
    sax_parser = sax.make_parser()

    # Read the source (en-US) string from the .grd file.
    grd_handler = GrdHandler(self.string_id_set)
    sax_parser.setContentHandler(grd_handler)
    sax_parser.parse(grd_file)
    source_strings = grd_handler.messages

    # Resolve the .xtb paths named in the .grd file against its directory,
    # using forward slashes to match the normalized paths in |xtb_files|.
    grd_file_path = os.path.dirname(grd_file)
    source_xtb_files = [
        os.path.join(grd_file_path, xtb_file).replace('\\', '/')
        for xtb_file in grd_handler.referenced_xtb_files
    ]
    missing_xtb_files = sorted(set(source_xtb_files) - set(xtb_files))

    translated_strings = []
    translation_ids = {}
    for string_id, string_data in source_strings.items():
      # Manually put the source strings as en-US in the list of translated
      # strings.
      translated_strings.append(self.__TranslationData(string_id,
                                                       'EN_US',
                                                       string_data["text"]))
      # Multiple source strings may have the same message text; hence the
      # message id is mapped to a list of string ids instead of a single value.
      translation_ids.setdefault(string_data["tc_id"], []).append(string_id)

    # Track any xtb files that appear in the xtb folder but are not present in
    # the grd file.
    extra_xtb_files = []
    # Gather the translated strings from the .xtb files. Use the en-US string
    # for any message lacking a translation.
    xtb_handler = XtbHandler(translation_ids)
    sax_parser.setContentHandler(xtb_handler)
    for xtb_filename in xtb_files:
      if xtb_filename not in source_xtb_files:
        extra_xtb_files.append(xtb_filename)
      sax_parser.parse(xtb_filename)
      for string_id, string_data in source_strings.items():
        translated_string = xtb_handler.translations.get(string_id,
                                                         string_data["text"])
        translated_strings.append(self.__TranslationData(string_id,
                                                         xtb_handler.lang,
                                                         translated_string))
    if missing_xtb_files or extra_xtb_files:
      if missing_xtb_files:
        missing_error = ("There were files that were found in the .grd file "
                         "'{}' but do not exist on disk:\n{}")
        print(missing_error.format(grd_file, '\n'.join(missing_xtb_files)))

      if extra_xtb_files:
        extra_error = ("There were files that exist on disk but were not found "
                       "in the .grd file '{}':\n{}")
        print(extra_error.format(grd_file, '\n'.join(extra_xtb_files)))

      sys.exit(1)
    return translated_strings

  def __WriteRCFile(self, translated_strings):
    """Writes a resource file with the strings provided in |translated_strings|.

    The file is written as UTF-16 with '\\n' line endings and holds a single
    STRINGTABLE with one entry per translation.
    """
    HEADER_TEXT = (
      u'#include "%s"\n\n'
      u'STRINGTABLE\n'
      u'BEGIN\n'
      ) % os.path.basename(self.header_file)

    FOOTER_TEXT = (
      u'END\n'
    )

    with io.open(self.rc_file,
                 mode='w',
                 encoding='utf-16',
                 newline='\n') as outfile:
      outfile.write(HEADER_TEXT)
      for translation in translated_strings:
        # Escape special characters for the rc file.
        escaped_text = (translation.translation.replace('"', '""')
                        .replace('\t', '\\t')
                        .replace('\n', '\\n'))
        outfile.write(u'  %s "%s"\n' %
                      (translation.resource_id_str + '_' + translation.language,
                       escaped_text))
      outfile.write(FOOTER_TEXT)

  def __WriteHeaderFile(self, translated_strings):
    """Writes a .h file with resource ids.

    Args:
      translated_strings: The sorted list of __TranslationData instances to
        assign ids to.

    Raises:
      RuntimeError: if a mode-specific string has no entry for the brand, or
        if string ids must be written but |translated_strings| is empty.
    """
    # TODO(grt): Stream the lines to the file rather than building this giant
    # list of lines first.
    lines = []
    do_languages_lines = ['\n#define DO_LANGUAGES']
    installer_string_mapping_lines = ['\n#define DO_STRING_MAPPING']
    do_mode_strings_lines = ['\n#define DO_MODE_STRINGS']

    # Write the values for how the languages ids are offset. The list is
    # sorted by id and then language, so the languages of the first string are
    # walked until one repeats.
    seen_languages = set()
    offset_id = 0
    for translation_data in translated_strings:
      lang = translation_data.language
      if lang in seen_languages:
        break
      seen_languages.add(lang)
      lines.append('#define IDS_L10N_OFFSET_%s %s' % (lang, offset_id))
      do_languages_lines.append('  HANDLE_LANGUAGE(%s, IDS_L10N_OFFSET_%s)'
                                % (lang.replace('_', '-').lower(), lang))
      offset_id += 1

    # Write the resource ids themselves.
    resource_id = self.first_resource_id
    for translation_data in translated_strings:
      lines.append('#define %s %s' % (translation_data.resource_id_str + '_' +
                                      translation_data.language,
                                      resource_id))
      resource_id += 1

    # Handle mode-specific strings by populating the DO_MODE_STRINGS macro.
    for string_id, brands in self.mode_specific_strings.items():
      brand_strings = self.__BrandStrings(string_id, brands)
      do_mode_strings_lines.append(
        '  HANDLE_MODE_STRING(%s_BASE, %s)'
        % (string_id, ', '.join([('%s_BASE' % s) for s in brand_strings])))

    # Generate defines for the specific strings to extract or take all of the
    # strings found in the translations.
    if self.string_id_set:
      string_ids_to_write = self.string_id_set
    else:
      string_ids_to_write = {t.resource_id_str for t in translated_strings}

    # Write out base ID values. Each base aliases the id of the string's first
    # language, which requires at least one translation to exist.
    if string_ids_to_write:
      if not translated_strings:
        raise RuntimeError(
            'No strings were read from the inputs, so base ids cannot be '
            'generated for: %s' % ', '.join(sorted(string_ids_to_write)))
      first_language = translated_strings[0].language
      for string_id in sorted(string_ids_to_write):
        lines.append('#define %s_BASE %s_%s' % (string_id,
                                                string_id,
                                                first_language))
        installer_string_mapping_lines.append('  HANDLE_STRING(%s_BASE, %s)'
                                              % (string_id, string_id))

    with open(self.header_file, 'w') as outfile:
      outfile.write('\n'.join(lines))
      outfile.write('\n#ifndef RC_INVOKED')
      outfile.write(' \\\n'.join(do_languages_lines))
      outfile.write(' \\\n'.join(installer_string_mapping_lines))
      outfile.write(' \\\n'.join(do_mode_strings_lines))
      # The file must end in a new line.
      outfile.write('\n#endif  // ndef RC_INVOKED\n')
541
542
def BuildArgumentParser():
  """Builds the command line parser for this script.

  The module docstring is used verbatim as the parser description.

  Returns:
    An argparse.ArgumentParser. The -i, -r, -x, --header-file, --rc-file and
    --first-resource-id arguments are required; -i, -r and -x may be repeated
    and accumulate into lists.
  """
  parser = argparse.ArgumentParser(
    description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter)
  # Adjacent string literals are joined with no separator, so each fragment
  # of a multi-part help text ends with an explicit space.
  parser.add_argument('-b',
                      help='identifier of the browser brand (e.g., chromium). '
                      'This argument is mandatory if the module file included '
                      'by --extract-datafile contains MODE_SPECIFIC_STRINGS',
                      dest='brand')
  parser.add_argument('-i', action='append',
                      required=True,
                      help='path to .grd file',
                      dest='input_grd_files')
  parser.add_argument('-r', action='append',
                      required=True,
                      help='relative path to .xtb dir for each .grd file',
                      dest='input_xtb_relative_paths')
  parser.add_argument('-x', action='append',
                      required=True,
                      help='expected xtb input files to read',
                      dest='expected_xtb_input_files')
  parser.add_argument('--header-file',
                      required=True,
                      help='path to generated .h file to write',
                      dest='header_file')
  parser.add_argument('--rc-file',
                      required=True,
                      help='path to generated .rc file to write',
                      dest='rc_file')
  parser.add_argument('--first-resource-id',
                      type=int,
                      required=True,
                      help='first id for the generated string resources',
                      dest='first_resource_id')
  parser.add_argument('--extract-datafile',
                      help='the python file to execute that will define the '
                      'specific strings to extract from the source .grd file. '
                      'The module should contain a global array STRING_IDS '
                      'that specifies which string IDs need to be extracted '
                      '(if no global member by that name exists, then all the '
                      'strings are extracted). It may also optionally contain '
                      'a dictionary MODE_SPECIFIC_STRINGS which defines the '
                      'mode-specific strings to use for a given brand that is '
                      'extracted.',
                      dest='extract_datafile')

  return parser
589
590
def main():
  """Parses the command line and generates the .rc and .h files.

  Invalid argument combinations are reported through parser.error(), which
  exits the process.

  Returns:
    0 once the files have been generated.
  """
  parser = BuildArgumentParser()
  args = parser.parse_args()
  # Extract all the strings from the given grd by default.
  string_ids_to_extract = []
  mode_specific_strings = {}

  # Check to see if an external module containing string extraction information
  # was specified.
  extract_datafile = args.extract_datafile
  if extract_datafile:
    datafile_locals = {}
    # Read the whole file inside a `with` so the handle is closed before the
    # code runs.
    with open(extract_datafile) as datafile:
      datafile_source = datafile.read()
    # NOTE: the datafile is executed as Python code, so it must only ever be a
    # trusted, checked-in file.
    exec(datafile_source, globals(), datafile_locals)
    if 'STRING_IDS' in datafile_locals:
      string_ids_to_extract = datafile_locals['STRING_IDS']
    if 'MODE_SPECIFIC_STRINGS' in datafile_locals:
      if not string_ids_to_extract:
        parser.error('MODE_SPECIFIC_STRINGS was specified in file ' +
          extract_datafile + ' but there were no specific STRING_IDS '
          'specified for extraction')
      mode_specific_strings = datafile_locals['MODE_SPECIFIC_STRINGS']

  brand = args.brand
  if brand:
    if not mode_specific_strings:
      parser.error('A brand was specified (' + brand + ') but no mode '
        'specific strings were given.')
    # The brands declared by the first entry are taken as the valid set.
    valid_brands = list(next(iter(mode_specific_strings.values())))
    if brand not in valid_brands:
      parser.error('A brand was specified (' + brand + ') but it is not '
        'a valid brand [' + ', '.join(valid_brands) + '].')
  elif mode_specific_strings:
    parser.error('MODE_SPECIFIC_STRINGS were specified but no brand was '
      'given.')

  grd_files = args.input_grd_files
  xtb_relative_paths = args.input_xtb_relative_paths

  if len(grd_files) != len(xtb_relative_paths):
    parser.error('Mismatch in number of grd files ({}) and xtb relative '
                 'paths ({})'.format(len(grd_files), len(xtb_relative_paths)))

  # Pair each .grd file with its .xtb directory; a list (rather than a
  # one-shot zip iterator) can be iterated more than once.
  inputs = list(zip(grd_files, xtb_relative_paths))

  StringRcMaker(inputs, args.expected_xtb_input_files, args.header_file,
                args.rc_file, brand, args.first_resource_id,
                string_ids_to_extract, mode_specific_strings).MakeFiles()
  return 0
640
# When run as a script, generate the files and use main()'s return value as
# the process exit status.
if __name__ == '__main__':
  sys.exit(main())
643