#!/usr/bin/env python3
# Copyright 2012 The Chromium Authors
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

"""Generates .h and .rc files for strings extracted from a .grd file.

This script generates an rc file and header (NAME.{rc,h}) to be included in
a build target. The rc file includes translations for strings pulled from the
given .grd file(s) and their corresponding localized .xtb files.

To specify strings that will be extracted, the script pointed to by the
"--extract-datafile" argument should contain one or both of the following
global variables:

STRING_IDS is a list of string IDs we want to import from the .grd files and
include in the generated RC file. These strings are universal for all brands.

MODE_SPECIFIC_STRINGS is a dictionary of strings for which there are
brand-specific values. This mapping provides brand- and mode-specific string
ids for a given input id as described here:

{
  resource_id_1: {  # A resource ID for use with GetLocalizedString.
    brand_1: [  # 'google_chrome', for example.
      string_id_1,  # Strings listed in order of the brand's modes, as
      string_id_2,  # specified in install_static::InstallConstantIndex.
      ...
      string_id_N,
    ],
    brand_2: [  # 'chromium', for example.
      ...
    ],
  },
  resource_id_2: ...
}

Note: MODE_SPECIFIC_STRINGS cannot be specified if STRING_IDS is not specified.

"""
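
# For illustration only, a module passed via --extract-datafile might look like
# the sketch below. The resource and string IDs here are hypothetical; a real
# data file uses IDs defined in the input .grd file(s):
#
#   STRING_IDS = [
#       'IDS_PRODUCT_NAME',
#       'IDS_INSTALL_FAILED',
#   ]
#
#   MODE_SPECIFIC_STRINGS = {
#       'IDS_SHORTCUT_NAME': {
#           'google_chrome': [
#               # One entry per install mode, in InstallConstantIndex order.
#               'IDS_SHORTCUT_NAME',
#               'IDS_SHORTCUT_NAME_BETA',
#               'IDS_SHORTCUT_NAME_DEV',
#               'IDS_SHORTCUT_NAME_CANARY',
#           ],
#           'chromium': [
#               'IDS_SHORTCUT_NAME',
#           ],
#       },
#   }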

# The generated header file includes IDs for each string, but also has values
# to allow getting a string based on a language offset. For example, the
# header file looks like this:
#
# #define IDS_L10N_OFFSET_AR 0
# #define IDS_L10N_OFFSET_BG 1
# #define IDS_L10N_OFFSET_CA 2
# ...
# #define IDS_L10N_OFFSET_ZH_TW 41
#
# #define IDS_MY_STRING_AR 1600
# #define IDS_MY_STRING_BG 1601
# ...
# #define IDS_MY_STRING_BASE IDS_MY_STRING_AR
#
# This allows us to look up an ID for a string by adding IDS_MY_STRING_BASE
# and IDS_L10N_OFFSET_* for the language we are interested in.
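#
# For example (illustrative), the Bulgarian variant of IDS_MY_STRING above is
# IDS_MY_STRING_BASE + IDS_L10N_OFFSET_BG, i.e. 1600 + 1 == IDS_MY_STRING_BG.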
#

from __future__ import print_function

import argparse
import collections
import glob
import io
import os
import sys
from xml import sax

BASEDIR = os.path.dirname(os.path.abspath(__file__))
sys.path.insert(1, os.path.join(BASEDIR, '../../../tools/grit'))
sys.path.insert(2, os.path.join(BASEDIR, '../../../tools/python'))

from grit.extern import tclib

class GrdHandler(sax.handler.ContentHandler):
  """Extracts selected strings from a .grd file.

  Attributes:
    messages: A dict mapping string identifiers to their corresponding messages
      (key "text") and transconsole ids (key "tc_id").
    referenced_xtb_files: A list of all xtb files referenced inside the .grd
      file.
  """
  def __init__(self, string_id_set):
    """Constructs a handler that reads selected strings from a .grd file.

    The dict attribute |messages| is populated with the strings that are read.

    Args:
      string_id_set: An optional set of message identifiers to extract; all
        messages are extracted if empty.
    """
    sax.handler.ContentHandler.__init__(self)
    self.messages = collections.defaultdict(dict)
    self.referenced_xtb_files = []
    self.__id_set = string_id_set
    self.__message_name = None
    self.__element_stack = []
    self.__text_scraps = []

    # Contains the text in the format required by transconsole to generate the
    # corresponding TC fingerprint.
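    # Placeholder (<ph>) elements contribute their `name` attribute here
    # rather than their text; see __OnOpenPlaceholder below.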
    self.__tc_text_scraps = []

    self.__characters_callback = None

  def startElement(self, name, attrs):
    self.__element_stack.append(name)
    if name == 'message':
      self.__OnOpenMessage(attrs.getValue('name'))
    elif name == 'ph':
      self.__OnOpenPlaceholder(attrs.getValue('name'))
    elif name == 'file':
      parent = self.__element_stack[-2]
      if parent == 'translations':
        self.__OnAddXtbFile(attrs.getValue('path'))

  def endElement(self, name):
    popped = self.__element_stack.pop()
    assert popped == name
    if name == 'message':
      self.__OnCloseMessage()

  def characters(self, content):
    if self.__characters_callback:
      self.__characters_callback(self.__element_stack[-1], content)

  def __IsExtractingMessage(self):
    """Returns True if a message is currently being extracted."""
    return self.__message_name is not None

  def __OnOpenMessage(self, message_name):
    """Invoked at the start of a <message> with the message's name."""
    assert not self.__IsExtractingMessage()
    self.__message_name = (message_name if (not self.__id_set or
                                            message_name in self.__id_set)
                           else None)
    if self.__message_name:
      self.__characters_callback = self.__OnMessageText

  def __OnOpenPlaceholder(self, ph_name):
    """Invoked at the start of a <ph> with the `name` attribute."""
    if self.__IsExtractingMessage():
      # TC uses the `name` attribute as part of the fingerprint generation.
      self.__tc_text_scraps.append(ph_name)

  def __OnMessageText(self, containing_element, message_text):
    """Invoked to handle a block of text for a message."""
    if message_text and (containing_element == 'message' or
                         containing_element == 'ph'):
      self.__text_scraps.append(message_text)
      if containing_element == 'message':
        self.__tc_text_scraps.append(message_text)

  def __OnCloseMessage(self):
    """Invoked at the end of a message."""
    if self.__IsExtractingMessage():
      self.messages[self.__message_name]["text"] = ''.join(
          self.__text_scraps).strip()

      # Generate the message ID for each source string to correlate it with its
      # TC translations in the .xtb files.
      self.messages[self.__message_name]["tc_id"] = tclib.GenerateMessageId(
          ''.join(self.__tc_text_scraps).strip())

      self.__message_name = None
      self.__text_scraps = []
      self.__tc_text_scraps = []
      self.__characters_callback = None

  def __OnAddXtbFile(self, xtb_file_path):
    """Adds the xtb file path of a 'file'."""
    if os.path.splitext(xtb_file_path)[1].lower() == '.xtb':
      self.referenced_xtb_files.append(xtb_file_path)

class XtbHandler(sax.handler.ContentHandler):
  """Extracts selected translations from an .xtb file.

  Populates the |lang| and |translations| attributes with the language and
  selected strings of an .xtb file. Instances may be re-used to read the same
  set of translations from multiple .xtb files.

  Attributes:
    translations: A mapping of translation ids to strings.
    lang: The language parsed from the .xtb file.
  """
  def __init__(self, translation_ids):
    """Constructs an instance to parse the given strings from an .xtb file.

    Args:
      translation_ids: a mapping of translation ids to their string
        identifiers list for the translations to be extracted.
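        For example, with a hypothetical transconsole fingerprint as the key:
          {'1234567890123456789': ['IDS_MY_STRING']}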
    """
    sax.handler.ContentHandler.__init__(self)
    self.lang = None
    self.translations = None
    self.__translation_ids = translation_ids
    self.__element_stack = []
    self.__string_ids = None
    self.__text_scraps = []

    # The count of the `ph` tags.
    self.__ph_count = 0

    self.__characters_callback = None

  def startDocument(self):
    # Clear the lang and translations since a new document is being parsed.
    self.lang = ''
    self.translations = {}

  def startElement(self, name, attrs):
    self.__element_stack.append(name)
    # translationbundle is the document element, and hosts the lang id.
    if len(self.__element_stack) == 1:
      assert name == 'translationbundle'
      self.__OnLanguage(attrs.getValue('lang'))
    if name == 'translation':
      self.__OnOpenTranslation(attrs.getValue('id'))
    elif name == 'ph':
      self.__OnOpenPlaceholder()

  def endElement(self, name):
    popped = self.__element_stack.pop()
    assert popped == name
    if name == 'translation':
      self.__OnCloseTranslation()

  def characters(self, content):
    if self.__characters_callback:
      self.__characters_callback(self.__element_stack[-1], content)

  def __IsExtractingTranslation(self):
    """Returns `True` if a translation is currently being extracted."""
    return self.__string_ids is not None

  def __OnLanguage(self, lang):
    self.lang = lang.replace('-', '_').upper()

  def __OnOpenTranslation(self, translation_id):
    assert not self.__IsExtractingTranslation()
    self.__string_ids = self.__translation_ids.get(translation_id)
    if self.__string_ids:
      self.__characters_callback = self.__OnTranslationText

  def __OnOpenPlaceholder(self):
    if self.__IsExtractingTranslation():
      # The XTB files contain `ph` tags instead of placeholders, so we add the
      # placeholders in the format `$1` in place of the `ph` tags here.
      self.__ph_count += 1
      self.__text_scraps.append('$' + str(self.__ph_count))

  def __OnTranslationText(self, containing_element, message_text):
    if message_text and containing_element == 'translation':
      self.__text_scraps.append(message_text)

  def __OnCloseTranslation(self):
    if self.__IsExtractingTranslation():
      translated_string = ''.join(self.__text_scraps).strip()
      for string_id in self.__string_ids:
        self.translations[string_id] = translated_string
      self.__string_ids = None
      self.__text_scraps = []
      self.__ph_count = 0
      self.__characters_callback = None


class StringRcMaker(object):
  """Makes .h and .rc files containing strings and translations."""
  def __init__(self, inputs, expected_xtb_input_files, header_file, rc_file,
               brand, first_resource_id, string_ids_to_extract,
               mode_specific_strings):
    """Constructs a maker.

    Args:
      inputs: A list of (grd_file, xtb_dir) pairs containing the source data.
      expected_xtb_input_files: A list of xtb files that are expected to exist
        in the inputs folders. If there is a discrepancy between what exists
        and what is expected, the script will fail.
      header_file: The location of the header file to write containing all the
        defined string IDs.
      rc_file: The location of the rc file to write containing all the string
        resources.
      brand: The brand to check against when extracting mode-specific strings.
      first_resource_id: The starting ID for the generated string resources.
      string_ids_to_extract: The IDs of strings we want to import from the .grd
        files and include in the generated RC file. These strings are universal
        for all brands.
      mode_specific_strings: A dictionary of strings that have conditional
        values based on the brand's install mode. Refer to the documentation at
        the top of this file for more information on the format of the
        dictionary.
    """
    self.inputs = inputs
    self.expected_xtb_input_files = expected_xtb_input_files
    self.expected_xtb_input_files.sort()
    self.header_file = header_file
    self.rc_file = rc_file
    self.brand = brand
    self.first_resource_id = first_resource_id
    self.string_id_set = set(string_ids_to_extract)
    self.mode_specific_strings = mode_specific_strings
    self.__AddModeSpecificStringIds()

  def MakeFiles(self):
    translated_strings = self.__ReadSourceAndTranslatedStrings()
    self.__WriteRCFile(translated_strings)
    self.__WriteHeaderFile(translated_strings)

  class __TranslationData(object):
    """A container of information about a single translation."""
    def __init__(self, resource_id_str, language, translation):
      self.resource_id_str = resource_id_str
      self.language = language
      self.translation = translation

    def __lt__(self, other):
      """Allow __TranslationDatas to be sorted by id then by language."""
      return (self.resource_id_str, self.language) < (other.resource_id_str,
                                                      other.language)

  def __AddModeSpecificStringIds(self):
    """Adds the mode-specific strings for all of the current brand's install
    modes to self.string_id_set."""
    for string_id, brands in self.mode_specific_strings.items():
      brand_strings = brands.get(self.brand)
      if not brand_strings:
        raise RuntimeError(
            'No strings declared for brand \'%s\' in MODE_SPECIFIC_STRINGS '
            'for message %s' % (self.brand, string_id))
      self.string_id_set.update(brand_strings)

  def __ReadSourceAndTranslatedStrings(self):
    """Reads the source strings and translations from all inputs."""
    translated_strings = []
    all_xtb_files = []
    for grd_file, xtb_dir in self.inputs:
      # Get the name of the grd file sans extension.
      source_name = os.path.splitext(os.path.basename(grd_file))[0]
      # Compute a glob for the translation files.
      xtb_pattern = os.path.join(os.path.dirname(grd_file), xtb_dir,
                                 '%s*.xtb' % source_name)
      local_xtb_files = [x.replace('\\', '/') for x in glob.glob(xtb_pattern)]
      all_xtb_files.extend(local_xtb_files)
      translated_strings.extend(
          self.__ReadSourceAndTranslationsFrom(grd_file, local_xtb_files))
    translated_strings.sort()
    all_xtb_files.sort()

    if self.expected_xtb_input_files != all_xtb_files:
      extra = list(set(all_xtb_files) - set(self.expected_xtb_input_files))
      missing = list(set(self.expected_xtb_input_files) - set(all_xtb_files))
      error = '''Asserted file list does not match.

Expected input files:
{}
Actual input files:
{}
Missing input files:
{}
Extra input files:
{}
'''
      print(error.format('\n'.join(self.expected_xtb_input_files),
                         '\n'.join(all_xtb_files), '\n'.join(missing),
                         '\n'.join(extra)))
      sys.exit(1)
    return translated_strings

  def __ReadSourceAndTranslationsFrom(self, grd_file, xtb_files):
    """Reads source strings and translations for a .grd file.

    Reads the source strings and all available translations for the messages
    identified by self.string_id_set (or all the messages if self.string_id_set
    is empty). The source string is used where translations are missing.

    Args:
      grd_file: Path to a .grd file.
      xtb_files: List of paths to .xtb files.

    Returns:
      An unsorted list of __TranslationData instances.
    """
    sax_parser = sax.make_parser()

    # Read the source (en-US) strings from the .grd file.
    grd_handler = GrdHandler(self.string_id_set)
    sax_parser.setContentHandler(grd_handler)
    sax_parser.parse(grd_file)
    source_strings = grd_handler.messages

    grd_file_path = os.path.dirname(grd_file)
    source_xtb_files = []
    for xtb_file in grd_handler.referenced_xtb_files:
      relative_xtb_file_path = (
          os.path.join(grd_file_path, xtb_file).replace('\\', '/'))
      source_xtb_files.append(relative_xtb_file_path)
    missing_xtb_files = list(set(source_xtb_files) - set(xtb_files))

    # Manually put the source strings as en-US in the list of translated
    # strings.
    translated_strings = []
    for string_id, string_data in source_strings.items():
      translated_strings.append(self.__TranslationData(string_id,
                                                       'EN_US',
                                                       string_data["text"]))

    # Multiple source strings may have the same message text; hence the
    # message id is mapped to a list of string ids instead of a single value.
    translation_ids = {}
    for string_id, string_data in source_strings.items():
      translation_ids.setdefault(string_data["tc_id"], []).append(string_id)

    # Track any xtb files that appear in the xtb folder but are not present in
    # the grd file.
    extra_xtb_files = []
    # Gather the translated strings from the .xtb files. Use the en-US string
    # for any message lacking a translation.
    xtb_handler = XtbHandler(translation_ids)
    sax_parser.setContentHandler(xtb_handler)
    for xtb_filename in xtb_files:
      if xtb_filename not in source_xtb_files:
        extra_xtb_files.append(xtb_filename)
      sax_parser.parse(xtb_filename)
      for string_id, string_data in source_strings.items():
        translated_string = xtb_handler.translations.get(string_id,
                                                         string_data["text"])
        translated_strings.append(self.__TranslationData(string_id,
                                                         xtb_handler.lang,
                                                         translated_string))
    if missing_xtb_files or extra_xtb_files:
      if missing_xtb_files:
        missing_error = ("There were files that were found in the .grd file "
                         "'{}' but do not exist on disk:\n{}")
        print(missing_error.format(grd_file, '\n'.join(missing_xtb_files)))

      if extra_xtb_files:
        extra_error = ("There were files that exist on disk but were not "
                       "found in the .grd file '{}':\n{}")
        print(extra_error.format(grd_file, '\n'.join(extra_xtb_files)))

      sys.exit(1)
    return translated_strings

  def __WriteRCFile(self, translated_strings):
    """Writes a resource file with the strings provided in |translated_strings|.
    """
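    # Illustrative output (hypothetical strings), where my_strings.h stands in
    # for the basename of self.header_file:
    #
    #   #include "my_strings.h"
    #
    #   STRINGTABLE
    #   BEGIN
    #     IDS_MY_STRING_AR "..."
    #     IDS_MY_STRING_BG "..."
    #   END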
    HEADER_TEXT = (
        u'#include "%s"\n\n'
        u'STRINGTABLE\n'
        u'BEGIN\n'
        ) % os.path.basename(self.header_file)

    FOOTER_TEXT = (
        u'END\n'
        )

    with io.open(self.rc_file,
                 mode='w',
                 encoding='utf-16',
                 newline='\n') as outfile:
      outfile.write(HEADER_TEXT)
      for translation in translated_strings:
        # Escape special characters for the rc file.
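        # Double quotes are doubled (RC string syntax); literal tab and
        # newline characters become the \t and \n escape sequences.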
        escaped_text = (translation.translation.replace('"', '""')
                        .replace('\t', '\\t')
                        .replace('\n', '\\n'))
        outfile.write(u'  %s "%s"\n' %
                      (translation.resource_id_str + '_' + translation.language,
                       escaped_text))
      outfile.write(FOOTER_TEXT)

  def __WriteHeaderFile(self, translated_strings):
    """Writes a .h file with resource ids."""
    # TODO(grt): Stream the lines to the file rather than building this giant
    # list of lines first.
    lines = []
    do_languages_lines = ['\n#define DO_LANGUAGES']
    installer_string_mapping_lines = ['\n#define DO_STRING_MAPPING']
    do_mode_strings_lines = ['\n#define DO_MODE_STRINGS']

    # Write the values by which the language IDs are offset.
    seen_languages = set()
    offset_id = 0
    for translation_data in translated_strings:
      lang = translation_data.language
      if lang not in seen_languages:
        seen_languages.add(lang)
        lines.append('#define IDS_L10N_OFFSET_%s %s' % (lang, offset_id))
        do_languages_lines.append('  HANDLE_LANGUAGE(%s, IDS_L10N_OFFSET_%s)'
                                  % (lang.replace('_', '-').lower(), lang))
        offset_id += 1
      else:
        break

    # Write the resource ids themselves.
    resource_id = self.first_resource_id
    for translation_data in translated_strings:
      lines.append('#define %s %s' % (translation_data.resource_id_str + '_' +
                                      translation_data.language,
                                      resource_id))
      resource_id += 1

    # Handle mode-specific strings.
    for string_id, brands in self.mode_specific_strings.items():
      # Populate the DO_MODE_STRINGS macro.
      brand_strings = brands.get(self.brand)
      if not brand_strings:
        raise RuntimeError(
            'No strings declared for brand \'%s\' in MODE_SPECIFIC_STRINGS '
            'for message %s' % (self.brand, string_id))
      do_mode_strings_lines.append(
          '  HANDLE_MODE_STRING(%s_BASE, %s)'
          % (string_id, ', '.join(['%s_BASE' % s for s in brand_strings])))

    # Generate defines for the specific strings to extract, or take all of the
    # strings found in the translations.
    if self.string_id_set:
      string_ids_to_write = self.string_id_set
    else:
      string_ids_to_write = {t.resource_id_str for t in translated_strings}

    # Write out base ID values.
    for string_id in sorted(string_ids_to_write):
      lines.append('#define %s_BASE %s_%s' % (string_id,
                                              string_id,
                                              translated_strings[0].language))
      installer_string_mapping_lines.append('  HANDLE_STRING(%s_BASE, %s)'
                                            % (string_id, string_id))

    with open(self.header_file, 'w') as outfile:
      outfile.write('\n'.join(lines))
      outfile.write('\n#ifndef RC_INVOKED')
      outfile.write(' \\\n'.join(do_languages_lines))
      outfile.write(' \\\n'.join(installer_string_mapping_lines))
      outfile.write(' \\\n'.join(do_mode_strings_lines))
      # .rc files must end in a new line.
      outfile.write('\n#endif  // ndef RC_INVOKED\n')


def BuildArgumentParser():
  parser = argparse.ArgumentParser(
      description=__doc__,
      formatter_class=argparse.RawDescriptionHelpFormatter)
  parser.add_argument('-b',
                      help='identifier of the browser brand (e.g., chromium). '
                           'This argument is mandatory if the module file '
                           'included by --extract-datafile contains '
                           'MODE_SPECIFIC_STRINGS.',
                      dest='brand')
  parser.add_argument('-i', action='append',
                      required=True,
                      help='path to .grd file',
                      dest='input_grd_files')
  parser.add_argument('-r', action='append',
                      required=True,
                      help='relative path to .xtb dir for each .grd file',
                      dest='input_xtb_relative_paths')
  parser.add_argument('-x', action='append',
                      required=True,
                      help='expected xtb input files to read',
                      dest='expected_xtb_input_files')
  parser.add_argument('--header-file',
                      required=True,
                      help='path to generated .h file to write',
                      dest='header_file')
  parser.add_argument('--rc-file',
                      required=True,
                      help='path to generated .rc file to write',
                      dest='rc_file')
  parser.add_argument('--first-resource-id',
                      type=int,
                      required=True,
                      help='first id for the generated string resources',
                      dest='first_resource_id')
  parser.add_argument('--extract-datafile',
                      help='the python file to execute that defines the '
                           'specific strings to extract from the source .grd '
                           'file. The module should contain a global list '
                           'STRING_IDS that specifies which string IDs need '
                           'to be extracted (if no global member by that name '
                           'exists, then all the strings are extracted). It '
                           'may also optionally contain a dictionary '
                           'MODE_SPECIFIC_STRINGS which defines the '
                           'mode-specific strings to use for a given brand '
                           'that is extracted.',
                      dest='extract_datafile')

  return parser
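

# An illustrative invocation (paths and values are hypothetical); -i and -r
# are given in pairs, one pair per .grd file, and -x once per expected .xtb
# file:
#
#   <this script> \
#       -i app/my_strings.grd -r translations \
#       -x app/translations/my_strings_am.xtb \
#       -x app/translations/my_strings_ar.xtb \
#       --header-file gen/my_strings.h \
#       --rc-file gen/my_strings.rc \
#       --first-resource-id 1600 \
#       --extract-datafile extract_my_strings.py \
#       -b chromium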


def main():
  parser = BuildArgumentParser()
  args = parser.parse_args()
  # Extract all the strings from the given grd by default.
  string_ids_to_extract = []
  mode_specific_strings = {}

  # Check to see if an external module containing string extraction information
  # was specified.
  extract_datafile = args.extract_datafile
  if extract_datafile:
    datafile_locals = dict()
    exec(open(extract_datafile).read(), globals(), datafile_locals)
    if 'STRING_IDS' in datafile_locals:
      string_ids_to_extract = datafile_locals['STRING_IDS']
    if 'MODE_SPECIFIC_STRINGS' in datafile_locals:
      if not string_ids_to_extract:
        parser.error('MODE_SPECIFIC_STRINGS was specified in file ' +
                     extract_datafile + ' but there were no specific '
                     'STRING_IDS specified for extraction')
      mode_specific_strings = datafile_locals['MODE_SPECIFIC_STRINGS']

  brand = args.brand
  if brand:
    if not mode_specific_strings:
      parser.error('A brand was specified (' + brand + ') but no mode '
                   'specific strings were given.')
    valid_brands = list(next(iter(mode_specific_strings.values())).keys())
    if brand not in valid_brands:
      parser.error('A brand was specified (' + brand + ') but it is not '
                   'a valid brand [' + ', '.join(valid_brands) + '].')
  elif mode_specific_strings:
    parser.error('MODE_SPECIFIC_STRINGS were specified but no brand was '
                 'given.')

  grd_files = args.input_grd_files
  xtb_relative_paths = args.input_xtb_relative_paths

  if len(grd_files) != len(xtb_relative_paths):
    parser.error('Mismatch in number of grd files ({}) and xtb relative '
                 'paths ({})'.format(len(grd_files), len(xtb_relative_paths)))

  inputs = zip(grd_files, xtb_relative_paths)

  StringRcMaker(inputs, args.expected_xtb_input_files, args.header_file,
                args.rc_file, brand, args.first_resource_id,
                string_ids_to_extract, mode_specific_strings).MakeFiles()
  return 0

if '__main__' == __name__:
  sys.exit(main())