1 /*
2 * Copyright (C) 2016 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #include "compile/PseudolocaleGenerator.h"
18
19 #include <stdint.h>
20
21 #include <algorithm>
22 #include <random>
23
24 #include "ResourceTable.h"
25 #include "ResourceValues.h"
26 #include "ValueVisitor.h"
27 #include "androidfw/ResourceTypes.h"
28 #include "androidfw/Util.h"
29 #include "compile/Pseudolocalizer.h"
30 #include "util/Util.h"
31
32 using ::android::ConfigDescription;
33 using ::android::StringPiece;
34 using ::android::StringPiece16;
35
36 namespace aapt {
37
// A common representation for the two kinds of ranges that subdivide a styled string: markup
// Spans and UntranslatableSections.
struct UnifiedSpan {
  // Name of the markup tag. Engaged only for Span objects; when empty, this entry was built from
  // an UntranslatableSection.
  std::optional<std::string> tag;

  // UTF-16 index into the string of the first character covered by this span.
  uint32_t first_char;

  // UTF-16 index into the string of the last character covered by this span (inclusive).
  uint32_t last_char;
};

// Orders spans by their starting index; spans starting at the same index are ordered by their
// ending index. The tag does not take part in the comparison.
inline static bool operator<(const UnifiedSpan& left, const UnifiedSpan& right) {
  if (left.first_char != right.first_char) {
    return left.first_char < right.first_char;
  }
  return left.last_char < right.last_char;
}
60
SpanToUnifiedSpan(const android::StringPool::Span & span)61 inline static UnifiedSpan SpanToUnifiedSpan(const android::StringPool::Span& span) {
62 return UnifiedSpan{*span.name, span.first_char, span.last_char};
63 }
64
UntranslatableSectionToUnifiedSpan(const UntranslatableSection & section)65 inline static UnifiedSpan UntranslatableSectionToUnifiedSpan(const UntranslatableSection& section) {
66 return UnifiedSpan{
67 {}, static_cast<uint32_t>(section.start), static_cast<uint32_t>(section.end) - 1};
68 }
69
70 // Merges the Span and UntranslatableSections of this StyledString into a single vector of
71 // UnifiedSpans. This will first check that the Spans are sorted in ascending order.
MergeSpans(const StyledString & string)72 static std::vector<UnifiedSpan> MergeSpans(const StyledString& string) {
73 // Ensure the Spans are sorted and converted.
74 std::vector<UnifiedSpan> sorted_spans;
75 sorted_spans.reserve(string.value->spans.size());
76 std::transform(string.value->spans.begin(), string.value->spans.end(),
77 std::back_inserter(sorted_spans), SpanToUnifiedSpan);
78
79 // Stable sort to ensure tag sequences like "<b><i>" are preserved.
80 std::stable_sort(sorted_spans.begin(), sorted_spans.end());
81
82 // Ensure the UntranslatableSections are sorted and converted.
83 std::vector<UnifiedSpan> sorted_untranslatable_sections;
84 sorted_untranslatable_sections.reserve(string.untranslatable_sections.size());
85 std::transform(string.untranslatable_sections.begin(), string.untranslatable_sections.end(),
86 std::back_inserter(sorted_untranslatable_sections),
87 UntranslatableSectionToUnifiedSpan);
88 std::sort(sorted_untranslatable_sections.begin(), sorted_untranslatable_sections.end());
89
90 std::vector<UnifiedSpan> merged_spans;
91 merged_spans.reserve(sorted_spans.size() + sorted_untranslatable_sections.size());
92 auto span_iter = sorted_spans.begin();
93 auto untranslatable_iter = sorted_untranslatable_sections.begin();
94 while (span_iter != sorted_spans.end() &&
95 untranslatable_iter != sorted_untranslatable_sections.end()) {
96 if (*span_iter < *untranslatable_iter) {
97 merged_spans.push_back(std::move(*span_iter));
98 ++span_iter;
99 } else {
100 merged_spans.push_back(std::move(*untranslatable_iter));
101 ++untranslatable_iter;
102 }
103 }
104
105 while (span_iter != sorted_spans.end()) {
106 merged_spans.push_back(std::move(*span_iter));
107 ++span_iter;
108 }
109
110 while (untranslatable_iter != sorted_untranslatable_sections.end()) {
111 merged_spans.push_back(std::move(*untranslatable_iter));
112 ++untranslatable_iter;
113 }
114 return merged_spans;
115 }
116
PseudolocalizeStyledString(StyledString * string,Pseudolocalizer::Method method,android::StringPool * pool)117 std::unique_ptr<StyledString> PseudolocalizeStyledString(StyledString* string,
118 Pseudolocalizer::Method method,
119 android::StringPool* pool) {
120 Pseudolocalizer localizer(method);
121
122 // Collect the spans and untranslatable sections into one set of spans, sorted by first_char.
123 // This will effectively subdivide the string into multiple sections that can be individually
124 // pseudolocalized, while keeping the span indices synchronized.
125 std::vector<UnifiedSpan> merged_spans = MergeSpans(*string);
126
127 // All Span indices are UTF-16 based, according to the resources.arsc format expected by the
128 // runtime. So we will do all our processing in UTF-16, then convert back.
129 const std::u16string text16 = android::util::Utf8ToUtf16(string->value->value);
130
131 // Convenient wrapper around the text that allows us to work with StringPieces.
132 const StringPiece16 text(text16);
133
134 // The new string.
135 std::string new_string = localizer.Start();
136
137 // The stack that keeps track of what nested Span we're in.
138 std::vector<size_t> span_stack;
139
140 // The current position in the original text.
141 uint32_t cursor = 0u;
142
143 // The current position in the new text.
144 uint32_t new_cursor = utf8_to_utf16_length(reinterpret_cast<const uint8_t*>(new_string.data()),
145 new_string.size(), false);
146
147 // We assume no nesting of untranslatable sections, since XLIFF doesn't allow it.
148 bool translatable = true;
149 size_t span_idx = 0u;
150 while (span_idx < merged_spans.size() || !span_stack.empty()) {
151 UnifiedSpan* span = span_idx >= merged_spans.size() ? nullptr : &merged_spans[span_idx];
152 UnifiedSpan* parent_span = span_stack.empty() ? nullptr : &merged_spans[span_stack.back()];
153
154 if (span != nullptr) {
155 if (parent_span == nullptr || parent_span->last_char > span->first_char) {
156 // There is no parent, or this span is the child of the parent.
157 // Pseudolocalize all the text until this span.
158 const StringPiece16 substr = text.substr(cursor, span->first_char - cursor);
159 cursor += substr.size();
160
161 // Pseudolocalize the substring.
162 std::string new_substr = android::util::Utf16ToUtf8(substr);
163 if (translatable) {
164 new_substr = localizer.Text(new_substr);
165 }
166 new_cursor += utf8_to_utf16_length(reinterpret_cast<const uint8_t*>(new_substr.data()),
167 new_substr.size(), false);
168 new_string += new_substr;
169
170 // Rewrite the first_char.
171 span->first_char = new_cursor;
172 if (!span->tag) {
173 // An untranslatable section has begun!
174 translatable = false;
175 }
176 span_stack.push_back(span_idx);
177 ++span_idx;
178 continue;
179 }
180 }
181
182 if (parent_span != nullptr) {
183 // There is a parent, and either this span is not a child of it, or there are no more spans.
184 // Pop this off the stack.
185 const StringPiece16 substr = text.substr(cursor, parent_span->last_char - cursor + 1);
186 cursor += substr.size();
187
188 // Pseudolocalize the substring.
189 std::string new_substr = android::util::Utf16ToUtf8(substr);
190 if (translatable) {
191 new_substr = localizer.Text(new_substr);
192 }
193 new_cursor += utf8_to_utf16_length(reinterpret_cast<const uint8_t*>(new_substr.data()),
194 new_substr.size(), false);
195 new_string += new_substr;
196
197 parent_span->last_char = new_cursor - 1;
198 if (parent_span->tag) {
199 // An end to an untranslatable section.
200 translatable = true;
201 }
202 span_stack.pop_back();
203 }
204 }
205
206 // Finish the pseudolocalization at the end of the string.
207 new_string +=
208 localizer.Text(android::util::Utf16ToUtf8(text.substr(cursor, text.size() - cursor)));
209 new_string += localizer.End();
210
211 android::StyleString localized;
212 localized.str = std::move(new_string);
213
214 // Convert the UnifiedSpans into regular Spans, skipping the UntranslatableSections.
215 for (UnifiedSpan& span : merged_spans) {
216 if (span.tag) {
217 localized.spans.push_back(
218 android::Span{std::move(span.tag.value()), span.first_char, span.last_char});
219 }
220 }
221 return util::make_unique<StyledString>(pool->MakeRef(localized));
222 }
223
224 namespace {
225
226 class Visitor : public ValueVisitor {
227 public:
228 // Either value or item will be populated upon visiting the value.
229 std::unique_ptr<Value> value;
230 std::unique_ptr<Item> item;
231
Visitor(android::StringPool * pool,Pseudolocalizer::Method method)232 Visitor(android::StringPool* pool, Pseudolocalizer::Method method)
233 : pool_(pool), method_(method), localizer_(method) {
234 }
235
Visit(Plural * plural)236 void Visit(Plural* plural) override {
237 CloningValueTransformer cloner(pool_);
238 std::unique_ptr<Plural> localized = util::make_unique<Plural>();
239 for (size_t i = 0; i < plural->values.size(); i++) {
240 Visitor sub_visitor(pool_, method_);
241 if (plural->values[i]) {
242 plural->values[i]->Accept(&sub_visitor);
243 if (sub_visitor.item) {
244 localized->values[i] = std::move(sub_visitor.item);
245 } else {
246 localized->values[i] = plural->values[i]->Transform(cloner);
247 }
248 }
249 }
250 localized->SetSource(plural->GetSource());
251 localized->SetWeak(true);
252 value = std::move(localized);
253 }
254
Visit(String * string)255 void Visit(String* string) override {
256 const StringPiece original_string = *string->value;
257 std::string result = localizer_.Start();
258
259 // Pseudolocalize only the translatable sections.
260 size_t start = 0u;
261 for (const UntranslatableSection& section : string->untranslatable_sections) {
262 // Pseudolocalize the content before the untranslatable section.
263 const size_t len = section.start - start;
264 if (len > 0u) {
265 result += localizer_.Text(original_string.substr(start, len));
266 }
267
268 // Copy the untranslatable content.
269 result += original_string.substr(section.start, section.end - section.start);
270 start = section.end;
271 }
272
273 // Pseudolocalize the content after the last untranslatable section.
274 if (start != original_string.size()) {
275 const size_t len = original_string.size() - start;
276 result += localizer_.Text(original_string.substr(start, len));
277 }
278 result += localizer_.End();
279
280 std::unique_ptr<String> localized = util::make_unique<String>(pool_->MakeRef(result));
281 localized->SetSource(string->GetSource());
282 localized->SetWeak(true);
283 item = std::move(localized);
284 }
285
Visit(StyledString * string)286 void Visit(StyledString* string) override {
287 item = PseudolocalizeStyledString(string, method_, pool_);
288 item->SetSource(string->GetSource());
289 item->SetWeak(true);
290 }
291
292 private:
293 DISALLOW_COPY_AND_ASSIGN(Visitor);
294
295 android::StringPool* pool_;
296 Pseudolocalizer::Method method_;
297 Pseudolocalizer localizer_;
298 };
299
300 class GrammaticalGenderVisitor : public ValueVisitor {
301 public:
302 std::unique_ptr<Value> value;
303 std::unique_ptr<Item> item;
304
GrammaticalGenderVisitor(android::StringPool * pool,uint8_t grammaticalInflection)305 GrammaticalGenderVisitor(android::StringPool* pool, uint8_t grammaticalInflection)
306 : pool_(pool), grammaticalInflection_(grammaticalInflection) {
307 }
308
Visit(Plural * plural)309 void Visit(Plural* plural) override {
310 CloningValueTransformer cloner(pool_);
311 std::unique_ptr<Plural> grammatical_gendered = util::make_unique<Plural>();
312 for (size_t i = 0; i < plural->values.size(); i++) {
313 if (plural->values[i]) {
314 GrammaticalGenderVisitor sub_visitor(pool_, grammaticalInflection_);
315 plural->values[i]->Accept(&sub_visitor);
316 if (sub_visitor.item) {
317 grammatical_gendered->values[i] = std::move(sub_visitor.item);
318 } else {
319 grammatical_gendered->values[i] = plural->values[i]->Transform(cloner);
320 }
321 }
322 }
323 grammatical_gendered->SetSource(plural->GetSource());
324 grammatical_gendered->SetWeak(true);
325 value = std::move(grammatical_gendered);
326 }
327
AddGrammaticalGenderPrefix(const std::string_view & original_string)328 std::string AddGrammaticalGenderPrefix(const std::string_view& original_string) {
329 std::string result;
330 switch (grammaticalInflection_) {
331 case android::ResTable_config::GRAMMATICAL_GENDER_MASCULINE:
332 result = std::string("(M)") + std::string(original_string);
333 break;
334 case android::ResTable_config::GRAMMATICAL_GENDER_FEMININE:
335 result = std::string("(F)") + std::string(original_string);
336 break;
337 case android::ResTable_config::GRAMMATICAL_GENDER_NEUTER:
338 result = std::string("(N)") + std::string(original_string);
339 break;
340 default:
341 result = std::string(original_string);
342 break;
343 }
344 return result;
345 }
346
Visit(String * string)347 void Visit(String* string) override {
348 std::string prefixed_string = AddGrammaticalGenderPrefix(std::string(*string->value));
349 std::unique_ptr<String> grammatical_gendered =
350 util::make_unique<String>(pool_->MakeRef(prefixed_string));
351 grammatical_gendered->SetSource(string->GetSource());
352 grammatical_gendered->SetWeak(true);
353 item = std::move(grammatical_gendered);
354 }
355
Visit(StyledString * string)356 void Visit(StyledString* string) override {
357 std::string prefixed_string = AddGrammaticalGenderPrefix(std::string(string->value->value));
358 android::StyleString new_string;
359 new_string.str = std::move(prefixed_string);
360 for (const android::StringPool::Span& span : string->value->spans) {
361 new_string.spans.emplace_back(android::Span{*span.name, span.first_char, span.last_char});
362 }
363 std::unique_ptr<StyledString> grammatical_gendered =
364 util::make_unique<StyledString>(pool_->MakeRef(new_string));
365 grammatical_gendered->SetSource(string->GetSource());
366 grammatical_gendered->SetWeak(true);
367 item = std::move(grammatical_gendered);
368 }
369
370 private:
371 DISALLOW_COPY_AND_ASSIGN(GrammaticalGenderVisitor);
372 android::StringPool* pool_;
373 uint8_t grammaticalInflection_;
374 };
375
// Returns a copy of `base` retargeted at the pseudolocale of method `m`: language "en" and
// country "XA" for kAccent, language "ar" and country "XB" for kBidi. Any other method leaves
// the language and country untouched. The grammatical inflection of the copy is always set to
// `grammaticalInflection`.
ConfigDescription ModifyConfigForPseudoLocale(const ConfigDescription& base,
                                              Pseudolocalizer::Method m,
                                              uint8_t grammaticalInflection) {
  ConfigDescription modified = base;

  // Overwrites the two-letter language and country codes of the copy.
  const auto set_locale = [&modified](char lang0, char lang1, char country0, char country1) {
    modified.language[0] = lang0;
    modified.language[1] = lang1;
    modified.country[0] = country0;
    modified.country[1] = country1;
  };

  if (m == Pseudolocalizer::Method::kAccent) {
    set_locale('e', 'n', 'X', 'A');
  } else if (m == Pseudolocalizer::Method::kBidi) {
    set_locale('a', 'r', 'X', 'B');
  }

  modified.grammaticalInflection = grammaticalInflection;
  return modified;
}
400
GrammaticalGender(ResourceConfigValue * original_value,ResourceConfigValue * localized_config_value,android::StringPool * pool,ResourceEntry * entry,const Pseudolocalizer::Method method,uint8_t grammaticalInflection)401 void GrammaticalGender(ResourceConfigValue* original_value,
402 ResourceConfigValue* localized_config_value, android::StringPool* pool,
403 ResourceEntry* entry, const Pseudolocalizer::Method method,
404 uint8_t grammaticalInflection) {
405 GrammaticalGenderVisitor visitor(pool, grammaticalInflection);
406 localized_config_value->value->Accept(&visitor);
407
408 std::unique_ptr<Value> grammatical_gendered_value;
409 if (visitor.value) {
410 grammatical_gendered_value = std::move(visitor.value);
411 } else if (visitor.item) {
412 grammatical_gendered_value = std::move(visitor.item);
413 }
414 if (!grammatical_gendered_value) {
415 return;
416 }
417
418 ConfigDescription config =
419 ModifyConfigForPseudoLocale(original_value->config, method, grammaticalInflection);
420
421 ResourceConfigValue* grammatical_gendered_config_value =
422 entry->FindOrCreateValue(config, original_value->product);
423 if (!grammatical_gendered_config_value->value) {
424 // Only use auto-generated pseudo-localization if none is defined.
425 grammatical_gendered_config_value->value = std::move(grammatical_gendered_value);
426 }
427 }
428
// Bit flags, one per grammatical gender, that can be OR-ed together into a single state word.
constexpr uint32_t MASK_MASCULINE = 1u << 0;  // masculine
constexpr uint32_t MASK_FEMININE = 1u << 1;   // feminine
constexpr uint32_t MASK_NEUTER = 1u << 2;     // neuter
432
GrammaticalGenderIfNeeded(ResourceConfigValue * original_value,ResourceConfigValue * new_value,android::StringPool * pool,ResourceEntry * entry,const Pseudolocalizer::Method method,uint32_t gender_state)433 void GrammaticalGenderIfNeeded(ResourceConfigValue* original_value, ResourceConfigValue* new_value,
434 android::StringPool* pool, ResourceEntry* entry,
435 const Pseudolocalizer::Method method, uint32_t gender_state) {
436 if (gender_state & MASK_FEMININE) {
437 GrammaticalGender(original_value, new_value, pool, entry, method,
438 android::ResTable_config::GRAMMATICAL_GENDER_FEMININE);
439 }
440
441 if (gender_state & MASK_MASCULINE) {
442 GrammaticalGender(original_value, new_value, pool, entry, method,
443 android::ResTable_config::GRAMMATICAL_GENDER_MASCULINE);
444 }
445
446 if (gender_state & MASK_NEUTER) {
447 GrammaticalGender(original_value, new_value, pool, entry, method,
448 android::ResTable_config::GRAMMATICAL_GENDER_NEUTER);
449 }
450 }
451
PseudolocalizeIfNeeded(const Pseudolocalizer::Method method,ResourceConfigValue * original_value,android::StringPool * pool,ResourceEntry * entry,uint32_t gender_state,bool gender_flag)452 void PseudolocalizeIfNeeded(const Pseudolocalizer::Method method,
453 ResourceConfigValue* original_value, android::StringPool* pool,
454 ResourceEntry* entry, uint32_t gender_state, bool gender_flag) {
455 Visitor visitor(pool, method);
456 original_value->value->Accept(&visitor);
457
458 std::unique_ptr<Value> localized_value;
459 if (visitor.value) {
460 localized_value = std::move(visitor.value);
461 } else if (visitor.item) {
462 localized_value = std::move(visitor.item);
463 }
464
465 if (!localized_value) {
466 return;
467 }
468
469 ConfigDescription config_with_accent = ModifyConfigForPseudoLocale(
470 original_value->config, method, android::ResTable_config::GRAMMATICAL_GENDER_ANY);
471
472 ResourceConfigValue* new_config_value =
473 entry->FindOrCreateValue(config_with_accent, original_value->product);
474 if (!new_config_value->value) {
475 // Only use auto-generated pseudo-localization if none is defined.
476 new_config_value->value = std::move(localized_value);
477 }
478 if (gender_flag) {
479 GrammaticalGenderIfNeeded(original_value, new_config_value, pool, entry, method, gender_state);
480 }
481 }
482
483 // A value is pseudolocalizable if it does not define a locale (or is the default locale) and is
484 // translatable.
IsPseudolocalizable(ResourceConfigValue * config_value)485 static bool IsPseudolocalizable(ResourceConfigValue* config_value) {
486 const int diff = config_value->config.diff(ConfigDescription::DefaultConfig());
487 if (diff & ConfigDescription::CONFIG_LOCALE) {
488 return false;
489 }
490 return config_value->value->IsTranslatable();
491 }
492
493 } // namespace
494
ParseGenderValuesAndSaveState(const std::string & grammatical_gender_values,uint32_t * gender_state,android::IDiagnostics * diag)495 bool ParseGenderValuesAndSaveState(const std::string& grammatical_gender_values,
496 uint32_t* gender_state, android::IDiagnostics* diag) {
497 std::vector<std::string> values = util::SplitAndLowercase(grammatical_gender_values, ',');
498 for (size_t i = 0; i < values.size(); i++) {
499 if (values[i].length() != 0) {
500 if (values[i] == "f") {
501 *gender_state |= MASK_FEMININE;
502 } else if (values[i] == "m") {
503 *gender_state |= MASK_MASCULINE;
504 } else if (values[i] == "n") {
505 *gender_state |= MASK_NEUTER;
506 } else {
507 diag->Error(android::DiagMessage() << "Invalid grammatical gender value: " << values[i]);
508 return false;
509 }
510 }
511 }
512 return true;
513 }
514
ParseGenderRatio(const std::string & grammatical_gender_ratio,float * gender_ratio,android::IDiagnostics * diag)515 bool ParseGenderRatio(const std::string& grammatical_gender_ratio, float* gender_ratio,
516 android::IDiagnostics* diag) {
517 const char* input = grammatical_gender_ratio.c_str();
518 char* endPtr;
519 errno = 0;
520 *gender_ratio = strtof(input, &endPtr);
521 if (endPtr == input || *endPtr != '\0' || errno == ERANGE || *gender_ratio < 0 ||
522 *gender_ratio > 1) {
523 diag->Error(android::DiagMessage()
524 << "Invalid grammatical gender ratio: " << grammatical_gender_ratio
525 << ", must be a real number between 0 and 1");
526 return false;
527 }
528 return true;
529 }
530
Consume(IAaptContext * context,ResourceTable * table)531 bool PseudolocaleGenerator::Consume(IAaptContext* context, ResourceTable* table) {
532 uint32_t gender_state = 0;
533 if (!ParseGenderValuesAndSaveState(grammatical_gender_values_, &gender_state,
534 context->GetDiagnostics())) {
535 return false;
536 }
537
538 float gender_ratio = 0;
539 if (!ParseGenderRatio(grammatical_gender_ratio_, &gender_ratio, context->GetDiagnostics())) {
540 return false;
541 }
542
543 std::random_device rd;
544 std::mt19937 gen(rd());
545 std::uniform_real_distribution<> distrib(0.0, 1.0);
546
547 for (auto& package : table->packages) {
548 for (auto& type : package->types) {
549 for (auto& entry : type->entries) {
550 bool gender_flag = false;
551 if (distrib(gen) < gender_ratio) {
552 gender_flag = true;
553 }
554 std::vector<ResourceConfigValue*> values = entry->FindValuesIf(IsPseudolocalizable);
555 for (ResourceConfigValue* value : values) {
556 PseudolocalizeIfNeeded(Pseudolocalizer::Method::kAccent, value, &table->string_pool,
557 entry.get(), gender_state, gender_flag);
558 PseudolocalizeIfNeeded(Pseudolocalizer::Method::kBidi, value, &table->string_pool,
559 entry.get(), gender_state, gender_flag);
560 }
561 }
562 }
563 }
564 return true;
565 }
566
567 } // namespace aapt
568