xref: /aosp_15_r20/frameworks/base/tools/aapt2/compile/PseudolocaleGenerator.cpp (revision d57664e9bc4670b3ecf6748a746a57c557b6bc9e)
1 /*
2  * Copyright (C) 2016 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include "compile/PseudolocaleGenerator.h"
18 
19 #include <stdint.h>
20 
21 #include <algorithm>
22 #include <random>
23 
24 #include "ResourceTable.h"
25 #include "ResourceValues.h"
26 #include "ValueVisitor.h"
27 #include "androidfw/ResourceTypes.h"
28 #include "androidfw/Util.h"
29 #include "compile/Pseudolocalizer.h"
30 #include "util/Util.h"
31 
32 using ::android::ConfigDescription;
33 using ::android::StringPiece;
34 using ::android::StringPiece16;
35 
36 namespace aapt {
37 
// Common representation shared by styled Spans and UntranslatableSections, so that both kinds of
// range can be handled in a single ordered pass over a string.
struct UnifiedSpan {
  // Tag name of the Span. Left empty when this entry stands for an UntranslatableSection.
  std::optional<std::string> tag;

  // Index, in UTF-16 code units, of the first character covered by this span.
  uint32_t first_char;

  // Index, in UTF-16 code units, of the last character covered by this span (inclusive bound).
  uint32_t last_char;
};
49 
operator <(const UnifiedSpan & left,const UnifiedSpan & right)50 inline static bool operator<(const UnifiedSpan& left, const UnifiedSpan& right) {
51   if (left.first_char < right.first_char) {
52     return true;
53   } else if (left.first_char > right.first_char) {
54     return false;
55   } else if (left.last_char < right.last_char) {
56     return true;
57   }
58   return false;
59 }
60 
SpanToUnifiedSpan(const android::StringPool::Span & span)61 inline static UnifiedSpan SpanToUnifiedSpan(const android::StringPool::Span& span) {
62   return UnifiedSpan{*span.name, span.first_char, span.last_char};
63 }
64 
UntranslatableSectionToUnifiedSpan(const UntranslatableSection & section)65 inline static UnifiedSpan UntranslatableSectionToUnifiedSpan(const UntranslatableSection& section) {
66   return UnifiedSpan{
67       {}, static_cast<uint32_t>(section.start), static_cast<uint32_t>(section.end) - 1};
68 }
69 
70 // Merges the Span and UntranslatableSections of this StyledString into a single vector of
71 // UnifiedSpans. This will first check that the Spans are sorted in ascending order.
MergeSpans(const StyledString & string)72 static std::vector<UnifiedSpan> MergeSpans(const StyledString& string) {
73   // Ensure the Spans are sorted and converted.
74   std::vector<UnifiedSpan> sorted_spans;
75   sorted_spans.reserve(string.value->spans.size());
76   std::transform(string.value->spans.begin(), string.value->spans.end(),
77                  std::back_inserter(sorted_spans), SpanToUnifiedSpan);
78 
79   // Stable sort to ensure tag sequences like "<b><i>" are preserved.
80   std::stable_sort(sorted_spans.begin(), sorted_spans.end());
81 
82   // Ensure the UntranslatableSections are sorted and converted.
83   std::vector<UnifiedSpan> sorted_untranslatable_sections;
84   sorted_untranslatable_sections.reserve(string.untranslatable_sections.size());
85   std::transform(string.untranslatable_sections.begin(), string.untranslatable_sections.end(),
86                  std::back_inserter(sorted_untranslatable_sections),
87                  UntranslatableSectionToUnifiedSpan);
88   std::sort(sorted_untranslatable_sections.begin(), sorted_untranslatable_sections.end());
89 
90   std::vector<UnifiedSpan> merged_spans;
91   merged_spans.reserve(sorted_spans.size() + sorted_untranslatable_sections.size());
92   auto span_iter = sorted_spans.begin();
93   auto untranslatable_iter = sorted_untranslatable_sections.begin();
94   while (span_iter != sorted_spans.end() &&
95          untranslatable_iter != sorted_untranslatable_sections.end()) {
96     if (*span_iter < *untranslatable_iter) {
97       merged_spans.push_back(std::move(*span_iter));
98       ++span_iter;
99     } else {
100       merged_spans.push_back(std::move(*untranslatable_iter));
101       ++untranslatable_iter;
102     }
103   }
104 
105   while (span_iter != sorted_spans.end()) {
106     merged_spans.push_back(std::move(*span_iter));
107     ++span_iter;
108   }
109 
110   while (untranslatable_iter != sorted_untranslatable_sections.end()) {
111     merged_spans.push_back(std::move(*untranslatable_iter));
112     ++untranslatable_iter;
113   }
114   return merged_spans;
115 }
116 
PseudolocalizeStyledString(StyledString * string,Pseudolocalizer::Method method,android::StringPool * pool)117 std::unique_ptr<StyledString> PseudolocalizeStyledString(StyledString* string,
118                                                          Pseudolocalizer::Method method,
119                                                          android::StringPool* pool) {
120   Pseudolocalizer localizer(method);
121 
122   // Collect the spans and untranslatable sections into one set of spans, sorted by first_char.
123   // This will effectively subdivide the string into multiple sections that can be individually
124   // pseudolocalized, while keeping the span indices synchronized.
125   std::vector<UnifiedSpan> merged_spans = MergeSpans(*string);
126 
127   // All Span indices are UTF-16 based, according to the resources.arsc format expected by the
128   // runtime. So we will do all our processing in UTF-16, then convert back.
129   const std::u16string text16 = android::util::Utf8ToUtf16(string->value->value);
130 
131   // Convenient wrapper around the text that allows us to work with StringPieces.
132   const StringPiece16 text(text16);
133 
134   // The new string.
135   std::string new_string = localizer.Start();
136 
137   // The stack that keeps track of what nested Span we're in.
138   std::vector<size_t> span_stack;
139 
140   // The current position in the original text.
141   uint32_t cursor = 0u;
142 
143   // The current position in the new text.
144   uint32_t new_cursor = utf8_to_utf16_length(reinterpret_cast<const uint8_t*>(new_string.data()),
145                                              new_string.size(), false);
146 
147   // We assume no nesting of untranslatable sections, since XLIFF doesn't allow it.
148   bool translatable = true;
149   size_t span_idx = 0u;
150   while (span_idx < merged_spans.size() || !span_stack.empty()) {
151     UnifiedSpan* span = span_idx >= merged_spans.size() ? nullptr : &merged_spans[span_idx];
152     UnifiedSpan* parent_span = span_stack.empty() ? nullptr : &merged_spans[span_stack.back()];
153 
154     if (span != nullptr) {
155       if (parent_span == nullptr || parent_span->last_char > span->first_char) {
156         // There is no parent, or this span is the child of the parent.
157         // Pseudolocalize all the text until this span.
158         const StringPiece16 substr = text.substr(cursor, span->first_char - cursor);
159         cursor += substr.size();
160 
161         // Pseudolocalize the substring.
162         std::string new_substr = android::util::Utf16ToUtf8(substr);
163         if (translatable) {
164           new_substr = localizer.Text(new_substr);
165         }
166         new_cursor += utf8_to_utf16_length(reinterpret_cast<const uint8_t*>(new_substr.data()),
167                                            new_substr.size(), false);
168         new_string += new_substr;
169 
170         // Rewrite the first_char.
171         span->first_char = new_cursor;
172         if (!span->tag) {
173           // An untranslatable section has begun!
174           translatable = false;
175         }
176         span_stack.push_back(span_idx);
177         ++span_idx;
178         continue;
179       }
180     }
181 
182     if (parent_span != nullptr) {
183       // There is a parent, and either this span is not a child of it, or there are no more spans.
184       // Pop this off the stack.
185       const StringPiece16 substr = text.substr(cursor, parent_span->last_char - cursor + 1);
186       cursor += substr.size();
187 
188       // Pseudolocalize the substring.
189       std::string new_substr = android::util::Utf16ToUtf8(substr);
190       if (translatable) {
191         new_substr = localizer.Text(new_substr);
192       }
193       new_cursor += utf8_to_utf16_length(reinterpret_cast<const uint8_t*>(new_substr.data()),
194                                          new_substr.size(), false);
195       new_string += new_substr;
196 
197       parent_span->last_char = new_cursor - 1;
198       if (parent_span->tag) {
199         // An end to an untranslatable section.
200         translatable = true;
201       }
202       span_stack.pop_back();
203     }
204   }
205 
206   // Finish the pseudolocalization at the end of the string.
207   new_string +=
208       localizer.Text(android::util::Utf16ToUtf8(text.substr(cursor, text.size() - cursor)));
209   new_string += localizer.End();
210 
211   android::StyleString localized;
212   localized.str = std::move(new_string);
213 
214   // Convert the UnifiedSpans into regular Spans, skipping the UntranslatableSections.
215   for (UnifiedSpan& span : merged_spans) {
216     if (span.tag) {
217       localized.spans.push_back(
218           android::Span{std::move(span.tag.value()), span.first_char, span.last_char});
219     }
220   }
221   return util::make_unique<StyledString>(pool->MakeRef(localized));
222 }
223 
224 namespace {
225 
226 class Visitor : public ValueVisitor {
227  public:
228   // Either value or item will be populated upon visiting the value.
229   std::unique_ptr<Value> value;
230   std::unique_ptr<Item> item;
231 
Visitor(android::StringPool * pool,Pseudolocalizer::Method method)232   Visitor(android::StringPool* pool, Pseudolocalizer::Method method)
233       : pool_(pool), method_(method), localizer_(method) {
234   }
235 
Visit(Plural * plural)236   void Visit(Plural* plural) override {
237     CloningValueTransformer cloner(pool_);
238     std::unique_ptr<Plural> localized = util::make_unique<Plural>();
239     for (size_t i = 0; i < plural->values.size(); i++) {
240       Visitor sub_visitor(pool_, method_);
241       if (plural->values[i]) {
242         plural->values[i]->Accept(&sub_visitor);
243         if (sub_visitor.item) {
244           localized->values[i] = std::move(sub_visitor.item);
245         } else {
246           localized->values[i] = plural->values[i]->Transform(cloner);
247         }
248       }
249     }
250     localized->SetSource(plural->GetSource());
251     localized->SetWeak(true);
252     value = std::move(localized);
253   }
254 
Visit(String * string)255   void Visit(String* string) override {
256     const StringPiece original_string = *string->value;
257     std::string result = localizer_.Start();
258 
259     // Pseudolocalize only the translatable sections.
260     size_t start = 0u;
261     for (const UntranslatableSection& section : string->untranslatable_sections) {
262       // Pseudolocalize the content before the untranslatable section.
263       const size_t len = section.start - start;
264       if (len > 0u) {
265         result += localizer_.Text(original_string.substr(start, len));
266       }
267 
268       // Copy the untranslatable content.
269       result += original_string.substr(section.start, section.end - section.start);
270       start = section.end;
271     }
272 
273     // Pseudolocalize the content after the last untranslatable section.
274     if (start != original_string.size()) {
275       const size_t len = original_string.size() - start;
276       result += localizer_.Text(original_string.substr(start, len));
277     }
278     result += localizer_.End();
279 
280     std::unique_ptr<String> localized = util::make_unique<String>(pool_->MakeRef(result));
281     localized->SetSource(string->GetSource());
282     localized->SetWeak(true);
283     item = std::move(localized);
284   }
285 
Visit(StyledString * string)286   void Visit(StyledString* string) override {
287     item = PseudolocalizeStyledString(string, method_, pool_);
288     item->SetSource(string->GetSource());
289     item->SetWeak(true);
290   }
291 
292  private:
293   DISALLOW_COPY_AND_ASSIGN(Visitor);
294 
295   android::StringPool* pool_;
296   Pseudolocalizer::Method method_;
297   Pseudolocalizer localizer_;
298 };
299 
300 class GrammaticalGenderVisitor : public ValueVisitor {
301  public:
302   std::unique_ptr<Value> value;
303   std::unique_ptr<Item> item;
304 
GrammaticalGenderVisitor(android::StringPool * pool,uint8_t grammaticalInflection)305   GrammaticalGenderVisitor(android::StringPool* pool, uint8_t grammaticalInflection)
306       : pool_(pool), grammaticalInflection_(grammaticalInflection) {
307   }
308 
Visit(Plural * plural)309   void Visit(Plural* plural) override {
310     CloningValueTransformer cloner(pool_);
311     std::unique_ptr<Plural> grammatical_gendered = util::make_unique<Plural>();
312     for (size_t i = 0; i < plural->values.size(); i++) {
313       if (plural->values[i]) {
314         GrammaticalGenderVisitor sub_visitor(pool_, grammaticalInflection_);
315         plural->values[i]->Accept(&sub_visitor);
316         if (sub_visitor.item) {
317           grammatical_gendered->values[i] = std::move(sub_visitor.item);
318         } else {
319           grammatical_gendered->values[i] = plural->values[i]->Transform(cloner);
320         }
321       }
322     }
323     grammatical_gendered->SetSource(plural->GetSource());
324     grammatical_gendered->SetWeak(true);
325     value = std::move(grammatical_gendered);
326   }
327 
AddGrammaticalGenderPrefix(const std::string_view & original_string)328   std::string AddGrammaticalGenderPrefix(const std::string_view& original_string) {
329     std::string result;
330     switch (grammaticalInflection_) {
331       case android::ResTable_config::GRAMMATICAL_GENDER_MASCULINE:
332         result = std::string("(M)") + std::string(original_string);
333         break;
334       case android::ResTable_config::GRAMMATICAL_GENDER_FEMININE:
335         result = std::string("(F)") + std::string(original_string);
336         break;
337       case android::ResTable_config::GRAMMATICAL_GENDER_NEUTER:
338         result = std::string("(N)") + std::string(original_string);
339         break;
340       default:
341         result = std::string(original_string);
342         break;
343     }
344     return result;
345   }
346 
Visit(String * string)347   void Visit(String* string) override {
348     std::string prefixed_string = AddGrammaticalGenderPrefix(std::string(*string->value));
349     std::unique_ptr<String> grammatical_gendered =
350         util::make_unique<String>(pool_->MakeRef(prefixed_string));
351     grammatical_gendered->SetSource(string->GetSource());
352     grammatical_gendered->SetWeak(true);
353     item = std::move(grammatical_gendered);
354   }
355 
Visit(StyledString * string)356   void Visit(StyledString* string) override {
357     std::string prefixed_string = AddGrammaticalGenderPrefix(std::string(string->value->value));
358     android::StyleString new_string;
359     new_string.str = std::move(prefixed_string);
360     for (const android::StringPool::Span& span : string->value->spans) {
361       new_string.spans.emplace_back(android::Span{*span.name, span.first_char, span.last_char});
362     }
363     std::unique_ptr<StyledString> grammatical_gendered =
364         util::make_unique<StyledString>(pool_->MakeRef(new_string));
365     grammatical_gendered->SetSource(string->GetSource());
366     grammatical_gendered->SetWeak(true);
367     item = std::move(grammatical_gendered);
368   }
369 
370  private:
371   DISALLOW_COPY_AND_ASSIGN(GrammaticalGenderVisitor);
372   android::StringPool* pool_;
373   uint8_t grammaticalInflection_;
374 };
375 
// Returns a copy of `base` whose locale is replaced by the pseudolocale matching `m` ("en-XA"
// for kAccent, "ar-XB" for kBidi; any other method leaves the locale alone) and whose
// grammatical inflection is set to `grammaticalInflection`.
ConfigDescription ModifyConfigForPseudoLocale(const ConfigDescription& base,
                                              Pseudolocalizer::Method m,
                                              uint8_t grammaticalInflection) {
  ConfigDescription result = base;

  // Overwrites the two-letter language and country codes of the result.
  const auto set_locale = [&result](char lang0, char lang1, char country0, char country1) {
    result.language[0] = lang0;
    result.language[1] = lang1;
    result.country[0] = country0;
    result.country[1] = country1;
  };

  if (m == Pseudolocalizer::Method::kAccent) {
    set_locale('e', 'n', 'X', 'A');
  } else if (m == Pseudolocalizer::Method::kBidi) {
    set_locale('a', 'r', 'X', 'B');
  }

  result.grammaticalInflection = grammaticalInflection;
  return result;
}
400 
GrammaticalGender(ResourceConfigValue * original_value,ResourceConfigValue * localized_config_value,android::StringPool * pool,ResourceEntry * entry,const Pseudolocalizer::Method method,uint8_t grammaticalInflection)401 void GrammaticalGender(ResourceConfigValue* original_value,
402                        ResourceConfigValue* localized_config_value, android::StringPool* pool,
403                        ResourceEntry* entry, const Pseudolocalizer::Method method,
404                        uint8_t grammaticalInflection) {
405   GrammaticalGenderVisitor visitor(pool, grammaticalInflection);
406   localized_config_value->value->Accept(&visitor);
407 
408   std::unique_ptr<Value> grammatical_gendered_value;
409   if (visitor.value) {
410     grammatical_gendered_value = std::move(visitor.value);
411   } else if (visitor.item) {
412     grammatical_gendered_value = std::move(visitor.item);
413   }
414   if (!grammatical_gendered_value) {
415     return;
416   }
417 
418   ConfigDescription config =
419       ModifyConfigForPseudoLocale(original_value->config, method, grammaticalInflection);
420 
421   ResourceConfigValue* grammatical_gendered_config_value =
422       entry->FindOrCreateValue(config, original_value->product);
423   if (!grammatical_gendered_config_value->value) {
424     // Only use auto-generated pseudo-localization if none is defined.
425     grammatical_gendered_config_value->value = std::move(grammatical_gendered_value);
426   }
427 }
428 
// Bits of the gender state bit set; one bit per grammatical gender to generate.
constexpr uint32_t MASK_MASCULINE = 1u << 0;  // Masculine
constexpr uint32_t MASK_FEMININE = 1u << 1;   // Feminine
constexpr uint32_t MASK_NEUTER = 1u << 2;     // Neuter
432 
GrammaticalGenderIfNeeded(ResourceConfigValue * original_value,ResourceConfigValue * new_value,android::StringPool * pool,ResourceEntry * entry,const Pseudolocalizer::Method method,uint32_t gender_state)433 void GrammaticalGenderIfNeeded(ResourceConfigValue* original_value, ResourceConfigValue* new_value,
434                                android::StringPool* pool, ResourceEntry* entry,
435                                const Pseudolocalizer::Method method, uint32_t gender_state) {
436   if (gender_state & MASK_FEMININE) {
437     GrammaticalGender(original_value, new_value, pool, entry, method,
438                       android::ResTable_config::GRAMMATICAL_GENDER_FEMININE);
439   }
440 
441   if (gender_state & MASK_MASCULINE) {
442     GrammaticalGender(original_value, new_value, pool, entry, method,
443                       android::ResTable_config::GRAMMATICAL_GENDER_MASCULINE);
444   }
445 
446   if (gender_state & MASK_NEUTER) {
447     GrammaticalGender(original_value, new_value, pool, entry, method,
448                       android::ResTable_config::GRAMMATICAL_GENDER_NEUTER);
449   }
450 }
451 
PseudolocalizeIfNeeded(const Pseudolocalizer::Method method,ResourceConfigValue * original_value,android::StringPool * pool,ResourceEntry * entry,uint32_t gender_state,bool gender_flag)452 void PseudolocalizeIfNeeded(const Pseudolocalizer::Method method,
453                             ResourceConfigValue* original_value, android::StringPool* pool,
454                             ResourceEntry* entry, uint32_t gender_state, bool gender_flag) {
455   Visitor visitor(pool, method);
456   original_value->value->Accept(&visitor);
457 
458   std::unique_ptr<Value> localized_value;
459   if (visitor.value) {
460     localized_value = std::move(visitor.value);
461   } else if (visitor.item) {
462     localized_value = std::move(visitor.item);
463   }
464 
465   if (!localized_value) {
466     return;
467   }
468 
469   ConfigDescription config_with_accent = ModifyConfigForPseudoLocale(
470       original_value->config, method, android::ResTable_config::GRAMMATICAL_GENDER_ANY);
471 
472   ResourceConfigValue* new_config_value =
473       entry->FindOrCreateValue(config_with_accent, original_value->product);
474   if (!new_config_value->value) {
475     // Only use auto-generated pseudo-localization if none is defined.
476     new_config_value->value = std::move(localized_value);
477   }
478   if (gender_flag) {
479     GrammaticalGenderIfNeeded(original_value, new_config_value, pool, entry, method, gender_state);
480   }
481 }
482 
483 // A value is pseudolocalizable if it does not define a locale (or is the default locale) and is
484 // translatable.
IsPseudolocalizable(ResourceConfigValue * config_value)485 static bool IsPseudolocalizable(ResourceConfigValue* config_value) {
486   const int diff = config_value->config.diff(ConfigDescription::DefaultConfig());
487   if (diff & ConfigDescription::CONFIG_LOCALE) {
488     return false;
489   }
490   return config_value->value->IsTranslatable();
491 }
492 
493 }  // namespace
494 
ParseGenderValuesAndSaveState(const std::string & grammatical_gender_values,uint32_t * gender_state,android::IDiagnostics * diag)495 bool ParseGenderValuesAndSaveState(const std::string& grammatical_gender_values,
496                                    uint32_t* gender_state, android::IDiagnostics* diag) {
497   std::vector<std::string> values = util::SplitAndLowercase(grammatical_gender_values, ',');
498   for (size_t i = 0; i < values.size(); i++) {
499     if (values[i].length() != 0) {
500       if (values[i] == "f") {
501         *gender_state |= MASK_FEMININE;
502       } else if (values[i] == "m") {
503         *gender_state |= MASK_MASCULINE;
504       } else if (values[i] == "n") {
505         *gender_state |= MASK_NEUTER;
506       } else {
507         diag->Error(android::DiagMessage() << "Invalid grammatical gender value: " << values[i]);
508         return false;
509       }
510     }
511   }
512   return true;
513 }
514 
ParseGenderRatio(const std::string & grammatical_gender_ratio,float * gender_ratio,android::IDiagnostics * diag)515 bool ParseGenderRatio(const std::string& grammatical_gender_ratio, float* gender_ratio,
516                       android::IDiagnostics* diag) {
517   const char* input = grammatical_gender_ratio.c_str();
518   char* endPtr;
519   errno = 0;
520   *gender_ratio = strtof(input, &endPtr);
521   if (endPtr == input || *endPtr != '\0' || errno == ERANGE || *gender_ratio < 0 ||
522       *gender_ratio > 1) {
523     diag->Error(android::DiagMessage()
524                 << "Invalid grammatical gender ratio: " << grammatical_gender_ratio
525                 << ", must be a real number between 0 and 1");
526     return false;
527   }
528   return true;
529 }
530 
Consume(IAaptContext * context,ResourceTable * table)531 bool PseudolocaleGenerator::Consume(IAaptContext* context, ResourceTable* table) {
532   uint32_t gender_state = 0;
533   if (!ParseGenderValuesAndSaveState(grammatical_gender_values_, &gender_state,
534                                      context->GetDiagnostics())) {
535     return false;
536   }
537 
538   float gender_ratio = 0;
539   if (!ParseGenderRatio(grammatical_gender_ratio_, &gender_ratio, context->GetDiagnostics())) {
540     return false;
541   }
542 
543   std::random_device rd;
544   std::mt19937 gen(rd());
545   std::uniform_real_distribution<> distrib(0.0, 1.0);
546 
547   for (auto& package : table->packages) {
548     for (auto& type : package->types) {
549       for (auto& entry : type->entries) {
550         bool gender_flag = false;
551         if (distrib(gen) < gender_ratio) {
552           gender_flag = true;
553         }
554         std::vector<ResourceConfigValue*> values = entry->FindValuesIf(IsPseudolocalizable);
555         for (ResourceConfigValue* value : values) {
556           PseudolocalizeIfNeeded(Pseudolocalizer::Method::kAccent, value, &table->string_pool,
557                                  entry.get(), gender_state, gender_flag);
558           PseudolocalizeIfNeeded(Pseudolocalizer::Method::kBidi, value, &table->string_pool,
559                                  entry.get(), gender_state, gender_flag);
560         }
561       }
562     }
563   }
564   return true;
565 }
566 
567 }  // namespace aapt
568