xref: /aosp_15_r20/frameworks/base/tools/aapt/pseudolocalize.cpp (revision d57664e9bc4670b3ecf6748a746a57c557b6bc9e)
1*d57664e9SAndroid Build Coastguard Worker #include "pseudolocalize.h"
2*d57664e9SAndroid Build Coastguard Worker 
3*d57664e9SAndroid Build Coastguard Worker using namespace std;
4*d57664e9SAndroid Build Coastguard Worker 
5*d57664e9SAndroid Build Coastguard Worker // String basis to generate expansion
6*d57664e9SAndroid Build Coastguard Worker static const String16 k_expansion_string = String16("one two three "
7*d57664e9SAndroid Build Coastguard Worker     "four five six seven eight nine ten eleven twelve thirteen "
8*d57664e9SAndroid Build Coastguard Worker     "fourteen fiveteen sixteen seventeen nineteen twenty");
9*d57664e9SAndroid Build Coastguard Worker 
10*d57664e9SAndroid Build Coastguard Worker // Special unicode characters to override directionality of the words
11*d57664e9SAndroid Build Coastguard Worker static const String16 k_rlm = String16("\xe2\x80\x8f");
12*d57664e9SAndroid Build Coastguard Worker static const String16 k_rlo = String16("\xE2\x80\xae");
13*d57664e9SAndroid Build Coastguard Worker static const String16 k_pdf = String16("\xE2\x80\xac");
14*d57664e9SAndroid Build Coastguard Worker 
15*d57664e9SAndroid Build Coastguard Worker // Placeholder marks
16*d57664e9SAndroid Build Coastguard Worker static const String16 k_placeholder_open = String16("\xc2\xbb");
17*d57664e9SAndroid Build Coastguard Worker static const String16 k_placeholder_close = String16("\xc2\xab");
18*d57664e9SAndroid Build Coastguard Worker 
19*d57664e9SAndroid Build Coastguard Worker static const char16_t k_arg_start = '{';
20*d57664e9SAndroid Build Coastguard Worker static const char16_t k_arg_end = '}';
21*d57664e9SAndroid Build Coastguard Worker 
Pseudolocalizer(PseudolocalizationMethod m)22*d57664e9SAndroid Build Coastguard Worker Pseudolocalizer::Pseudolocalizer(PseudolocalizationMethod m)
23*d57664e9SAndroid Build Coastguard Worker     : mImpl(nullptr), mLastDepth(0) {
24*d57664e9SAndroid Build Coastguard Worker   setMethod(m);
25*d57664e9SAndroid Build Coastguard Worker }
26*d57664e9SAndroid Build Coastguard Worker 
setMethod(PseudolocalizationMethod m)27*d57664e9SAndroid Build Coastguard Worker void Pseudolocalizer::setMethod(PseudolocalizationMethod m) {
28*d57664e9SAndroid Build Coastguard Worker   if (mImpl) {
29*d57664e9SAndroid Build Coastguard Worker     delete mImpl;
30*d57664e9SAndroid Build Coastguard Worker   }
31*d57664e9SAndroid Build Coastguard Worker   if (m == PSEUDO_ACCENTED) {
32*d57664e9SAndroid Build Coastguard Worker     mImpl = new PseudoMethodAccent();
33*d57664e9SAndroid Build Coastguard Worker   } else if (m == PSEUDO_BIDI) {
34*d57664e9SAndroid Build Coastguard Worker     mImpl = new PseudoMethodBidi();
35*d57664e9SAndroid Build Coastguard Worker   } else {
36*d57664e9SAndroid Build Coastguard Worker     mImpl = new PseudoMethodNone();
37*d57664e9SAndroid Build Coastguard Worker   }
38*d57664e9SAndroid Build Coastguard Worker }
39*d57664e9SAndroid Build Coastguard Worker 
text(const String16 & text)40*d57664e9SAndroid Build Coastguard Worker String16 Pseudolocalizer::text(const String16& text) {
41*d57664e9SAndroid Build Coastguard Worker   String16 out;
42*d57664e9SAndroid Build Coastguard Worker   size_t depth = mLastDepth;
43*d57664e9SAndroid Build Coastguard Worker   size_t lastpos, pos;
44*d57664e9SAndroid Build Coastguard Worker   const size_t length= text.size();
45*d57664e9SAndroid Build Coastguard Worker   const char16_t* str = text.c_str();
46*d57664e9SAndroid Build Coastguard Worker   bool escaped = false;
47*d57664e9SAndroid Build Coastguard Worker   for (lastpos = pos = 0; pos < length; pos++) {
48*d57664e9SAndroid Build Coastguard Worker     char16_t c = str[pos];
49*d57664e9SAndroid Build Coastguard Worker     if (escaped) {
50*d57664e9SAndroid Build Coastguard Worker       escaped = false;
51*d57664e9SAndroid Build Coastguard Worker       continue;
52*d57664e9SAndroid Build Coastguard Worker     }
53*d57664e9SAndroid Build Coastguard Worker     if (c == '\'') {
54*d57664e9SAndroid Build Coastguard Worker       escaped = true;
55*d57664e9SAndroid Build Coastguard Worker       continue;
56*d57664e9SAndroid Build Coastguard Worker     }
57*d57664e9SAndroid Build Coastguard Worker 
58*d57664e9SAndroid Build Coastguard Worker     if (c == k_arg_start) {
59*d57664e9SAndroid Build Coastguard Worker       depth++;
60*d57664e9SAndroid Build Coastguard Worker     } else if (c == k_arg_end && depth) {
61*d57664e9SAndroid Build Coastguard Worker       depth--;
62*d57664e9SAndroid Build Coastguard Worker     }
63*d57664e9SAndroid Build Coastguard Worker 
64*d57664e9SAndroid Build Coastguard Worker     if (mLastDepth != depth || pos == length - 1) {
65*d57664e9SAndroid Build Coastguard Worker       bool pseudo = ((mLastDepth % 2) == 0);
66*d57664e9SAndroid Build Coastguard Worker       size_t nextpos = pos;
67*d57664e9SAndroid Build Coastguard Worker       if (!pseudo || depth == mLastDepth) {
68*d57664e9SAndroid Build Coastguard Worker         nextpos++;
69*d57664e9SAndroid Build Coastguard Worker       }
70*d57664e9SAndroid Build Coastguard Worker       size_t size = nextpos - lastpos;
71*d57664e9SAndroid Build Coastguard Worker       if (size) {
72*d57664e9SAndroid Build Coastguard Worker         String16 chunk = String16(text, size, lastpos);
73*d57664e9SAndroid Build Coastguard Worker         if (pseudo) {
74*d57664e9SAndroid Build Coastguard Worker           chunk = mImpl->text(chunk);
75*d57664e9SAndroid Build Coastguard Worker         } else if (str[lastpos] == k_arg_start &&
76*d57664e9SAndroid Build Coastguard Worker                    str[nextpos - 1] == k_arg_end) {
77*d57664e9SAndroid Build Coastguard Worker           chunk = mImpl->placeholder(chunk);
78*d57664e9SAndroid Build Coastguard Worker         }
79*d57664e9SAndroid Build Coastguard Worker         out.append(chunk);
80*d57664e9SAndroid Build Coastguard Worker       }
81*d57664e9SAndroid Build Coastguard Worker       if (pseudo && depth < mLastDepth) { // End of message
82*d57664e9SAndroid Build Coastguard Worker         out.append(mImpl->end());
83*d57664e9SAndroid Build Coastguard Worker       } else if (!pseudo && depth > mLastDepth) { // Start of message
84*d57664e9SAndroid Build Coastguard Worker         out.append(mImpl->start());
85*d57664e9SAndroid Build Coastguard Worker       }
86*d57664e9SAndroid Build Coastguard Worker       lastpos = nextpos;
87*d57664e9SAndroid Build Coastguard Worker       mLastDepth = depth;
88*d57664e9SAndroid Build Coastguard Worker     }
89*d57664e9SAndroid Build Coastguard Worker   }
90*d57664e9SAndroid Build Coastguard Worker   return out;
91*d57664e9SAndroid Build Coastguard Worker }
92*d57664e9SAndroid Build Coastguard Worker 
// Looks up the replacement for |c|, returned as a NUL-terminated byte string.
// Characters without an entry (for example 'F', digits and spaces) yield a
// null pointer.
static const char*
pseudolocalize_char(const char16_t c)
{
    struct Mapping {
        char16_t plain;
        const char* replacement;
    };
    static const Mapping kMappings[] = {
        {'a', "\xc3\xa5"},     {'b', "\xc9\x93"},     {'c', "\xc3\xa7"},
        {'d', "\xc3\xb0"},     {'e', "\xc3\xa9"},     {'f', "\xc6\x92"},
        {'g', "\xc4\x9d"},     {'h', "\xc4\xa5"},     {'i', "\xc3\xae"},
        {'j', "\xc4\xb5"},     {'k', "\xc4\xb7"},     {'l', "\xc4\xbc"},
        {'m', "\xe1\xb8\xbf"}, {'n', "\xc3\xb1"},     {'o', "\xc3\xb6"},
        {'p', "\xc3\xbe"},     {'q', "\x51"},         {'r', "\xc5\x95"},
        {'s', "\xc5\xa1"},     {'t', "\xc5\xa3"},     {'u', "\xc3\xbb"},
        {'v', "\x56"},         {'w', "\xc5\xb5"},     {'x', "\xd1\x85"},
        {'y', "\xc3\xbd"},     {'z', "\xc5\xbe"},
        {'A', "\xc3\x85"},     {'B', "\xce\xb2"},     {'C', "\xc3\x87"},
        {'D', "\xc3\x90"},     {'E', "\xc3\x89"},     {'G', "\xc4\x9c"},
        {'H', "\xc4\xa4"},     {'I', "\xc3\x8e"},     {'J', "\xc4\xb4"},
        {'K', "\xc4\xb6"},     {'L', "\xc4\xbb"},     {'M', "\xe1\xb8\xbe"},
        {'N', "\xc3\x91"},     {'O', "\xc3\x96"},     {'P', "\xc3\x9e"},
        {'Q', "\x71"},         {'R', "\xc5\x94"},     {'S', "\xc5\xa0"},
        {'T', "\xc5\xa2"},     {'U', "\xc3\x9b"},     {'V', "\xce\xbd"},
        {'W', "\xc5\xb4"},     {'X', "\xc3\x97"},     {'Y', "\xc3\x9d"},
        {'Z', "\xc5\xbd"},
        {'!', "\xc2\xa1"},     {'?', "\xc2\xbf"},     {'$', "\xe2\x82\xac"},
    };

    for (const Mapping& entry : kMappings) {
        if (entry.plain == c) {
            return entry.replacement;
        }
    }
    return nullptr;
}
154*d57664e9SAndroid Build Coastguard Worker 
// Tells whether |c| is one of the characters accepted as the final character
// of a '%' placeholder. The two-character "t?" form is not covered here; the
// caller handles it separately.
static bool is_possible_normal_placeholder_end(const char16_t c) {
    switch (c) {
        case 's': case 'S':
        case 'c': case 'C':
        case 'd':
        case 'o':
        case 'x': case 'X':
        case 'f':
        case 'e': case 'E':
        case 'g': case 'G':
        case 'a': case 'A':
        case 'b': case 'B':
        case 'h': case 'H':
        case '%':
        case 'n':
            return true;
        default:
            return false;
    }
}
181*d57664e9SAndroid Build Coastguard Worker 
pseudo_generate_expansion(const unsigned int length)182*d57664e9SAndroid Build Coastguard Worker static String16 pseudo_generate_expansion(const unsigned int length) {
183*d57664e9SAndroid Build Coastguard Worker     String16 result = k_expansion_string;
184*d57664e9SAndroid Build Coastguard Worker     const char16_t* s = result.c_str();
185*d57664e9SAndroid Build Coastguard Worker     if (result.size() < length) {
186*d57664e9SAndroid Build Coastguard Worker         result += String16(" ");
187*d57664e9SAndroid Build Coastguard Worker         result += pseudo_generate_expansion(length - result.size());
188*d57664e9SAndroid Build Coastguard Worker     } else {
189*d57664e9SAndroid Build Coastguard Worker         int ext = 0;
190*d57664e9SAndroid Build Coastguard Worker         // Should contain only whole words, so looking for a space
191*d57664e9SAndroid Build Coastguard Worker         for (unsigned int i = length + 1; i < result.size(); ++i) {
192*d57664e9SAndroid Build Coastguard Worker           ++ext;
193*d57664e9SAndroid Build Coastguard Worker           if (s[i] == ' ') {
194*d57664e9SAndroid Build Coastguard Worker             break;
195*d57664e9SAndroid Build Coastguard Worker           }
196*d57664e9SAndroid Build Coastguard Worker         }
197*d57664e9SAndroid Build Coastguard Worker         // Just keep the first length + ext characters
198*d57664e9SAndroid Build Coastguard Worker         result = String16(result, length + ext);
199*d57664e9SAndroid Build Coastguard Worker     }
200*d57664e9SAndroid Build Coastguard Worker     return result;
201*d57664e9SAndroid Build Coastguard Worker }
202*d57664e9SAndroid Build Coastguard Worker 
// Whitespace test limited to space, horizontal tab and line feed.
static bool is_space(const char16_t c) {
  switch (c) {
    case ' ':
    case '\t':
    case '\n':
      return true;
    default:
      return false;
  }
}
206*d57664e9SAndroid Build Coastguard Worker 
start()207*d57664e9SAndroid Build Coastguard Worker String16 PseudoMethodAccent::start() {
208*d57664e9SAndroid Build Coastguard Worker   String16 result;
209*d57664e9SAndroid Build Coastguard Worker   if (mDepth == 0) {
210*d57664e9SAndroid Build Coastguard Worker     result = String16(String8("["));
211*d57664e9SAndroid Build Coastguard Worker   }
212*d57664e9SAndroid Build Coastguard Worker   mWordCount = mLength = 0;
213*d57664e9SAndroid Build Coastguard Worker   mDepth++;
214*d57664e9SAndroid Build Coastguard Worker   return result;
215*d57664e9SAndroid Build Coastguard Worker }
216*d57664e9SAndroid Build Coastguard Worker 
end()217*d57664e9SAndroid Build Coastguard Worker String16 PseudoMethodAccent::end() {
218*d57664e9SAndroid Build Coastguard Worker   String16 result;
219*d57664e9SAndroid Build Coastguard Worker   if (mLength) {
220*d57664e9SAndroid Build Coastguard Worker     result.append(String16(String8(" ")));
221*d57664e9SAndroid Build Coastguard Worker     result.append(pseudo_generate_expansion(
222*d57664e9SAndroid Build Coastguard Worker         mWordCount > 3 ? mLength : mLength / 2));
223*d57664e9SAndroid Build Coastguard Worker   }
224*d57664e9SAndroid Build Coastguard Worker   mWordCount = mLength = 0;
225*d57664e9SAndroid Build Coastguard Worker   mDepth--;
226*d57664e9SAndroid Build Coastguard Worker   if (mDepth == 0) {
227*d57664e9SAndroid Build Coastguard Worker     result.append(String16(String8("]")));
228*d57664e9SAndroid Build Coastguard Worker   }
229*d57664e9SAndroid Build Coastguard Worker   return result;
230*d57664e9SAndroid Build Coastguard Worker }
231*d57664e9SAndroid Build Coastguard Worker 
232*d57664e9SAndroid Build Coastguard Worker /**
233*d57664e9SAndroid Build Coastguard Worker  * Converts characters so they look like they've been localized.
234*d57664e9SAndroid Build Coastguard Worker  *
235*d57664e9SAndroid Build Coastguard Worker  * Note: This leaves escape sequences untouched so they can later be
236*d57664e9SAndroid Build Coastguard Worker  * processed by ResTable::collectString in the normal way.
237*d57664e9SAndroid Build Coastguard Worker  */
text(const String16 & source)238*d57664e9SAndroid Build Coastguard Worker String16 PseudoMethodAccent::text(const String16& source)
239*d57664e9SAndroid Build Coastguard Worker {
240*d57664e9SAndroid Build Coastguard Worker     const char16_t* s = source.c_str();
241*d57664e9SAndroid Build Coastguard Worker     String16 result;
242*d57664e9SAndroid Build Coastguard Worker     const size_t I = source.size();
243*d57664e9SAndroid Build Coastguard Worker     bool lastspace = true;
244*d57664e9SAndroid Build Coastguard Worker     for (size_t i=0; i<I; i++) {
245*d57664e9SAndroid Build Coastguard Worker         char16_t c = s[i];
246*d57664e9SAndroid Build Coastguard Worker         if (c == '\\') {
247*d57664e9SAndroid Build Coastguard Worker             // Escape syntax, no need to pseudolocalize
248*d57664e9SAndroid Build Coastguard Worker             if (i<I-1) {
249*d57664e9SAndroid Build Coastguard Worker                 result += String16("\\");
250*d57664e9SAndroid Build Coastguard Worker                 i++;
251*d57664e9SAndroid Build Coastguard Worker                 c = s[i];
252*d57664e9SAndroid Build Coastguard Worker                 switch (c) {
253*d57664e9SAndroid Build Coastguard Worker                     case 'u':
254*d57664e9SAndroid Build Coastguard Worker                         // this one takes up 5 chars
255*d57664e9SAndroid Build Coastguard Worker                         result += String16(s+i, 5);
256*d57664e9SAndroid Build Coastguard Worker                         i += 4;
257*d57664e9SAndroid Build Coastguard Worker                         break;
258*d57664e9SAndroid Build Coastguard Worker                     case 't':
259*d57664e9SAndroid Build Coastguard Worker                     case 'n':
260*d57664e9SAndroid Build Coastguard Worker                     case '#':
261*d57664e9SAndroid Build Coastguard Worker                     case '@':
262*d57664e9SAndroid Build Coastguard Worker                     case '?':
263*d57664e9SAndroid Build Coastguard Worker                     case '"':
264*d57664e9SAndroid Build Coastguard Worker                     case '\'':
265*d57664e9SAndroid Build Coastguard Worker                     case '\\':
266*d57664e9SAndroid Build Coastguard Worker                     default:
267*d57664e9SAndroid Build Coastguard Worker                         result.append(&c, 1);
268*d57664e9SAndroid Build Coastguard Worker                         break;
269*d57664e9SAndroid Build Coastguard Worker                 }
270*d57664e9SAndroid Build Coastguard Worker             } else {
271*d57664e9SAndroid Build Coastguard Worker                 result.append(&c, 1);
272*d57664e9SAndroid Build Coastguard Worker             }
273*d57664e9SAndroid Build Coastguard Worker         } else if (c == '%') {
274*d57664e9SAndroid Build Coastguard Worker             // Placeholder syntax, no need to pseudolocalize
275*d57664e9SAndroid Build Coastguard Worker             String16 chunk;
276*d57664e9SAndroid Build Coastguard Worker             bool end = false;
277*d57664e9SAndroid Build Coastguard Worker             chunk.append(&c, 1);
278*d57664e9SAndroid Build Coastguard Worker             while (!end && i < I) {
279*d57664e9SAndroid Build Coastguard Worker                 ++i;
280*d57664e9SAndroid Build Coastguard Worker                 c = s[i];
281*d57664e9SAndroid Build Coastguard Worker                 chunk.append(&c, 1);
282*d57664e9SAndroid Build Coastguard Worker                 if (is_possible_normal_placeholder_end(c)) {
283*d57664e9SAndroid Build Coastguard Worker                     end = true;
284*d57664e9SAndroid Build Coastguard Worker                 } else if (c == 't') {
285*d57664e9SAndroid Build Coastguard Worker                     ++i;
286*d57664e9SAndroid Build Coastguard Worker                     c = s[i];
287*d57664e9SAndroid Build Coastguard Worker                     chunk.append(&c, 1);
288*d57664e9SAndroid Build Coastguard Worker                     end = true;
289*d57664e9SAndroid Build Coastguard Worker                 }
290*d57664e9SAndroid Build Coastguard Worker             }
291*d57664e9SAndroid Build Coastguard Worker             // Treat chunk as a placeholder unless it ends with %.
292*d57664e9SAndroid Build Coastguard Worker             result += ((c == '%') ? chunk : placeholder(chunk));
293*d57664e9SAndroid Build Coastguard Worker         } else if (c == '<' || c == '&') {
294*d57664e9SAndroid Build Coastguard Worker             // html syntax, no need to pseudolocalize
295*d57664e9SAndroid Build Coastguard Worker             bool tag_closed = false;
296*d57664e9SAndroid Build Coastguard Worker             while (!tag_closed && i < I) {
297*d57664e9SAndroid Build Coastguard Worker                 if (c == '&') {
298*d57664e9SAndroid Build Coastguard Worker                     String16 escape_text;
299*d57664e9SAndroid Build Coastguard Worker                     escape_text.append(&c, 1);
300*d57664e9SAndroid Build Coastguard Worker                     bool end = false;
301*d57664e9SAndroid Build Coastguard Worker                     size_t htmlCodePos = i;
302*d57664e9SAndroid Build Coastguard Worker                     while (!end && htmlCodePos < I) {
303*d57664e9SAndroid Build Coastguard Worker                         ++htmlCodePos;
304*d57664e9SAndroid Build Coastguard Worker                         c = s[htmlCodePos];
305*d57664e9SAndroid Build Coastguard Worker                         escape_text.append(&c, 1);
306*d57664e9SAndroid Build Coastguard Worker                         // Valid html code
307*d57664e9SAndroid Build Coastguard Worker                         if (c == ';') {
308*d57664e9SAndroid Build Coastguard Worker                             end = true;
309*d57664e9SAndroid Build Coastguard Worker                             i = htmlCodePos;
310*d57664e9SAndroid Build Coastguard Worker                         }
311*d57664e9SAndroid Build Coastguard Worker                         // Wrong html code
312*d57664e9SAndroid Build Coastguard Worker                         else if (!((c == '#' ||
313*d57664e9SAndroid Build Coastguard Worker                                  (c >= 'a' && c <= 'z') ||
314*d57664e9SAndroid Build Coastguard Worker                                  (c >= 'A' && c <= 'Z') ||
315*d57664e9SAndroid Build Coastguard Worker                                  (c >= '0' && c <= '9')))) {
316*d57664e9SAndroid Build Coastguard Worker                             end = true;
317*d57664e9SAndroid Build Coastguard Worker                         }
318*d57664e9SAndroid Build Coastguard Worker                     }
319*d57664e9SAndroid Build Coastguard Worker                     result += escape_text;
320*d57664e9SAndroid Build Coastguard Worker                     if (escape_text != String16("&lt;")) {
321*d57664e9SAndroid Build Coastguard Worker                         tag_closed = true;
322*d57664e9SAndroid Build Coastguard Worker                     }
323*d57664e9SAndroid Build Coastguard Worker                     continue;
324*d57664e9SAndroid Build Coastguard Worker                 }
325*d57664e9SAndroid Build Coastguard Worker                 if (c == '>') {
326*d57664e9SAndroid Build Coastguard Worker                     tag_closed = true;
327*d57664e9SAndroid Build Coastguard Worker                     result.append(&c, 1);
328*d57664e9SAndroid Build Coastguard Worker                     continue;
329*d57664e9SAndroid Build Coastguard Worker                 }
330*d57664e9SAndroid Build Coastguard Worker                 result.append(&c, 1);
331*d57664e9SAndroid Build Coastguard Worker                 i++;
332*d57664e9SAndroid Build Coastguard Worker                 c = s[i];
333*d57664e9SAndroid Build Coastguard Worker             }
334*d57664e9SAndroid Build Coastguard Worker         } else {
335*d57664e9SAndroid Build Coastguard Worker             // This is a pure text that should be pseudolocalized
336*d57664e9SAndroid Build Coastguard Worker             const char* p = pseudolocalize_char(c);
337*d57664e9SAndroid Build Coastguard Worker             if (p != NULL) {
338*d57664e9SAndroid Build Coastguard Worker                 result += String16(p);
339*d57664e9SAndroid Build Coastguard Worker             } else {
340*d57664e9SAndroid Build Coastguard Worker                 bool space = is_space(c);
341*d57664e9SAndroid Build Coastguard Worker                 if (lastspace && !space) {
342*d57664e9SAndroid Build Coastguard Worker                   mWordCount++;
343*d57664e9SAndroid Build Coastguard Worker                 }
344*d57664e9SAndroid Build Coastguard Worker                 lastspace = space;
345*d57664e9SAndroid Build Coastguard Worker                 result.append(&c, 1);
346*d57664e9SAndroid Build Coastguard Worker             }
347*d57664e9SAndroid Build Coastguard Worker             // Count only pseudolocalizable chars and delimiters
348*d57664e9SAndroid Build Coastguard Worker             mLength++;
349*d57664e9SAndroid Build Coastguard Worker         }
350*d57664e9SAndroid Build Coastguard Worker     }
351*d57664e9SAndroid Build Coastguard Worker     return result;
352*d57664e9SAndroid Build Coastguard Worker }
// Returns |source| with k_placeholder_open in front of it and
// k_placeholder_close behind it.
String16 PseudoMethodAccent::placeholder(const String16& source) {
  String16 wrapped(k_placeholder_open);
  wrapped.append(source);
  wrapped.append(k_placeholder_close);
  return wrapped;
}
357*d57664e9SAndroid Build Coastguard Worker 
/**
 * Wraps every word of |source| in directionality overrides: k_rlm + k_rlo is
 * inserted before the first character of a word and k_pdf + k_rlm after its
 * last character.
 *
 * Words are separated by spaces, tabs and newlines, as well as by the escape
 * sequences "\n" and "\t". A backslash is always emitted together with the
 * character it escapes, after any word-boundary marks. A lone backslash at
 * the very end of |source| is kept as part of the last word (previously it
 * was dropped).
 *
 * @param source text to convert
 * @return the converted text
 */
String16 PseudoMethodBidi::text(const String16& source)
{
    const char16_t* s = source.c_str();
    String16 result;
    bool lastspace = true;
    bool space = true;
    bool escape = false;
    const char16_t ESCAPE_CHAR = '\\';
    for (size_t i=0; i<source.size(); i++) {
        char16_t c = s[i];
        if (!escape && c == ESCAPE_CHAR) {
          // Hold the backslash back until the escaped character is known.
          escape = true;
          continue;
        }
        space = (!escape && is_space(c)) || (escape && (c == 'n' || c == 't'));
        if (lastspace && !space) {
          // Word start
          result += k_rlm + k_rlo;
        } else if (!lastspace && space) {
          // Word end
          result += k_pdf + k_rlm;
        }
        lastspace = space;
        if (escape) {
          result.append(&ESCAPE_CHAR, 1);
          escape=false;
        }
        result.append(&c, 1);
    }
    if (escape) {
      // Lone trailing backslash: nothing follows it, so emit it now as an
      // ordinary word character rather than losing it.
      if (lastspace) {
        result += k_rlm + k_rlo;
        lastspace = false;
      }
      result.append(&ESCAPE_CHAR, 1);
    }
    if (!lastspace) {
      // End of last word
      result += k_pdf + k_rlm;
    }
    return result;
}
393*d57664e9SAndroid Build Coastguard Worker 
// Returns |source| preceded by k_rlm + k_rlo and followed by k_pdf + k_rlm,
// the same directionality change sequence used around words.
String16 PseudoMethodBidi::placeholder(const String16& source) {
  String16 wrapped(k_rlm);
  wrapped.append(k_rlo);
  wrapped.append(source);
  wrapped.append(k_pdf);
  wrapped.append(k_rlm);
  return wrapped;
}
398*d57664e9SAndroid Build Coastguard Worker 
399