1 // Copyright 2011 The Chromium Authors
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include "base/i18n/rtl.h"
6
7 #include <stddef.h>
8
9 #include <algorithm>
10
11 #include "base/files/file_path.h"
12 #include "base/logging.h"
13 #include "base/strings/string_util.h"
14 #include "base/strings/sys_string_conversions.h"
15 #include "base/strings/utf_string_conversions.h"
16 #include "base/test/icu_test_util.h"
17 #include "build/build_config.h"
18 #include "testing/gtest/include/gtest/gtest.h"
19 #include "testing/platform_test.h"
20 #include "third_party/icu/source/common/unicode/locid.h"
21 #include "third_party/icu/source/i18n/unicode/usearch.h"
22
23 namespace base {
24 namespace i18n {
25
26 class RTLTest : public PlatformTest {
27 };
28
TEST_F(RTLTest,GetFirstStrongCharacterDirection)29 TEST_F(RTLTest, GetFirstStrongCharacterDirection) {
30 struct {
31 const wchar_t* text;
32 TextDirection direction;
33 } cases[] = {
34 // Test pure LTR string.
35 {L"foo bar", LEFT_TO_RIGHT},
36 // Test pure RTL string.
37 {L"\x05d0\x05d1\x05d2 \x05d3\x0d4\x05d5", RIGHT_TO_LEFT},
38 // Test bidi string in which the first character with strong
39 // directionality
40 // is a character with type L.
41 {L"foo \x05d0 bar", LEFT_TO_RIGHT},
42 // Test bidi string in which the first character with strong
43 // directionality
44 // is a character with type R.
45 {L"\x05d0 foo bar", RIGHT_TO_LEFT},
46 // Test bidi string which starts with a character with weak directionality
47 // and in which the first character with strong directionality is a
48 // character with type L.
49 {L"!foo \x05d0 bar", LEFT_TO_RIGHT},
50 // Test bidi string which starts with a character with weak directionality
51 // and in which the first character with strong directionality is a
52 // character with type R.
53 {L",\x05d0 foo bar", RIGHT_TO_LEFT},
54 // Test bidi string in which the first character with strong
55 // directionality
56 // is a character with type LRE.
57 {L"\x202a \x05d0 foo bar", LEFT_TO_RIGHT},
58 // Test bidi string in which the first character with strong
59 // directionality
60 // is a character with type LRO.
61 {L"\x202d \x05d0 foo bar", LEFT_TO_RIGHT},
62 // Test bidi string in which the first character with strong
63 // directionality
64 // is a character with type RLE.
65 {L"\x202b foo \x05d0 bar", RIGHT_TO_LEFT},
66 // Test bidi string in which the first character with strong
67 // directionality
68 // is a character with type RLO.
69 {L"\x202e foo \x05d0 bar", RIGHT_TO_LEFT},
70 // Test bidi string in which the first character with strong
71 // directionality
72 // is a character with type AL.
73 {L"\x0622 foo \x05d0 bar", RIGHT_TO_LEFT},
74 // Test a string without strong directionality characters.
75 {L",!.{}", LEFT_TO_RIGHT},
76 // Test empty string.
77 {L"", LEFT_TO_RIGHT},
78 // Test characters in non-BMP (e.g. Phoenician letters. Please refer to
79 // http://demo.icu-project.org/icu-bin/ubrowse?scr=151&b=10910 for more
80 // information).
81 {
82 #if defined(WCHAR_T_IS_32_BIT)
83 L" ! \x10910"
84 L"abc 123",
85 #elif defined(WCHAR_T_IS_16_BIT)
86 L" ! \xd802\xdd10"
87 L"abc 123",
88 #else
89 #error wchar_t should be either UTF-16 or UTF-32
90 #endif
91 RIGHT_TO_LEFT},
92 {
93 #if defined(WCHAR_T_IS_32_BIT)
94 L" ! \x10401"
95 L"abc 123",
96 #elif defined(WCHAR_T_IS_16_BIT)
97 L" ! \xd801\xdc01"
98 L"abc 123",
99 #else
100 #error wchar_t should be either UTF-16 or UTF-32
101 #endif
102 LEFT_TO_RIGHT},
103 };
104
105 for (auto& i : cases)
106 EXPECT_EQ(i.direction,
107 GetFirstStrongCharacterDirection(WideToUTF16(i.text)));
108 }
109
110
111 // Note that the cases with LRE, LRO, RLE and RLO are invalid for
112 // GetLastStrongCharacterDirection because they should be followed by PDF
113 // character.
TEST_F(RTLTest,GetLastStrongCharacterDirection)114 TEST_F(RTLTest, GetLastStrongCharacterDirection) {
115 struct {
116 const wchar_t* text;
117 TextDirection direction;
118 } cases[] = {
119 // Test pure LTR string.
120 {L"foo bar", LEFT_TO_RIGHT},
121 // Test pure RTL string.
122 {L"\x05d0\x05d1\x05d2 \x05d3\x0d4\x05d5", RIGHT_TO_LEFT},
123 // Test bidi string in which the last character with strong directionality
124 // is a character with type L.
125 {L"foo \x05d0 bar", LEFT_TO_RIGHT},
126 // Test bidi string in which the last character with strong directionality
127 // is a character with type R.
128 {L"\x05d0 foo bar \x05d3", RIGHT_TO_LEFT},
129 // Test bidi string which ends with a character with weak directionality
130 // and in which the last character with strong directionality is a
131 // character with type L.
132 {L"!foo \x05d0 bar!", LEFT_TO_RIGHT},
133 // Test bidi string which ends with a character with weak directionality
134 // and in which the last character with strong directionality is a
135 // character with type R.
136 {L",\x05d0 foo bar \x05d1,", RIGHT_TO_LEFT},
137 // Test bidi string in which the last character with strong directionality
138 // is a character with type AL.
139 {L"\x0622 foo \x05d0 bar \x0622", RIGHT_TO_LEFT},
140 // Test a string without strong directionality characters.
141 {L",!.{}", LEFT_TO_RIGHT},
142 // Test empty string.
143 {L"", LEFT_TO_RIGHT},
144 // Test characters in non-BMP (e.g. Phoenician letters. Please refer to
145 // http://demo.icu-project.org/icu-bin/ubrowse?scr=151&b=10910 for more
146 // information).
147 {
148 #if defined(WCHAR_T_IS_32_BIT)
149 L"abc 123"
150 L" ! \x10910 !",
151 #elif defined(WCHAR_T_IS_16_BIT)
152 L"abc 123"
153 L" ! \xd802\xdd10 !",
154 #else
155 #error wchar_t should be either UTF-16 or UTF-32
156 #endif
157 RIGHT_TO_LEFT},
158 {
159 #if defined(WCHAR_T_IS_32_BIT)
160 L"abc 123"
161 L" ! \x10401 !",
162 #elif defined(WCHAR_T_IS_16_BIT)
163 L"abc 123"
164 L" ! \xd801\xdc01 !",
165 #else
166 #error wchar_t should be either UTF-16 or UTF-32
167 #endif
168 LEFT_TO_RIGHT},
169 };
170
171 for (auto& i : cases)
172 EXPECT_EQ(i.direction,
173 GetLastStrongCharacterDirection(WideToUTF16(i.text)));
174 }
175
TEST_F(RTLTest,GetStringDirection)176 TEST_F(RTLTest, GetStringDirection) {
177 struct {
178 const wchar_t* text;
179 TextDirection direction;
180 } cases[] = {
181 // Test pure LTR string.
182 {L"foobar", LEFT_TO_RIGHT},
183 {L".foobar", LEFT_TO_RIGHT},
184 {L"foo, bar", LEFT_TO_RIGHT},
185 // Test pure LTR with strong directionality characters of type LRE.
186 {L"\x202a\x202a", LEFT_TO_RIGHT},
187 {L".\x202a\x202a", LEFT_TO_RIGHT},
188 {L"\x202a, \x202a", LEFT_TO_RIGHT},
189 // Test pure LTR with strong directionality characters of type LRO.
190 {L"\x202d\x202d", LEFT_TO_RIGHT},
191 {L".\x202d\x202d", LEFT_TO_RIGHT},
192 {L"\x202d, \x202d", LEFT_TO_RIGHT},
193 // Test pure LTR with various types of strong directionality characters.
194 {L"foo \x202a\x202d", LEFT_TO_RIGHT},
195 {L".\x202d foo \x202a", LEFT_TO_RIGHT},
196 {L"\x202a, \x202d foo", LEFT_TO_RIGHT},
197 // Test pure RTL with strong directionality characters of type R.
198 {L"\x05d0\x05d0", RIGHT_TO_LEFT},
199 {L".\x05d0\x05d0", RIGHT_TO_LEFT},
200 {L"\x05d0, \x05d0", RIGHT_TO_LEFT},
201 // Test pure RTL with strong directionality characters of type RLE.
202 {L"\x202b\x202b", RIGHT_TO_LEFT},
203 {L".\x202b\x202b", RIGHT_TO_LEFT},
204 {L"\x202b, \x202b", RIGHT_TO_LEFT},
205 // Test pure RTL with strong directionality characters of type RLO.
206 {L"\x202e\x202e", RIGHT_TO_LEFT},
207 {L".\x202e\x202e", RIGHT_TO_LEFT},
208 {L"\x202e, \x202e", RIGHT_TO_LEFT},
209 // Test pure RTL with strong directionality characters of type AL.
210 {L"\x0622\x0622", RIGHT_TO_LEFT},
211 {L".\x0622\x0622", RIGHT_TO_LEFT},
212 {L"\x0622, \x0622", RIGHT_TO_LEFT},
213 // Test pure RTL with various types of strong directionality characters.
214 {L"\x05d0\x202b\x202e\x0622", RIGHT_TO_LEFT},
215 {L".\x202b\x202e\x0622\x05d0", RIGHT_TO_LEFT},
216 {L"\x0622\x202e, \x202b\x05d0", RIGHT_TO_LEFT},
217 // Test bidi strings.
218 {L"foo \x05d0 bar", UNKNOWN_DIRECTION},
219 {L"\x202b foo bar", UNKNOWN_DIRECTION},
220 {L"!foo \x0622 bar", UNKNOWN_DIRECTION},
221 {L"\x202a\x202b", UNKNOWN_DIRECTION},
222 {L"\x202e\x202d", UNKNOWN_DIRECTION},
223 {L"\x0622\x202a", UNKNOWN_DIRECTION},
224 {L"\x202d\x05d0", UNKNOWN_DIRECTION},
225 // Test a string without strong directionality characters.
226 {L",!.{}", LEFT_TO_RIGHT},
227 // Test empty string.
228 {L"", LEFT_TO_RIGHT},
229 {
230 #if defined(WCHAR_T_IS_32_BIT)
231 L" ! \x10910"
232 L"abc 123",
233 #elif defined(WCHAR_T_IS_16_BIT)
234 L" ! \xd802\xdd10"
235 L"abc 123",
236 #else
237 #error wchar_t should be either UTF-16 or UTF-32
238 #endif
239 UNKNOWN_DIRECTION},
240 {
241 #if defined(WCHAR_T_IS_32_BIT)
242 L" ! \x10401"
243 L"abc 123",
244 #elif defined(WCHAR_T_IS_16_BIT)
245 L" ! \xd801\xdc01"
246 L"abc 123",
247 #else
248 #error wchar_t should be either UTF-16 or UTF-32
249 #endif
250 LEFT_TO_RIGHT},
251 };
252
253 for (auto& i : cases)
254 EXPECT_EQ(i.direction, GetStringDirection(WideToUTF16(i.text)));
255 }
256
TEST_F(RTLTest,WrapPathWithLTRFormatting)257 TEST_F(RTLTest, WrapPathWithLTRFormatting) {
258 const wchar_t* cases[] = {
259 // Test common path, such as "c:\foo\bar".
260 L"c:/foo/bar",
261 // Test path with file name, such as "c:\foo\bar\test.jpg".
262 L"c:/foo/bar/test.jpg",
263 // Test path ending with punctuation, such as "c:\(foo)\bar.".
264 L"c:/(foo)/bar.",
265 // Test path ending with separator, such as "c:\foo\bar\".
266 L"c:/foo/bar/",
267 // Test path with RTL character.
268 L"c:/\x05d0",
269 // Test path with 2 level RTL directory names.
270 L"c:/\x05d0/\x0622",
271 // Test path with mixed RTL/LTR directory names and ending with punctuation.
272 L"c:/\x05d0/\x0622/(foo)/b.a.r.",
273 // Test path without driver name, such as "/foo/bar/test/jpg".
274 L"/foo/bar/test.jpg",
275 // Test path start with current directory, such as "./foo".
276 L"./foo",
277 // Test path start with parent directory, such as "../foo/bar.jpg".
278 L"../foo/bar.jpg",
279 // Test absolute path, such as "//foo/bar.jpg".
280 L"//foo/bar.jpg",
281 // Test path with mixed RTL/LTR directory names.
282 L"c:/foo/\x05d0/\x0622/\x05d1.jpg",
283 // Test empty path.
284 L""
285 };
286
287 for (auto*& i : cases) {
288 FilePath path;
289 #if BUILDFLAG(IS_WIN)
290 std::wstring win_path(i);
291 std::replace(win_path.begin(), win_path.end(), '/', '\\');
292 path = FilePath(win_path);
293 std::wstring wrapped_expected =
294 std::wstring(L"\x202a") + win_path + L"\x202c";
295 #else
296 path = FilePath(base::SysWideToNativeMB(i));
297 std::wstring wrapped_expected = std::wstring(L"\x202a") + i + L"\x202c";
298 #endif
299 std::u16string localized_file_path_string;
300 WrapPathWithLTRFormatting(path, &localized_file_path_string);
301
302 std::wstring wrapped_actual = UTF16ToWide(localized_file_path_string);
303 EXPECT_EQ(wrapped_expected, wrapped_actual);
304 }
305 }
306
// Checks WrapStringWithLTRFormatting() and WrapStringWithRTLFormatting()
// under both application text directions. An empty string must stay empty.
// Every other input must come back as: the matching embedding mark, the
// untouched input, then the pop-directional-formatting mark.
TEST_F(RTLTest, WrapString) {
  // Adjacent literals are used where a hex escape is followed by a character
  // that would otherwise be parsed as part of the escape.
  const wchar_t* cases[] = {
      L" . ",
      L"abc",
      L"a" L"\x5d0\x5d1",
      L"a" L"\x5d1" L"b",
      L"\x5d0\x5d1\x5d2",
      L"\x5d0\x5d1" L"a",
      L"\x5d0" L"a" L"\x5d1",
  };

  const bool was_rtl = IsRTL();

  test::ScopedRestoreICUDefaultLocale restore_locale;
  for (size_t i = 0; i < 2; ++i) {
    // Toggle the application default text direction (to try each direction).
    SetRTLForTesting(!IsRTL());

    std::u16string empty;
    WrapStringWithLTRFormatting(&empty);
    EXPECT_TRUE(empty.empty());
    WrapStringWithRTLFormatting(&empty);
    EXPECT_TRUE(empty.empty());

    for (const wchar_t* test_case : cases) {
      const std::u16string input = WideToUTF16(test_case);

      std::u16string ltr_wrap = input;
      WrapStringWithLTRFormatting(&ltr_wrap);
      EXPECT_EQ(ltr_wrap[0], kLeftToRightEmbeddingMark);
      EXPECT_EQ(ltr_wrap.substr(1, ltr_wrap.length() - 2), input);
      EXPECT_EQ(ltr_wrap[ltr_wrap.length() - 1], kPopDirectionalFormatting);

      std::u16string rtl_wrap = input;
      WrapStringWithRTLFormatting(&rtl_wrap);
      EXPECT_EQ(rtl_wrap[0], kRightToLeftEmbeddingMark);
      EXPECT_EQ(rtl_wrap.substr(1, rtl_wrap.length() - 2), input);
      EXPECT_EQ(rtl_wrap[rtl_wrap.length() - 1], kPopDirectionalFormatting);
    }
  }

  // Two toggles must leave the direction where it started.
  EXPECT_EQ(was_rtl, IsRTL());
}
349
// Checks whether GetDisplayStringInLTRDirectionality() changes its input.
// Each fixture states, separately for an LTR and an RTL application
// direction, whether the returned string is expected to differ from the
// input. Only "changed or unchanged" is asserted, not the exact output.
TEST_F(RTLTest, GetDisplayStringInLTRDirectionality) {
  struct {
    const wchar_t* path;
    bool wrap_ltr;  // Expect a modified string when the UI is LTR.
    bool wrap_rtl;  // Expect a modified string when the UI is RTL.
  } cases[] = {
      {L"test", false, true},
      {L"test.html", false, true},
      {L"\x05d0\x05d1\x05d2", true, true},
      {L"\x05d0\x05d1\x05d2.txt", true, true},
      {L"\x05d0" L"abc", true, true},
      {L"\x05d0" L"abc.txt", true, true},
      {L"abc\x05d0\x05d1", false, true},
      {L"abc\x05d0\x05d1.jpg", false, true},
  };

  const bool was_rtl = IsRTL();

  test::ScopedRestoreICUDefaultLocale restore_locale;
  for (size_t pass = 0; pass < 2; ++pass) {
    // Toggle the application default text direction (to try each direction).
    SetRTLForTesting(!IsRTL());

    for (const auto& test_case : cases) {
      const std::u16string input = WideToUTF16(test_case.path);
      const std::u16string output = GetDisplayStringInLTRDirectionality(input);

      // Pick the expectation that applies to the current UI directionality.
      const bool expect_wrapped =
          IsRTL() ? test_case.wrap_rtl : test_case.wrap_ltr;
      if (expect_wrapped) {
        EXPECT_NE(output, input);
      } else {
        EXPECT_EQ(output, input);
      }
    }
  }

  // Two toggles must leave the direction where it started.
  EXPECT_EQ(was_rtl, IsRTL());
}
385
// Checks the direction GetTextDirectionForLocale() reports for a handful of
// right-to-left and left-to-right locale identifiers.
TEST_F(RTLTest, GetTextDirection) {
  // Right-to-left locales.
  EXPECT_EQ(RIGHT_TO_LEFT, GetTextDirectionForLocale("ar"));
  EXPECT_EQ(RIGHT_TO_LEFT, GetTextDirectionForLocale("ar_EG"));
  EXPECT_EQ(RIGHT_TO_LEFT, GetTextDirectionForLocale("he"));
  EXPECT_EQ(RIGHT_TO_LEFT, GetTextDirectionForLocale("he_IL"));
  // "iw" is an obsolete code for Hebrew.
  EXPECT_EQ(RIGHT_TO_LEFT, GetTextDirectionForLocale("iw"));
  // We are not yet localized to Farsi and Urdu, but the text layout
  // direction information for them is available.
  EXPECT_EQ(RIGHT_TO_LEFT, GetTextDirectionForLocale("fa"));
  EXPECT_EQ(RIGHT_TO_LEFT, GetTextDirectionForLocale("ur"));
#if 0
  // Enable these once the minimal locale data for Azerbaijani written in
  // Arabic and for Dhivehi is included. At the moment, our copy of the ICU
  // data has no entries for them.
  EXPECT_EQ(RIGHT_TO_LEFT, GetTextDirectionForLocale("az_Arab"));
  // Dhivehi, which uses the Thaana script.
  EXPECT_EQ(RIGHT_TO_LEFT, GetTextDirectionForLocale("dv"));
#endif

  // Left-to-right locales.
  EXPECT_EQ(LEFT_TO_RIGHT, GetTextDirectionForLocale("en"));
  // Chinese in China, written with '-' as the separator.
  EXPECT_EQ(LEFT_TO_RIGHT, GetTextDirectionForLocale("zh-CN"));
  // Filipino: a 3-letter code.
  EXPECT_EQ(LEFT_TO_RIGHT, GetTextDirectionForLocale("fil"));
  // Russian.
  EXPECT_EQ(LEFT_TO_RIGHT, GetTextDirectionForLocale("ru"));
  // Japanese, which uses multiple scripts.
  EXPECT_EQ(LEFT_TO_RIGHT, GetTextDirectionForLocale("ja"));
}
415
// Checks the direction GetTextDirectionForLocaleInStartUp() reports for a
// handful of right-to-left and left-to-right locale identifiers.
TEST_F(RTLTest, GetTextDirectionForLocaleInStartUp) {
  // Right-to-left locales.
  EXPECT_EQ(RIGHT_TO_LEFT, GetTextDirectionForLocaleInStartUp("ar"));
  EXPECT_EQ(RIGHT_TO_LEFT, GetTextDirectionForLocaleInStartUp("ar_EG"));
  EXPECT_EQ(RIGHT_TO_LEFT, GetTextDirectionForLocaleInStartUp("he"));
  EXPECT_EQ(RIGHT_TO_LEFT, GetTextDirectionForLocaleInStartUp("he_IL"));
  // "iw" is an obsolete code for Hebrew.
  EXPECT_EQ(RIGHT_TO_LEFT, GetTextDirectionForLocaleInStartUp("iw"));
  // We are not yet localized to Farsi and Urdu, but the text layout
  // direction information for them is available.
  EXPECT_EQ(RIGHT_TO_LEFT, GetTextDirectionForLocaleInStartUp("fa"));
  EXPECT_EQ(RIGHT_TO_LEFT, GetTextDirectionForLocaleInStartUp("ur"));

  // Left-to-right locales.
  EXPECT_EQ(LEFT_TO_RIGHT, GetTextDirectionForLocaleInStartUp("en"));
  // Chinese in China, written with '-' as the separator.
  EXPECT_EQ(LEFT_TO_RIGHT, GetTextDirectionForLocaleInStartUp("zh-CN"));
  // Filipino: a 3-letter code.
  EXPECT_EQ(LEFT_TO_RIGHT, GetTextDirectionForLocaleInStartUp("fil"));
  // Russian.
  EXPECT_EQ(LEFT_TO_RIGHT, GetTextDirectionForLocaleInStartUp("ru"));
  // Japanese, which uses multiple scripts.
  EXPECT_EQ(LEFT_TO_RIGHT, GetTextDirectionForLocaleInStartUp("ja"));
}
437
// Round-trip check: whenever AdjustStringForLocaleDirection() reports that it
// modified a string, UnadjustStringForLocaleDirection() must succeed and
// restore the original text. Runs under both application text directions.
TEST_F(RTLTest, UnadjustStringForLocaleDirection) {
  // These strings are the same as the BMP inputs used by the
  // GetFirstStrongCharacterDirection test.
  const wchar_t* cases[] = {
      L"foo bar",
      L"foo \x05d0 bar",
      L"\x05d0 foo bar",
      L"!foo \x05d0 bar",
      L",\x05d0 foo bar",
      L"\x202a \x05d0 foo bar",
      L"\x202d \x05d0 foo bar",
      L"\x202b foo \x05d0 bar",
      L"\x202e foo \x05d0 bar",
      L"\x0622 foo \x05d0 bar",
  };

  const bool was_rtl = IsRTL();

  test::ScopedRestoreICUDefaultLocale restore_locale;
  for (size_t pass = 0; pass < 2; ++pass) {
    // Toggle the application default text direction (to try each direction).
    SetRTLForTesting(!IsRTL());

    for (const wchar_t* test_case : cases) {
      const std::u16string original = WideToUTF16(test_case);
      std::u16string round_tripped = original;

      // Inputs that the adjust step leaves alone have nothing to undo.
      if (AdjustStringForLocaleDirection(&round_tripped)) {
        EXPECT_NE(original, round_tripped);
        EXPECT_TRUE(UnadjustStringForLocaleDirection(&round_tripped));
        EXPECT_EQ(original, round_tripped)
            << " for test case [" << original
            << "] with IsRTL() == " << IsRTL();
      }
    }
  }

  // Two toggles must leave the direction where it started.
  EXPECT_EQ(was_rtl, IsRTL());
}
477
// Checks EnsureTerminatedDirectionalFormatting() against pairs of input and
// expected output. In the fixtures, U+202C terminators are appended only for
// directional formatting characters that are still open at the end of the
// string. Runs under both application text directions.
TEST_F(RTLTest, EnsureTerminatedDirectionalFormatting) {
  struct {
    const wchar_t* input;
    const wchar_t* expected;
  } cases[] = {
      // No dir-formatting characters at all.
      {L"google.com", L"google.com"},
      // Properly terminated dir-formatting character.
      {L"\x202egoogle.com\x202c", L"\x202egoogle.com\x202c"},
      // Over-terminated dir-formatting characters.
      {L"\x202egoogle\x202c.com\x202c", L"\x202egoogle\x202c.com\x202c"},
      // String beginning with a dir-formatting character.
      {L"\x202emoc.elgoog", L"\x202emoc.elgoog\x202c"},
      // String that over-terminates and then re-opens.
      {L"\x202egoogle\x202c\x202c.\x202eom",
       L"\x202egoogle\x202c\x202c.\x202eom\x202c"},
      // Dir-formatting character in the middle of the string.
      {L"google\x202e.com", L"google\x202e.com\x202c"},
      // Multiple dir-formatting characters.
      {L"\x202egoogle\x202e.com/\x202eguest",
       L"\x202egoogle\x202e.com/\x202eguest\x202c\x202c\x202c"},
      // The other dir-formatting characters (U+202A, U+202B, and U+202D).
      {L"\x202agoogle.com", L"\x202agoogle.com\x202c"},
      {L"\x202bgoogle.com", L"\x202bgoogle.com\x202c"},
      {L"\x202dgoogle.com", L"\x202dgoogle.com\x202c"},
  };

  const bool was_rtl = IsRTL();

  test::ScopedRestoreICUDefaultLocale restore_locale;
  for (size_t pass = 0; pass < 2; ++pass) {
    // Toggle the application default text direction (to try each direction).
    SetRTLForTesting(!IsRTL());

    for (const auto& test_case : cases) {
      const std::u16string expected_text = WideToUTF16(test_case.expected);
      std::u16string actual_text = WideToUTF16(test_case.input);
      EnsureTerminatedDirectionalFormatting(&actual_text);
      EXPECT_EQ(expected_text, actual_text);
    }
  }

  // Two toggles must leave the direction where it started.
  EXPECT_EQ(was_rtl, IsRTL());
}
520
TEST_F(RTLTest,SanitizeUserSuppliedString)521 TEST_F(RTLTest, SanitizeUserSuppliedString) {
522 struct {
523 const wchar_t* unformatted_text;
524 const wchar_t* formatted_text;
525 } cases[] = {
526 // Tests RTL string with properly terminated dir-formatting character.
527 {L"\x202eكبير Google التطبيق\x202c", L"\x202eكبير Google التطبيق\x202c"},
528 // Tests RTL string with over-terminated dir-formatting characters.
529 {L"\x202eكبير Google\x202cالتطبيق\x202c",
530 L"\x202eكبير Google\x202cالتطبيق\x202c"},
531 // Tests RTL string that over-terminates then re-opens.
532 {L"\x202eكبير Google\x202c\x202cالتطبيق\x202e",
533 L"\x202eكبير Google\x202c\x202cالتطبيق\x202e\x202c"},
534 // Tests RTL string with multiple dir-formatting characters.
535 {L"\x202eك\x202eبير Google الت\x202eطبيق",
536 L"\x202eك\x202eبير Google الت\x202eطبيق\x202c\x202c\x202c"},
537 // Test the other dir-formatting characters (U+202A, U+202B, and U+202D).
538 {L"\x202aكبير Google التطبيق", L"\x202aكبير Google التطبيق\x202c"},
539 {L"\x202bكبير Google التطبيق", L"\x202bكبير Google التطبيق\x202c"},
540 {L"\x202dكبير Google التطبيق", L"\x202dكبير Google التطبيق\x202c"},
541
542 };
543
544 for (auto& i : cases) {
545 // On Windows for an LTR locale, no changes to the string are made.
546 std::u16string prefix, suffix = u"";
547 #if !BUILDFLAG(IS_WIN)
548 prefix = u"\x200e\x202b";
549 suffix = u"\x202c\x200e";
550 #endif // !BUILDFLAG(IS_WIN)
551 std::u16string unsanitized_text = WideToUTF16(i.unformatted_text);
552 std::u16string sanitized_text =
553 prefix + WideToUTF16(i.formatted_text) + suffix;
554 SanitizeUserSuppliedString(&unsanitized_text);
555 EXPECT_EQ(sanitized_text, unsanitized_text);
556 }
557 }
558
559 class SetICULocaleTest : public PlatformTest {};
560
// Feeds SetICUDefaultLocale() a very long locale identifier and checks the
// resulting ICU default locale. The identifier is grown by repeating
// "-x-foo" until it is at least 152 characters long, then extended once more
// with "zzz". The expected ICU name is grown in lockstep.
TEST_F(SetICULocaleTest, OverlongLocaleId) {
  constexpr size_t kMinimumLength = 152;

  test::ScopedRestoreICUDefaultLocale restore_locale;
  std::string locale_id("fr-ca-x-foo");
  std::string expected_icu_name("fr_CA@x=foo");
  while (locale_id.length() < kMinimumLength) {
    locale_id.append("-x-foo");
    expected_icu_name.append("-x-foo");
  }

  SetICUDefaultLocale(locale_id);
  EXPECT_STRNE("en_US", icu::Locale::getDefault().getName());

  locale_id.append("zzz");
  expected_icu_name.append("zzz");
  SetICUDefaultLocale(locale_id);
  // ICU-21639 fixed the long locale issue, so the full name is expected.
  EXPECT_STREQ(expected_icu_name.c_str(),
               icu::Locale::getDefault().getName());
}
577
578 } // namespace i18n
579 } // namespace base
580