xref: /aosp_15_r20/external/cronet/base/i18n/rtl_unittest.cc (revision 6777b5387eb2ff775bb5750e3f5d96f37fb7352b)
1 // Copyright 2011 The Chromium Authors
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #include "base/i18n/rtl.h"
6 
7 #include <stddef.h>
8 
9 #include <algorithm>
10 
11 #include "base/files/file_path.h"
12 #include "base/logging.h"
13 #include "base/strings/string_util.h"
14 #include "base/strings/sys_string_conversions.h"
15 #include "base/strings/utf_string_conversions.h"
16 #include "base/test/icu_test_util.h"
17 #include "build/build_config.h"
18 #include "testing/gtest/include/gtest/gtest.h"
19 #include "testing/platform_test.h"
20 #include "third_party/icu/source/common/unicode/locid.h"
21 #include "third_party/icu/source/i18n/unicode/usearch.h"
22 
23 namespace base {
24 namespace i18n {
25 
26 class RTLTest : public PlatformTest {
27 };
28 
TEST_F(RTLTest,GetFirstStrongCharacterDirection)29 TEST_F(RTLTest, GetFirstStrongCharacterDirection) {
30   struct {
31     const wchar_t* text;
32     TextDirection direction;
33   } cases[] = {
34       // Test pure LTR string.
35       {L"foo bar", LEFT_TO_RIGHT},
36       // Test pure RTL string.
37       {L"\x05d0\x05d1\x05d2 \x05d3\x0d4\x05d5", RIGHT_TO_LEFT},
38       // Test bidi string in which the first character with strong
39       // directionality
40       // is a character with type L.
41       {L"foo \x05d0 bar", LEFT_TO_RIGHT},
42       // Test bidi string in which the first character with strong
43       // directionality
44       // is a character with type R.
45       {L"\x05d0 foo bar", RIGHT_TO_LEFT},
46       // Test bidi string which starts with a character with weak directionality
47       // and in which the first character with strong directionality is a
48       // character with type L.
49       {L"!foo \x05d0 bar", LEFT_TO_RIGHT},
50       // Test bidi string which starts with a character with weak directionality
51       // and in which the first character with strong directionality is a
52       // character with type R.
53       {L",\x05d0 foo bar", RIGHT_TO_LEFT},
54       // Test bidi string in which the first character with strong
55       // directionality
56       // is a character with type LRE.
57       {L"\x202a \x05d0 foo  bar", LEFT_TO_RIGHT},
58       // Test bidi string in which the first character with strong
59       // directionality
60       // is a character with type LRO.
61       {L"\x202d \x05d0 foo  bar", LEFT_TO_RIGHT},
62       // Test bidi string in which the first character with strong
63       // directionality
64       // is a character with type RLE.
65       {L"\x202b foo \x05d0 bar", RIGHT_TO_LEFT},
66       // Test bidi string in which the first character with strong
67       // directionality
68       // is a character with type RLO.
69       {L"\x202e foo \x05d0 bar", RIGHT_TO_LEFT},
70       // Test bidi string in which the first character with strong
71       // directionality
72       // is a character with type AL.
73       {L"\x0622 foo \x05d0 bar", RIGHT_TO_LEFT},
74       // Test a string without strong directionality characters.
75       {L",!.{}", LEFT_TO_RIGHT},
76       // Test empty string.
77       {L"", LEFT_TO_RIGHT},
78       // Test characters in non-BMP (e.g. Phoenician letters. Please refer to
79       // http://demo.icu-project.org/icu-bin/ubrowse?scr=151&b=10910 for more
80       // information).
81       {
82 #if defined(WCHAR_T_IS_32_BIT)
83           L" ! \x10910"
84           L"abc 123",
85 #elif defined(WCHAR_T_IS_16_BIT)
86           L" ! \xd802\xdd10"
87           L"abc 123",
88 #else
89 #error wchar_t should be either UTF-16 or UTF-32
90 #endif
91           RIGHT_TO_LEFT},
92       {
93 #if defined(WCHAR_T_IS_32_BIT)
94           L" ! \x10401"
95           L"abc 123",
96 #elif defined(WCHAR_T_IS_16_BIT)
97           L" ! \xd801\xdc01"
98           L"abc 123",
99 #else
100 #error wchar_t should be either UTF-16 or UTF-32
101 #endif
102           LEFT_TO_RIGHT},
103   };
104 
105   for (auto& i : cases)
106     EXPECT_EQ(i.direction,
107               GetFirstStrongCharacterDirection(WideToUTF16(i.text)));
108 }
109 
110 
111 // Note that the cases with LRE, LRO, RLE and RLO are invalid for
112 // GetLastStrongCharacterDirection because they should be followed by PDF
113 // character.
TEST_F(RTLTest,GetLastStrongCharacterDirection)114 TEST_F(RTLTest, GetLastStrongCharacterDirection) {
115   struct {
116     const wchar_t* text;
117     TextDirection direction;
118   } cases[] = {
119       // Test pure LTR string.
120       {L"foo bar", LEFT_TO_RIGHT},
121       // Test pure RTL string.
122       {L"\x05d0\x05d1\x05d2 \x05d3\x0d4\x05d5", RIGHT_TO_LEFT},
123       // Test bidi string in which the last character with strong directionality
124       // is a character with type L.
125       {L"foo \x05d0 bar", LEFT_TO_RIGHT},
126       // Test bidi string in which the last character with strong directionality
127       // is a character with type R.
128       {L"\x05d0 foo bar \x05d3", RIGHT_TO_LEFT},
129       // Test bidi string which ends with a character with weak directionality
130       // and in which the last character with strong directionality is a
131       // character with type L.
132       {L"!foo \x05d0 bar!", LEFT_TO_RIGHT},
133       // Test bidi string which ends with a character with weak directionality
134       // and in which the last character with strong directionality is a
135       // character with type R.
136       {L",\x05d0 foo bar \x05d1,", RIGHT_TO_LEFT},
137       // Test bidi string in which the last character with strong directionality
138       // is a character with type AL.
139       {L"\x0622 foo \x05d0 bar \x0622", RIGHT_TO_LEFT},
140       // Test a string without strong directionality characters.
141       {L",!.{}", LEFT_TO_RIGHT},
142       // Test empty string.
143       {L"", LEFT_TO_RIGHT},
144       // Test characters in non-BMP (e.g. Phoenician letters. Please refer to
145       // http://demo.icu-project.org/icu-bin/ubrowse?scr=151&b=10910 for more
146       // information).
147       {
148 #if defined(WCHAR_T_IS_32_BIT)
149           L"abc 123"
150           L" ! \x10910 !",
151 #elif defined(WCHAR_T_IS_16_BIT)
152           L"abc 123"
153           L" ! \xd802\xdd10 !",
154 #else
155 #error wchar_t should be either UTF-16 or UTF-32
156 #endif
157           RIGHT_TO_LEFT},
158       {
159 #if defined(WCHAR_T_IS_32_BIT)
160           L"abc 123"
161           L" ! \x10401 !",
162 #elif defined(WCHAR_T_IS_16_BIT)
163           L"abc 123"
164           L" ! \xd801\xdc01 !",
165 #else
166 #error wchar_t should be either UTF-16 or UTF-32
167 #endif
168           LEFT_TO_RIGHT},
169   };
170 
171   for (auto& i : cases)
172     EXPECT_EQ(i.direction,
173               GetLastStrongCharacterDirection(WideToUTF16(i.text)));
174 }
175 
TEST_F(RTLTest,GetStringDirection)176 TEST_F(RTLTest, GetStringDirection) {
177   struct {
178     const wchar_t* text;
179     TextDirection direction;
180   } cases[] = {
181       // Test pure LTR string.
182       {L"foobar", LEFT_TO_RIGHT},
183       {L".foobar", LEFT_TO_RIGHT},
184       {L"foo, bar", LEFT_TO_RIGHT},
185       // Test pure LTR with strong directionality characters of type LRE.
186       {L"\x202a\x202a", LEFT_TO_RIGHT},
187       {L".\x202a\x202a", LEFT_TO_RIGHT},
188       {L"\x202a, \x202a", LEFT_TO_RIGHT},
189       // Test pure LTR with strong directionality characters of type LRO.
190       {L"\x202d\x202d", LEFT_TO_RIGHT},
191       {L".\x202d\x202d", LEFT_TO_RIGHT},
192       {L"\x202d, \x202d", LEFT_TO_RIGHT},
193       // Test pure LTR with various types of strong directionality characters.
194       {L"foo \x202a\x202d", LEFT_TO_RIGHT},
195       {L".\x202d foo \x202a", LEFT_TO_RIGHT},
196       {L"\x202a, \x202d foo", LEFT_TO_RIGHT},
197       // Test pure RTL with strong directionality characters of type R.
198       {L"\x05d0\x05d0", RIGHT_TO_LEFT},
199       {L".\x05d0\x05d0", RIGHT_TO_LEFT},
200       {L"\x05d0, \x05d0", RIGHT_TO_LEFT},
201       // Test pure RTL with strong directionality characters of type RLE.
202       {L"\x202b\x202b", RIGHT_TO_LEFT},
203       {L".\x202b\x202b", RIGHT_TO_LEFT},
204       {L"\x202b, \x202b", RIGHT_TO_LEFT},
205       // Test pure RTL with strong directionality characters of type RLO.
206       {L"\x202e\x202e", RIGHT_TO_LEFT},
207       {L".\x202e\x202e", RIGHT_TO_LEFT},
208       {L"\x202e, \x202e", RIGHT_TO_LEFT},
209       // Test pure RTL with strong directionality characters of type AL.
210       {L"\x0622\x0622", RIGHT_TO_LEFT},
211       {L".\x0622\x0622", RIGHT_TO_LEFT},
212       {L"\x0622, \x0622", RIGHT_TO_LEFT},
213       // Test pure RTL with various types of strong directionality characters.
214       {L"\x05d0\x202b\x202e\x0622", RIGHT_TO_LEFT},
215       {L".\x202b\x202e\x0622\x05d0", RIGHT_TO_LEFT},
216       {L"\x0622\x202e, \x202b\x05d0", RIGHT_TO_LEFT},
217       // Test bidi strings.
218       {L"foo \x05d0 bar", UNKNOWN_DIRECTION},
219       {L"\x202b foo bar", UNKNOWN_DIRECTION},
220       {L"!foo \x0622 bar", UNKNOWN_DIRECTION},
221       {L"\x202a\x202b", UNKNOWN_DIRECTION},
222       {L"\x202e\x202d", UNKNOWN_DIRECTION},
223       {L"\x0622\x202a", UNKNOWN_DIRECTION},
224       {L"\x202d\x05d0", UNKNOWN_DIRECTION},
225       // Test a string without strong directionality characters.
226       {L",!.{}", LEFT_TO_RIGHT},
227       // Test empty string.
228       {L"", LEFT_TO_RIGHT},
229       {
230 #if defined(WCHAR_T_IS_32_BIT)
231           L" ! \x10910"
232           L"abc 123",
233 #elif defined(WCHAR_T_IS_16_BIT)
234           L" ! \xd802\xdd10"
235           L"abc 123",
236 #else
237 #error wchar_t should be either UTF-16 or UTF-32
238 #endif
239           UNKNOWN_DIRECTION},
240       {
241 #if defined(WCHAR_T_IS_32_BIT)
242           L" ! \x10401"
243           L"abc 123",
244 #elif defined(WCHAR_T_IS_16_BIT)
245           L" ! \xd801\xdc01"
246           L"abc 123",
247 #else
248 #error wchar_t should be either UTF-16 or UTF-32
249 #endif
250           LEFT_TO_RIGHT},
251   };
252 
253   for (auto& i : cases)
254     EXPECT_EQ(i.direction, GetStringDirection(WideToUTF16(i.text)));
255 }
256 
TEST_F(RTLTest,WrapPathWithLTRFormatting)257 TEST_F(RTLTest, WrapPathWithLTRFormatting) {
258   const wchar_t* cases[] = {
259     // Test common path, such as "c:\foo\bar".
260     L"c:/foo/bar",
261     // Test path with file name, such as "c:\foo\bar\test.jpg".
262     L"c:/foo/bar/test.jpg",
263     // Test path ending with punctuation, such as "c:\(foo)\bar.".
264     L"c:/(foo)/bar.",
265     // Test path ending with separator, such as "c:\foo\bar\".
266     L"c:/foo/bar/",
267     // Test path with RTL character.
268     L"c:/\x05d0",
269     // Test path with 2 level RTL directory names.
270     L"c:/\x05d0/\x0622",
271     // Test path with mixed RTL/LTR directory names and ending with punctuation.
272     L"c:/\x05d0/\x0622/(foo)/b.a.r.",
273     // Test path without driver name, such as "/foo/bar/test/jpg".
274     L"/foo/bar/test.jpg",
275     // Test path start with current directory, such as "./foo".
276     L"./foo",
277     // Test path start with parent directory, such as "../foo/bar.jpg".
278     L"../foo/bar.jpg",
279     // Test absolute path, such as "//foo/bar.jpg".
280     L"//foo/bar.jpg",
281     // Test path with mixed RTL/LTR directory names.
282     L"c:/foo/\x05d0/\x0622/\x05d1.jpg",
283     // Test empty path.
284     L""
285   };
286 
287   for (auto*& i : cases) {
288     FilePath path;
289 #if BUILDFLAG(IS_WIN)
290     std::wstring win_path(i);
291     std::replace(win_path.begin(), win_path.end(), '/', '\\');
292     path = FilePath(win_path);
293     std::wstring wrapped_expected =
294         std::wstring(L"\x202a") + win_path + L"\x202c";
295 #else
296     path = FilePath(base::SysWideToNativeMB(i));
297     std::wstring wrapped_expected = std::wstring(L"\x202a") + i + L"\x202c";
298 #endif
299     std::u16string localized_file_path_string;
300     WrapPathWithLTRFormatting(path, &localized_file_path_string);
301 
302     std::wstring wrapped_actual = UTF16ToWide(localized_file_path_string);
303     EXPECT_EQ(wrapped_expected, wrapped_actual);
304   }
305 }
306 
// Checks WrapStringWithLTRFormatting() / WrapStringWithRTLFormatting() under
// both UI directions: an empty string stays empty, and any other input comes
// back as <embedding mark> + input + <pop directional formatting>.
TEST_F(RTLTest, WrapString) {
  const wchar_t* const kInputs[] = {
      L" . ",
      L"abc",
      L"a" L"\x5d0\x5d1",
      L"a" L"\x5d1" L"b",
      L"\x5d0\x5d1\x5d2",
      L"\x5d0\x5d1" L"a",
      L"\x5d0" L"a" L"\x5d1",
  };

  const bool initial_rtl = IsRTL();

  test::ScopedRestoreICUDefaultLocale restore_locale;
  // Two passes: each one flips the application default text direction, so
  // both directions are covered and the starting value is reached again.
  for (int pass = 0; pass < 2; ++pass) {
    SetRTLForTesting(!IsRTL());

    std::u16string empty;
    WrapStringWithLTRFormatting(&empty);
    EXPECT_TRUE(empty.empty());
    WrapStringWithRTLFormatting(&empty);
    EXPECT_TRUE(empty.empty());

    for (const wchar_t* wide_input : kInputs) {
      const std::u16string input = WideToUTF16(wide_input);

      std::u16string ltr_wrap = input;
      WrapStringWithLTRFormatting(&ltr_wrap);
      EXPECT_EQ(ltr_wrap.front(), kLeftToRightEmbeddingMark);
      EXPECT_EQ(ltr_wrap.substr(1, ltr_wrap.length() - 2), input);
      EXPECT_EQ(ltr_wrap.back(), kPopDirectionalFormatting);

      std::u16string rtl_wrap = input;
      WrapStringWithRTLFormatting(&rtl_wrap);
      EXPECT_EQ(rtl_wrap.front(), kRightToLeftEmbeddingMark);
      EXPECT_EQ(rtl_wrap.substr(1, rtl_wrap.length() - 2), input);
      EXPECT_EQ(rtl_wrap.back(), kPopDirectionalFormatting);
    }
  }

  EXPECT_EQ(initial_rtl, IsRTL());
}
349 
// Checks whether GetDisplayStringInLTRDirectionality() changes its input,
// separately for an LTR UI and an RTL UI. Only "changed vs. unchanged" is
// asserted, not the exact wrapped output.
TEST_F(RTLTest, GetDisplayStringInLTRDirectionality) {
  struct WrapCase {
    const wchar_t* path;
    bool wrap_ltr;  // Output expected to differ from input when UI is LTR.
    bool wrap_rtl;  // Output expected to differ from input when UI is RTL.
  };
  const WrapCase kCases[] = {
      {L"test", false, true},
      {L"test.html", false, true},
      {L"\x05d0\x05d1\x05d2", true, true},
      {L"\x05d0\x05d1\x05d2.txt", true, true},
      {L"\x05d0" L"abc", true, true},
      {L"\x05d0" L"abc.txt", true, true},
      {L"abc\x05d0\x05d1", false, true},
      {L"abc\x05d0\x05d1.jpg", false, true},
  };

  const bool initial_rtl = IsRTL();

  test::ScopedRestoreICUDefaultLocale restore_locale;
  // Two passes: each one flips the application default text direction.
  for (int pass = 0; pass < 2; ++pass) {
    SetRTLForTesting(!IsRTL());
    for (const WrapCase& entry : kCases) {
      const std::u16string input = WideToUTF16(entry.path);
      const std::u16string output = GetDisplayStringInLTRDirectionality(input);
      // Pick the expectation that matches the current UI directionality.
      const bool expect_wrapped = IsRTL() ? entry.wrap_rtl : entry.wrap_ltr;
      if (expect_wrapped) {
        EXPECT_NE(output, input);
      } else {
        EXPECT_EQ(output, input);
      }
    }
  }

  EXPECT_EQ(initial_rtl, IsRTL());
}
385 
// Checks GetTextDirectionForLocale() against a list of locales expected to be
// right-to-left and a list expected to be left-to-right.
TEST_F(RTLTest, GetTextDirection) {
  // "iw" is an obsolete code for Hebrew. Farsi ("fa") and Urdu ("ur") are not
  // localized yet, but text layout direction information exists for them.
  const char* const kRtlLocales[] = {
      "ar", "ar_EG", "he", "he_IL", "iw", "fa", "ur",
  };
  for (const char* locale : kRtlLocales) {
    EXPECT_EQ(RIGHT_TO_LEFT, GetTextDirectionForLocale(locale));
  }

#if 0
  // Enable these when we include the minimal locale data for Azerbaijani
  // written in Arabic and Dhivehi. At the moment, our copy of
  // ICU data does not have entries for them.
  EXPECT_EQ(RIGHT_TO_LEFT, GetTextDirectionForLocale("az_Arab"));
  // Dhivehi that uses Thaana script.
  EXPECT_EQ(RIGHT_TO_LEFT, GetTextDirectionForLocale("dv"));
#endif

  // "zh-CN": Chinese in China, spelled with '-'. "fil": Filipino, a 3-letter
  // code. "ru": Russian. "ja": Japanese, which uses multiple scripts.
  const char* const kLtrLocales[] = {
      "en", "zh-CN", "fil", "ru", "ja",
  };
  for (const char* locale : kLtrLocales) {
    EXPECT_EQ(LEFT_TO_RIGHT, GetTextDirectionForLocale(locale));
  }
}
415 
// Checks GetTextDirectionForLocaleInStartUp() against a list of locales
// expected to be right-to-left and a list expected to be left-to-right.
TEST_F(RTLTest, GetTextDirectionForLocaleInStartUp) {
  // "iw" is an obsolete code for Hebrew. Farsi ("fa") and Urdu ("ur") are not
  // localized yet, but text layout direction information exists for them.
  const char* const kRtlLocales[] = {
      "ar", "ar_EG", "he", "he_IL", "iw", "fa", "ur",
  };
  for (const char* locale : kRtlLocales) {
    EXPECT_EQ(RIGHT_TO_LEFT, GetTextDirectionForLocaleInStartUp(locale));
  }

  // "zh-CN": Chinese in China, spelled with '-'. "fil": Filipino, a 3-letter
  // code. "ru": Russian. "ja": Japanese, which uses multiple scripts.
  const char* const kLtrLocales[] = {
      "en", "zh-CN", "fil", "ru", "ja",
  };
  for (const char* locale : kLtrLocales) {
    EXPECT_EQ(LEFT_TO_RIGHT, GetTextDirectionForLocaleInStartUp(locale));
  }
}
437 
// Round-trip check: for every string that AdjustStringForLocaleDirection()
// reports as adjusted, UnadjustStringForLocaleDirection() must succeed and
// give back the original text. Runs under both UI directions.
TEST_F(RTLTest, UnadjustStringForLocaleDirection) {
  // These strings are the BMP-only inputs also used by the
  // GetFirstStrongCharacterDirection test.
  const wchar_t* const kInputs[] = {
      L"foo bar",
      L"foo \x05d0 bar",
      L"\x05d0 foo bar",
      L"!foo \x05d0 bar",
      L",\x05d0 foo bar",
      L"\x202a \x05d0 foo  bar",
      L"\x202d \x05d0 foo  bar",
      L"\x202b foo \x05d0 bar",
      L"\x202e foo \x05d0 bar",
      L"\x0622 foo \x05d0 bar",
  };

  const bool initial_rtl = IsRTL();

  test::ScopedRestoreICUDefaultLocale restore_locale;
  // Two passes: each one flips the application default text direction.
  for (int pass = 0; pass < 2; ++pass) {
    SetRTLForTesting(!IsRTL());

    for (const wchar_t* wide_input : kInputs) {
      const std::u16string unadjusted_string = WideToUTF16(wide_input);
      std::u16string adjusted_string = unadjusted_string;

      // Only inputs reported as adjusted take part in the round trip.
      if (AdjustStringForLocaleDirection(&adjusted_string)) {
        EXPECT_NE(unadjusted_string, adjusted_string);
        EXPECT_TRUE(UnadjustStringForLocaleDirection(&adjusted_string));
        EXPECT_EQ(unadjusted_string, adjusted_string)
            << " for test case [" << unadjusted_string
            << "] with IsRTL() == " << IsRTL();
      }
    }
  }

  EXPECT_EQ(initial_rtl, IsRTL());
}
477 
// Checks EnsureTerminatedDirectionalFormatting() under both UI directions:
// the expected output is the input followed by one U+202C for each
// directional-formatting character that the input leaves unterminated.
TEST_F(RTLTest, EnsureTerminatedDirectionalFormatting) {
  struct TerminationCase {
    const wchar_t* input;
    const wchar_t* expected;
  };
  const TerminationCase kCases[] = {
      // No dir-formatting characters at all.
      {L"google.com", L"google.com"},
      // Properly terminated dir-formatting character.
      {L"\x202egoogle.com\x202c", L"\x202egoogle.com\x202c"},
      // Over-terminated dir-formatting characters.
      {L"\x202egoogle\x202c.com\x202c", L"\x202egoogle\x202c.com\x202c"},
      // String beginning with a dir-formatting character.
      {L"\x202emoc.elgoog", L"\x202emoc.elgoog\x202c"},
      // String that over-terminates and then re-opens.
      {L"\x202egoogle\x202c\x202c.\x202eom",
       L"\x202egoogle\x202c\x202c.\x202eom\x202c"},
      // Dir-formatting character in the middle of the string.
      {L"google\x202e.com", L"google\x202e.com\x202c"},
      // Multiple dir-formatting characters.
      {L"\x202egoogle\x202e.com/\x202eguest",
       L"\x202egoogle\x202e.com/\x202eguest\x202c\x202c\x202c"},
      // The other dir-formatting characters (U+202A, U+202B, and U+202D).
      {L"\x202agoogle.com", L"\x202agoogle.com\x202c"},
      {L"\x202bgoogle.com", L"\x202bgoogle.com\x202c"},
      {L"\x202dgoogle.com", L"\x202dgoogle.com\x202c"},
  };

  const bool initial_rtl = IsRTL();

  test::ScopedRestoreICUDefaultLocale restore_locale;
  // Two passes: each one flips the application default text direction.
  for (int pass = 0; pass < 2; ++pass) {
    SetRTLForTesting(!IsRTL());
    for (const TerminationCase& entry : kCases) {
      std::u16string actual = WideToUTF16(entry.input);
      const std::u16string expected = WideToUTF16(entry.expected);
      EnsureTerminatedDirectionalFormatting(&actual);
      EXPECT_EQ(expected, actual);
    }
  }
  EXPECT_EQ(initial_rtl, IsRTL());
}
520 
TEST_F(RTLTest,SanitizeUserSuppliedString)521 TEST_F(RTLTest, SanitizeUserSuppliedString) {
522   struct {
523     const wchar_t* unformatted_text;
524     const wchar_t* formatted_text;
525   } cases[] = {
526       // Tests RTL string with properly terminated dir-formatting character.
527       {L"\x202eكبير Google التطبيق\x202c", L"\x202eكبير Google التطبيق\x202c"},
528       // Tests RTL string with over-terminated dir-formatting characters.
529       {L"\x202eكبير Google\x202cالتطبيق\x202c",
530        L"\x202eكبير Google\x202cالتطبيق\x202c"},
531       // Tests RTL string that over-terminates then re-opens.
532       {L"\x202eكبير Google\x202c\x202cالتطبيق\x202e",
533        L"\x202eكبير Google\x202c\x202cالتطبيق\x202e\x202c"},
534       // Tests RTL string with multiple dir-formatting characters.
535       {L"\x202eك\x202eبير Google الت\x202eطبيق",
536        L"\x202eك\x202eبير Google الت\x202eطبيق\x202c\x202c\x202c"},
537       // Test the other dir-formatting characters (U+202A, U+202B, and U+202D).
538       {L"\x202aكبير Google التطبيق", L"\x202aكبير Google التطبيق\x202c"},
539       {L"\x202bكبير Google التطبيق", L"\x202bكبير Google التطبيق\x202c"},
540       {L"\x202dكبير Google التطبيق", L"\x202dكبير Google التطبيق\x202c"},
541 
542   };
543 
544   for (auto& i : cases) {
545     // On Windows for an LTR locale, no changes to the string are made.
546     std::u16string prefix, suffix = u"";
547 #if !BUILDFLAG(IS_WIN)
548     prefix = u"\x200e\x202b";
549     suffix = u"\x202c\x200e";
550 #endif  // !BUILDFLAG(IS_WIN)
551     std::u16string unsanitized_text = WideToUTF16(i.unformatted_text);
552     std::u16string sanitized_text =
553         prefix + WideToUTF16(i.formatted_text) + suffix;
554     SanitizeUserSuppliedString(&unsanitized_text);
555     EXPECT_EQ(sanitized_text, unsanitized_text);
556   }
557 }
558 
559 class SetICULocaleTest : public PlatformTest {};
560 
// Feeds SetICUDefaultLocale() a very long locale id and checks what ICU ends
// up with as its default locale. `id` is the tag passed in; `lid` is the
// locale name expected back from ICU for the same tag.
TEST_F(SetICULocaleTest, OverlongLocaleId) {
  test::ScopedRestoreICUDefaultLocale restore_locale;

  constexpr size_t kMinIdLength = 152;
  std::string id = "fr-ca-x-foo";
  std::string lid = "fr_CA@x=foo";
  // Grow both strings in lockstep until the id reaches kMinIdLength.
  while (id.length() < kMinIdLength) {
    id += "-x-foo";
    lid += "-x-foo";
  }

  SetICUDefaultLocale(id);
  EXPECT_STRNE("en_US", icu::Locale::getDefault().getName());

  // Make the id a little longer still and expect it to be preserved in full.
  id += "zzz";
  lid += "zzz";
  SetICUDefaultLocale(id);
  // ICU-21639 fix the long locale issue now.
  EXPECT_STREQ(lid.c_str(), icu::Locale::getDefault().getName());
}
577 
578 }  // namespace i18n
579 }  // namespace base
580