// Source: /aosp_15_r20/external/icing/icing/util/i18n-utils_test.cc
// (revision 8b6cd535a057e39b3b86660c4aa06c99747c2136)
//
// Copyright (C) 2019 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "icing/util/i18n-utils.h"

#include <memory>
#include <string>
#include <string_view>

#include "gmock/gmock.h"
#include "gtest/gtest.h"
#include "unicode/uchar.h"

23*8b6cd535SAndroid Build Coastguard Worker namespace icing {
24*8b6cd535SAndroid Build Coastguard Worker namespace lib {
25*8b6cd535SAndroid Build Coastguard Worker namespace {
26*8b6cd535SAndroid Build Coastguard Worker 
27*8b6cd535SAndroid Build Coastguard Worker using ::testing::Eq;
28*8b6cd535SAndroid Build Coastguard Worker 
TEST(IcuI18nUtilsTest,IsPunctuationAtSameAsIcuIsPunct)29*8b6cd535SAndroid Build Coastguard Worker TEST(IcuI18nUtilsTest, IsPunctuationAtSameAsIcuIsPunct) {
30*8b6cd535SAndroid Build Coastguard Worker   // Iterate through ASCII values
31*8b6cd535SAndroid Build Coastguard Worker   for (int i = 0; i <= 127; ++i) {
32*8b6cd535SAndroid Build Coastguard Worker     char ascii = i;
33*8b6cd535SAndroid Build Coastguard Worker 
34*8b6cd535SAndroid Build Coastguard Worker     std::string ascii_string = "";
35*8b6cd535SAndroid Build Coastguard Worker     ascii_string.push_back(ascii);
36*8b6cd535SAndroid Build Coastguard Worker 
37*8b6cd535SAndroid Build Coastguard Worker     EXPECT_EQ(i18n_utils::IsPunctuationAt(ascii_string, /*position=*/0),
38*8b6cd535SAndroid Build Coastguard Worker 
39*8b6cd535SAndroid Build Coastguard Worker               u_ispunct(ascii));
40*8b6cd535SAndroid Build Coastguard Worker   }
41*8b6cd535SAndroid Build Coastguard Worker }
42*8b6cd535SAndroid Build Coastguard Worker 
TEST(IcuI18nUtilsTest,IsAlphabeticAt)43*8b6cd535SAndroid Build Coastguard Worker TEST(IcuI18nUtilsTest, IsAlphabeticAt) {
44*8b6cd535SAndroid Build Coastguard Worker   // Test alphabetic and non-alphabetic ascii characters
45*8b6cd535SAndroid Build Coastguard Worker   constexpr std::string_view kSomeAscii = "iJ?9";
46*8b6cd535SAndroid Build Coastguard Worker   EXPECT_TRUE(i18n_utils::IsAlphabeticAt(kSomeAscii, /*position=*/0));   // 'i'
47*8b6cd535SAndroid Build Coastguard Worker   EXPECT_TRUE(i18n_utils::IsAlphabeticAt(kSomeAscii, /*position=*/1));   // 'J'
48*8b6cd535SAndroid Build Coastguard Worker   EXPECT_FALSE(i18n_utils::IsAlphabeticAt(kSomeAscii, /*position=*/2));  // '?'
49*8b6cd535SAndroid Build Coastguard Worker   EXPECT_FALSE(i18n_utils::IsAlphabeticAt(kSomeAscii, /*position=*/3));  // '9'
50*8b6cd535SAndroid Build Coastguard Worker 
51*8b6cd535SAndroid Build Coastguard Worker   constexpr std::string_view kSomeNonAscii = "��ñ①カ";
52*8b6cd535SAndroid Build Coastguard Worker   EXPECT_FALSE(
53*8b6cd535SAndroid Build Coastguard Worker       i18n_utils::IsAlphabeticAt(kSomeNonAscii, /*position=*/0));  // '��'
54*8b6cd535SAndroid Build Coastguard Worker   EXPECT_EQ(i18n_utils::GetUtf8Length(i18n_utils::GetUChar32At(
55*8b6cd535SAndroid Build Coastguard Worker                 kSomeNonAscii.data(), kSomeNonAscii.length(), 0)),
56*8b6cd535SAndroid Build Coastguard Worker             4);
57*8b6cd535SAndroid Build Coastguard Worker   EXPECT_TRUE(
58*8b6cd535SAndroid Build Coastguard Worker       i18n_utils::IsAlphabeticAt(kSomeNonAscii, /*position=*/4));  // 'ñ'
59*8b6cd535SAndroid Build Coastguard Worker   EXPECT_EQ(i18n_utils::GetUtf8Length(i18n_utils::GetUChar32At(
60*8b6cd535SAndroid Build Coastguard Worker                 kSomeNonAscii.data(), kSomeNonAscii.length(), 4)),
61*8b6cd535SAndroid Build Coastguard Worker             2);
62*8b6cd535SAndroid Build Coastguard Worker   EXPECT_FALSE(
63*8b6cd535SAndroid Build Coastguard Worker       i18n_utils::IsAlphabeticAt(kSomeNonAscii, /*position=*/6));  // '①'
64*8b6cd535SAndroid Build Coastguard Worker   EXPECT_EQ(i18n_utils::GetUtf8Length(i18n_utils::GetUChar32At(
65*8b6cd535SAndroid Build Coastguard Worker                 kSomeNonAscii.data(), kSomeNonAscii.length(), 6)),
66*8b6cd535SAndroid Build Coastguard Worker             3);
67*8b6cd535SAndroid Build Coastguard Worker   EXPECT_TRUE(
68*8b6cd535SAndroid Build Coastguard Worker       i18n_utils::IsAlphabeticAt(kSomeNonAscii, /*position=*/9));  // 'カ'
69*8b6cd535SAndroid Build Coastguard Worker   EXPECT_EQ(i18n_utils::GetUtf8Length(i18n_utils::GetUChar32At(
70*8b6cd535SAndroid Build Coastguard Worker                 kSomeNonAscii.data(), kSomeNonAscii.length(), 9)),
71*8b6cd535SAndroid Build Coastguard Worker             3);
72*8b6cd535SAndroid Build Coastguard Worker }
73*8b6cd535SAndroid Build Coastguard Worker 
TEST(IcuI18nUtilsTest,GetUtf8Length)74*8b6cd535SAndroid Build Coastguard Worker TEST(IcuI18nUtilsTest, GetUtf8Length) {
75*8b6cd535SAndroid Build Coastguard Worker   // Test alphabetic and non-alphabetic ascii characters
76*8b6cd535SAndroid Build Coastguard Worker   constexpr std::string_view kSomeAscii = "iJ?9";
77*8b6cd535SAndroid Build Coastguard Worker   EXPECT_EQ(i18n_utils::GetUtf8Length(i18n_utils::GetUChar32At(
78*8b6cd535SAndroid Build Coastguard Worker                 kSomeAscii.data(), kSomeAscii.length(), 0)),
79*8b6cd535SAndroid Build Coastguard Worker             1);  // 'i'
80*8b6cd535SAndroid Build Coastguard Worker   EXPECT_EQ(i18n_utils::GetUtf8Length(i18n_utils::GetUChar32At(
81*8b6cd535SAndroid Build Coastguard Worker                 kSomeAscii.data(), kSomeAscii.length(), 1)),
82*8b6cd535SAndroid Build Coastguard Worker             1);  // 'J'
83*8b6cd535SAndroid Build Coastguard Worker   EXPECT_EQ(i18n_utils::GetUtf8Length(i18n_utils::GetUChar32At(
84*8b6cd535SAndroid Build Coastguard Worker                 kSomeAscii.data(), kSomeAscii.length(), 2)),
85*8b6cd535SAndroid Build Coastguard Worker             1);  // '?'
86*8b6cd535SAndroid Build Coastguard Worker   EXPECT_EQ(i18n_utils::GetUtf8Length(i18n_utils::GetUChar32At(
87*8b6cd535SAndroid Build Coastguard Worker                 kSomeAscii.data(), kSomeAscii.length(), 3)),
88*8b6cd535SAndroid Build Coastguard Worker             1);  // '9'
89*8b6cd535SAndroid Build Coastguard Worker 
90*8b6cd535SAndroid Build Coastguard Worker   constexpr std::string_view kSomeNonAscii = "��ñ①カ";
91*8b6cd535SAndroid Build Coastguard Worker   EXPECT_EQ(i18n_utils::GetUtf8Length(i18n_utils::GetUChar32At(
92*8b6cd535SAndroid Build Coastguard Worker                 kSomeNonAscii.data(), kSomeNonAscii.length(), 0)),
93*8b6cd535SAndroid Build Coastguard Worker             4);  // '��'
94*8b6cd535SAndroid Build Coastguard Worker   EXPECT_EQ(i18n_utils::GetUtf8Length(i18n_utils::GetUChar32At(
95*8b6cd535SAndroid Build Coastguard Worker                 kSomeNonAscii.data(), kSomeNonAscii.length(), 4)),
96*8b6cd535SAndroid Build Coastguard Worker             2);  // 'ñ'
97*8b6cd535SAndroid Build Coastguard Worker   EXPECT_EQ(i18n_utils::GetUtf8Length(i18n_utils::GetUChar32At(
98*8b6cd535SAndroid Build Coastguard Worker                 kSomeNonAscii.data(), kSomeNonAscii.length(), 6)),
99*8b6cd535SAndroid Build Coastguard Worker             3);  // '①'
100*8b6cd535SAndroid Build Coastguard Worker   EXPECT_EQ(i18n_utils::GetUtf8Length(i18n_utils::GetUChar32At(
101*8b6cd535SAndroid Build Coastguard Worker                 kSomeNonAscii.data(), kSomeNonAscii.length(), 9)),
102*8b6cd535SAndroid Build Coastguard Worker             3);  // 'カ'
103*8b6cd535SAndroid Build Coastguard Worker }
104*8b6cd535SAndroid Build Coastguard Worker 
TEST(IcuI18nUtilsTest,SafeTruncate)105*8b6cd535SAndroid Build Coastguard Worker TEST(IcuI18nUtilsTest, SafeTruncate) {
106*8b6cd535SAndroid Build Coastguard Worker   // Test alphabetic and non-alphabetic ascii characters
107*8b6cd535SAndroid Build Coastguard Worker   constexpr std::string_view kSomeAscii = "iJ?9";
108*8b6cd535SAndroid Build Coastguard Worker   std::string truncated(kSomeAscii);
109*8b6cd535SAndroid Build Coastguard Worker   i18n_utils::SafeTruncateUtf8(&truncated, kSomeAscii.length() + 1);
110*8b6cd535SAndroid Build Coastguard Worker   EXPECT_THAT(truncated, Eq("iJ?9"));
111*8b6cd535SAndroid Build Coastguard Worker   truncated = kSomeAscii;
112*8b6cd535SAndroid Build Coastguard Worker   i18n_utils::SafeTruncateUtf8(&truncated, kSomeAscii.length());
113*8b6cd535SAndroid Build Coastguard Worker   EXPECT_THAT(truncated, Eq("iJ?9"));
114*8b6cd535SAndroid Build Coastguard Worker   truncated = kSomeAscii;
115*8b6cd535SAndroid Build Coastguard Worker   i18n_utils::SafeTruncateUtf8(&truncated, kSomeAscii.length() - 1);
116*8b6cd535SAndroid Build Coastguard Worker   EXPECT_THAT(truncated, Eq("iJ?"));
117*8b6cd535SAndroid Build Coastguard Worker 
118*8b6cd535SAndroid Build Coastguard Worker   constexpr std::string_view kSomeNonAscii = "��ñ①カ";
119*8b6cd535SAndroid Build Coastguard Worker   truncated = kSomeNonAscii;
120*8b6cd535SAndroid Build Coastguard Worker   i18n_utils::SafeTruncateUtf8(&truncated, kSomeNonAscii.length() + 1);
121*8b6cd535SAndroid Build Coastguard Worker   EXPECT_THAT(truncated, Eq("��ñ①カ"));
122*8b6cd535SAndroid Build Coastguard Worker   truncated = kSomeNonAscii;
123*8b6cd535SAndroid Build Coastguard Worker   i18n_utils::SafeTruncateUtf8(&truncated, kSomeNonAscii.length());
124*8b6cd535SAndroid Build Coastguard Worker   EXPECT_THAT(truncated, Eq("��ñ①カ"));
125*8b6cd535SAndroid Build Coastguard Worker   truncated = kSomeNonAscii;
126*8b6cd535SAndroid Build Coastguard Worker   i18n_utils::SafeTruncateUtf8(&truncated, kSomeNonAscii.length() - 1);
127*8b6cd535SAndroid Build Coastguard Worker   EXPECT_THAT(truncated, Eq("��ñ①"));
128*8b6cd535SAndroid Build Coastguard Worker   truncated = kSomeNonAscii;
129*8b6cd535SAndroid Build Coastguard Worker   i18n_utils::SafeTruncateUtf8(&truncated, kSomeNonAscii.length() - 2);
130*8b6cd535SAndroid Build Coastguard Worker   EXPECT_THAT(truncated, Eq("��ñ①"));
131*8b6cd535SAndroid Build Coastguard Worker   truncated = kSomeNonAscii;
132*8b6cd535SAndroid Build Coastguard Worker   i18n_utils::SafeTruncateUtf8(&truncated, kSomeNonAscii.length() - 3);
133*8b6cd535SAndroid Build Coastguard Worker   EXPECT_THAT(truncated, Eq("��ñ①"));
134*8b6cd535SAndroid Build Coastguard Worker   truncated = kSomeNonAscii;
135*8b6cd535SAndroid Build Coastguard Worker   i18n_utils::SafeTruncateUtf8(&truncated, kSomeNonAscii.length() - 4);
136*8b6cd535SAndroid Build Coastguard Worker   EXPECT_THAT(truncated, Eq("��ñ"));
137*8b6cd535SAndroid Build Coastguard Worker }
138*8b6cd535SAndroid Build Coastguard Worker 
139*8b6cd535SAndroid Build Coastguard Worker }  // namespace
140*8b6cd535SAndroid Build Coastguard Worker }  // namespace lib
141*8b6cd535SAndroid Build Coastguard Worker }  // namespace icing
142