xref: /aosp_15_r20/external/pdfium/core/fxcrt/fx_string.cpp (revision 3ac0a46f773bac49fa9476ec2b1cf3f8da5ec3a4)
1 // Copyright 2017 The PDFium Authors
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
6 
7 #include "core/fxcrt/fx_string.h"
8 
9 #include <stdint.h>
10 
11 #include <iterator>
12 
13 #include "build/build_config.h"
14 #include "core/fxcrt/bytestring.h"
15 #include "core/fxcrt/code_point_view.h"
16 #include "core/fxcrt/fx_extension.h"
17 #include "core/fxcrt/span_util.h"
18 #include "core/fxcrt/string_view_template.h"
19 #include "core/fxcrt/utf16.h"
20 #include "core/fxcrt/widestring.h"
21 #include "third_party/base/compiler_specific.h"
22 #include "third_party/base/containers/span.h"
23 
24 namespace {
25 
26 // Appends a Unicode code point to a `ByteString` using UTF-8.
27 //
28 // TODO(crbug.com/pdfium/2041): Migrate to `ByteString`.
AppendCodePointToByteString(char32_t code_point,ByteString & buffer)29 void AppendCodePointToByteString(char32_t code_point, ByteString& buffer) {
30   if (code_point > pdfium::kMaximumSupplementaryCodePoint) {
31     // Invalid code point above U+10FFFF.
32     return;
33   }
34 
35   if (code_point < 0x80) {
36     // 7-bit code points are unchanged in UTF-8.
37     buffer += code_point;
38     return;
39   }
40 
41   int byte_size;
42   if (code_point < 0x800) {
43     byte_size = 2;
44   } else if (code_point < 0x10000) {
45     byte_size = 3;
46   } else {
47     byte_size = 4;
48   }
49 
50   static constexpr uint8_t kPrefix[] = {0xc0, 0xe0, 0xf0};
51   int order = 1 << ((byte_size - 1) * 6);
52   buffer += kPrefix[byte_size - 2] | (code_point / order);
53   for (int i = 0; i < byte_size - 1; i++) {
54     code_point = code_point % order;
55     order >>= 6;
56     buffer += 0x80 | (code_point / order);
57   }
58 }
59 
60 // Appends a Unicode code point to a `WideString` using either UTF-16 or UTF-32,
61 // depending on the platform's definition of `wchar_t`.
62 //
63 // TODO(crbug.com/pdfium/2031): Always use UTF-16.
64 // TODO(crbug.com/pdfium/2041): Migrate to `WideString`.
AppendCodePointToWideString(char32_t code_point,WideString & buffer)65 void AppendCodePointToWideString(char32_t code_point, WideString& buffer) {
66   if (code_point > pdfium::kMaximumSupplementaryCodePoint) {
67     // Invalid code point above U+10FFFF.
68     return;
69   }
70 
71 #if defined(WCHAR_T_IS_UTF16)
72   if (code_point < pdfium::kMinimumSupplementaryCodePoint) {
73     buffer += static_cast<wchar_t>(code_point);
74   } else {
75     // Encode as UTF-16 surrogate pair.
76     pdfium::SurrogatePair surrogate_pair(code_point);
77     buffer += surrogate_pair.high();
78     buffer += surrogate_pair.low();
79   }
80 #else
81   buffer += static_cast<wchar_t>(code_point);
82 #endif  // defined(WCHAR_T_IS_UTF16)
83 }
84 
85 }  // namespace
86 
FX_UTF8Encode(WideStringView wsStr)87 ByteString FX_UTF8Encode(WideStringView wsStr) {
88   ByteString buffer;
89   for (char32_t code_point : pdfium::CodePointView(wsStr)) {
90     AppendCodePointToByteString(code_point, buffer);
91   }
92   return buffer;
93 }
94 
FX_UTF8Decode(ByteStringView bsStr)95 WideString FX_UTF8Decode(ByteStringView bsStr) {
96   WideString buffer;
97 
98   int remaining = 0;
99   char32_t code_point = 0;
100   for (char byte : bsStr) {
101     uint8_t code_unit = static_cast<uint8_t>(byte);
102     if (code_unit < 0x80) {
103       remaining = 0;
104       AppendCodePointToWideString(code_unit, buffer);
105     } else if (code_unit < 0xc0) {
106       if (remaining > 0) {
107         --remaining;
108         code_point = (code_point << 6) | (code_unit & 0x3f);
109         if (remaining == 0) {
110           AppendCodePointToWideString(code_point, buffer);
111         }
112       }
113     } else if (code_unit < 0xe0) {
114       remaining = 1;
115       code_point = code_unit & 0x1f;
116     } else if (code_unit < 0xf0) {
117       remaining = 2;
118       code_point = code_unit & 0x0f;
119     } else if (code_unit < 0xf8) {
120       remaining = 3;
121       code_point = code_unit & 0x07;
122     } else {
123       remaining = 0;
124     }
125   }
126 
127   return buffer;
128 }
129 
130 namespace {
131 
// Place values of the first eleven digits after a decimal point (10^-1
// through 10^-11), indexed by digit position. Passed to `StringTo<float>()`.
constexpr float kFractionScalesFloat[] = {
    0.1f,         0.01f,         0.001f,        0.0001f,
    0.00001f,     0.000001f,     0.0000001f,    0.00000001f,
    0.000000001f, 0.0000000001f, 0.00000000001f};

// Double-precision counterpart of the table above, passed to
// `StringTo<double>()`. Declared `constexpr` like the float table so both are
// usable in constant expressions.
constexpr double kFractionScalesDouble[] = {
    0.1,       0.01,       0.001,       0.0001,       0.00001,      0.000001,
    0.0000001, 0.00000001, 0.000000001, 0.0000000001, 0.00000000001};
140 
141 template <class T>
StringTo(ByteStringView strc,pdfium::span<const T> fractional_scales)142 T StringTo(ByteStringView strc, pdfium::span<const T> fractional_scales) {
143   if (strc.IsEmpty())
144     return 0;
145 
146   bool bNegative = false;
147   size_t cc = 0;
148   size_t len = strc.GetLength();
149   if (strc[0] == '+') {
150     cc++;
151   } else if (strc[0] == '-') {
152     bNegative = true;
153     cc++;
154   }
155   while (cc < len) {
156     if (strc[cc] != '+' && strc[cc] != '-')
157       break;
158     cc++;
159   }
160   T value = 0;
161   while (cc < len) {
162     if (strc[cc] == '.')
163       break;
164     value = value * 10 + FXSYS_DecimalCharToInt(strc.CharAt(cc));
165     cc++;
166   }
167   size_t scale = 0;
168   if (cc < len && strc[cc] == '.') {
169     cc++;
170     while (cc < len) {
171       value +=
172           fractional_scales[scale] * FXSYS_DecimalCharToInt(strc.CharAt(cc));
173       scale++;
174       if (scale == fractional_scales.size())
175         break;
176       cc++;
177     }
178   }
179   return bNegative ? -value : value;
180 }
181 
182 template <class T>
ToString(T value,int (* round_func)(T),pdfium::span<char> buf)183 size_t ToString(T value, int (*round_func)(T), pdfium::span<char> buf) {
184   buf[0] = '0';
185   buf[1] = '\0';
186   if (value == 0) {
187     return 1;
188   }
189   bool bNegative = false;
190   if (value < 0) {
191     bNegative = true;
192     value = -value;
193   }
194   int scale = 1;
195   int scaled = round_func(value);
196   while (scaled < 100000) {
197     if (scale == 1000000) {
198       break;
199     }
200     scale *= 10;
201     scaled = round_func(value * scale);
202   }
203   if (scaled == 0) {
204     return 1;
205   }
206   char buf2[32];
207   size_t buf_size = 0;
208   if (bNegative) {
209     buf[buf_size++] = '-';
210   }
211   int i = scaled / scale;
212   FXSYS_itoa(i, buf2, 10);
213   size_t len = strlen(buf2);
214   fxcrt::spancpy(buf.subspan(buf_size), pdfium::make_span(buf2).first(len));
215   buf_size += len;
216   int fraction = scaled % scale;
217   if (fraction == 0) {
218     return buf_size;
219   }
220   buf[buf_size++] = '.';
221   scale /= 10;
222   while (fraction) {
223     buf[buf_size++] = '0' + fraction / scale;
224     fraction %= scale;
225     scale /= 10;
226   }
227   return buf_size;
228 }
229 
230 }  // namespace
231 
StringToFloat(ByteStringView strc)232 float StringToFloat(ByteStringView strc) {
233   return StringTo<float>(strc, kFractionScalesFloat);
234 }
235 
StringToFloat(WideStringView wsStr)236 float StringToFloat(WideStringView wsStr) {
237   return StringToFloat(FX_UTF8Encode(wsStr).AsStringView());
238 }
239 
FloatToString(float f,pdfium::span<char> buf)240 size_t FloatToString(float f, pdfium::span<char> buf) {
241   return ToString<float>(f, FXSYS_roundf, buf);
242 }
243 
StringToDouble(ByteStringView strc)244 double StringToDouble(ByteStringView strc) {
245   return StringTo<double>(strc, kFractionScalesDouble);
246 }
247 
StringToDouble(WideStringView wsStr)248 double StringToDouble(WideStringView wsStr) {
249   return StringToDouble(FX_UTF8Encode(wsStr).AsStringView());
250 }
251 
DoubleToString(double d,pdfium::span<char> buf)252 size_t DoubleToString(double d, pdfium::span<char> buf) {
253   return ToString<double>(d, FXSYS_round, buf);
254 }
255 
256 namespace fxcrt {
257 
258 template std::vector<ByteString> Split<ByteString>(const ByteString& that,
259                                                    ByteString::CharType ch);
260 template std::vector<WideString> Split<WideString>(const WideString& that,
261                                                    WideString::CharType ch);
262 
263 }  // namespace fxcrt
264