1 // Copyright 2017 The PDFium Authors
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
6
7 #include "core/fxcrt/fx_string.h"
8
9 #include <stdint.h>
10
11 #include <iterator>
12
13 #include "build/build_config.h"
14 #include "core/fxcrt/bytestring.h"
15 #include "core/fxcrt/code_point_view.h"
16 #include "core/fxcrt/fx_extension.h"
17 #include "core/fxcrt/span_util.h"
18 #include "core/fxcrt/string_view_template.h"
19 #include "core/fxcrt/utf16.h"
20 #include "core/fxcrt/widestring.h"
21 #include "third_party/base/compiler_specific.h"
22 #include "third_party/base/containers/span.h"
23
24 namespace {
25
26 // Appends a Unicode code point to a `ByteString` using UTF-8.
27 //
28 // TODO(crbug.com/pdfium/2041): Migrate to `ByteString`.
AppendCodePointToByteString(char32_t code_point,ByteString & buffer)29 void AppendCodePointToByteString(char32_t code_point, ByteString& buffer) {
30 if (code_point > pdfium::kMaximumSupplementaryCodePoint) {
31 // Invalid code point above U+10FFFF.
32 return;
33 }
34
35 if (code_point < 0x80) {
36 // 7-bit code points are unchanged in UTF-8.
37 buffer += code_point;
38 return;
39 }
40
41 int byte_size;
42 if (code_point < 0x800) {
43 byte_size = 2;
44 } else if (code_point < 0x10000) {
45 byte_size = 3;
46 } else {
47 byte_size = 4;
48 }
49
50 static constexpr uint8_t kPrefix[] = {0xc0, 0xe0, 0xf0};
51 int order = 1 << ((byte_size - 1) * 6);
52 buffer += kPrefix[byte_size - 2] | (code_point / order);
53 for (int i = 0; i < byte_size - 1; i++) {
54 code_point = code_point % order;
55 order >>= 6;
56 buffer += 0x80 | (code_point / order);
57 }
58 }
59
60 // Appends a Unicode code point to a `WideString` using either UTF-16 or UTF-32,
61 // depending on the platform's definition of `wchar_t`.
62 //
63 // TODO(crbug.com/pdfium/2031): Always use UTF-16.
64 // TODO(crbug.com/pdfium/2041): Migrate to `WideString`.
AppendCodePointToWideString(char32_t code_point,WideString & buffer)65 void AppendCodePointToWideString(char32_t code_point, WideString& buffer) {
66 if (code_point > pdfium::kMaximumSupplementaryCodePoint) {
67 // Invalid code point above U+10FFFF.
68 return;
69 }
70
71 #if defined(WCHAR_T_IS_UTF16)
72 if (code_point < pdfium::kMinimumSupplementaryCodePoint) {
73 buffer += static_cast<wchar_t>(code_point);
74 } else {
75 // Encode as UTF-16 surrogate pair.
76 pdfium::SurrogatePair surrogate_pair(code_point);
77 buffer += surrogate_pair.high();
78 buffer += surrogate_pair.low();
79 }
80 #else
81 buffer += static_cast<wchar_t>(code_point);
82 #endif // defined(WCHAR_T_IS_UTF16)
83 }
84
85 } // namespace
86
FX_UTF8Encode(WideStringView wsStr)87 ByteString FX_UTF8Encode(WideStringView wsStr) {
88 ByteString buffer;
89 for (char32_t code_point : pdfium::CodePointView(wsStr)) {
90 AppendCodePointToByteString(code_point, buffer);
91 }
92 return buffer;
93 }
94
FX_UTF8Decode(ByteStringView bsStr)95 WideString FX_UTF8Decode(ByteStringView bsStr) {
96 WideString buffer;
97
98 int remaining = 0;
99 char32_t code_point = 0;
100 for (char byte : bsStr) {
101 uint8_t code_unit = static_cast<uint8_t>(byte);
102 if (code_unit < 0x80) {
103 remaining = 0;
104 AppendCodePointToWideString(code_unit, buffer);
105 } else if (code_unit < 0xc0) {
106 if (remaining > 0) {
107 --remaining;
108 code_point = (code_point << 6) | (code_unit & 0x3f);
109 if (remaining == 0) {
110 AppendCodePointToWideString(code_point, buffer);
111 }
112 }
113 } else if (code_unit < 0xe0) {
114 remaining = 1;
115 code_point = code_unit & 0x1f;
116 } else if (code_unit < 0xf0) {
117 remaining = 2;
118 code_point = code_unit & 0x0f;
119 } else if (code_unit < 0xf8) {
120 remaining = 3;
121 code_point = code_unit & 0x07;
122 } else {
123 remaining = 0;
124 }
125 }
126
127 return buffer;
128 }
129
130 namespace {
131
// Weights of successive digits after the decimal point (10^-1 .. 10^-11),
// indexed by zero-based digit position. The table length also caps how many
// fractional digits StringTo() consumes.
constexpr float kFractionScalesFloat[] = {
    1e-1f, 1e-2f, 1e-3f, 1e-4f,  1e-5f, 1e-6f,
    1e-7f, 1e-8f, 1e-9f, 1e-10f, 1e-11f};

// Double-precision counterpart of `kFractionScalesFloat`. Declared `constexpr`
// (not merely `const`) so both tables are guaranteed to be constant
// initialized and are usable in constant expressions.
constexpr double kFractionScalesDouble[] = {
    1e-1, 1e-2, 1e-3, 1e-4,  1e-5, 1e-6,
    1e-7, 1e-8, 1e-9, 1e-10, 1e-11};
140
141 template <class T>
StringTo(ByteStringView strc,pdfium::span<const T> fractional_scales)142 T StringTo(ByteStringView strc, pdfium::span<const T> fractional_scales) {
143 if (strc.IsEmpty())
144 return 0;
145
146 bool bNegative = false;
147 size_t cc = 0;
148 size_t len = strc.GetLength();
149 if (strc[0] == '+') {
150 cc++;
151 } else if (strc[0] == '-') {
152 bNegative = true;
153 cc++;
154 }
155 while (cc < len) {
156 if (strc[cc] != '+' && strc[cc] != '-')
157 break;
158 cc++;
159 }
160 T value = 0;
161 while (cc < len) {
162 if (strc[cc] == '.')
163 break;
164 value = value * 10 + FXSYS_DecimalCharToInt(strc.CharAt(cc));
165 cc++;
166 }
167 size_t scale = 0;
168 if (cc < len && strc[cc] == '.') {
169 cc++;
170 while (cc < len) {
171 value +=
172 fractional_scales[scale] * FXSYS_DecimalCharToInt(strc.CharAt(cc));
173 scale++;
174 if (scale == fractional_scales.size())
175 break;
176 cc++;
177 }
178 }
179 return bNegative ? -value : value;
180 }
181
182 template <class T>
ToString(T value,int (* round_func)(T),pdfium::span<char> buf)183 size_t ToString(T value, int (*round_func)(T), pdfium::span<char> buf) {
184 buf[0] = '0';
185 buf[1] = '\0';
186 if (value == 0) {
187 return 1;
188 }
189 bool bNegative = false;
190 if (value < 0) {
191 bNegative = true;
192 value = -value;
193 }
194 int scale = 1;
195 int scaled = round_func(value);
196 while (scaled < 100000) {
197 if (scale == 1000000) {
198 break;
199 }
200 scale *= 10;
201 scaled = round_func(value * scale);
202 }
203 if (scaled == 0) {
204 return 1;
205 }
206 char buf2[32];
207 size_t buf_size = 0;
208 if (bNegative) {
209 buf[buf_size++] = '-';
210 }
211 int i = scaled / scale;
212 FXSYS_itoa(i, buf2, 10);
213 size_t len = strlen(buf2);
214 fxcrt::spancpy(buf.subspan(buf_size), pdfium::make_span(buf2).first(len));
215 buf_size += len;
216 int fraction = scaled % scale;
217 if (fraction == 0) {
218 return buf_size;
219 }
220 buf[buf_size++] = '.';
221 scale /= 10;
222 while (fraction) {
223 buf[buf_size++] = '0' + fraction / scale;
224 fraction %= scale;
225 scale /= 10;
226 }
227 return buf_size;
228 }
229
230 } // namespace
231
StringToFloat(ByteStringView strc)232 float StringToFloat(ByteStringView strc) {
233 return StringTo<float>(strc, kFractionScalesFloat);
234 }
235
StringToFloat(WideStringView wsStr)236 float StringToFloat(WideStringView wsStr) {
237 return StringToFloat(FX_UTF8Encode(wsStr).AsStringView());
238 }
239
FloatToString(float f,pdfium::span<char> buf)240 size_t FloatToString(float f, pdfium::span<char> buf) {
241 return ToString<float>(f, FXSYS_roundf, buf);
242 }
243
StringToDouble(ByteStringView strc)244 double StringToDouble(ByteStringView strc) {
245 return StringTo<double>(strc, kFractionScalesDouble);
246 }
247
StringToDouble(WideStringView wsStr)248 double StringToDouble(WideStringView wsStr) {
249 return StringToDouble(FX_UTF8Encode(wsStr).AsStringView());
250 }
251
DoubleToString(double d,pdfium::span<char> buf)252 size_t DoubleToString(double d, pdfium::span<char> buf) {
253 return ToString<double>(d, FXSYS_round, buf);
254 }
255
256 namespace fxcrt {
257
258 template std::vector<ByteString> Split<ByteString>(const ByteString& that,
259 ByteString::CharType ch);
260 template std::vector<WideString> Split<WideString>(const WideString& that,
261 WideString::CharType ch);
262
263 } // namespace fxcrt
264