xref: /aosp_15_r20/external/cronet/url/url_canon_internal.cc (revision 6777b5387eb2ff775bb5750e3f5d96f37fb7352b)
1 // Copyright 2013 The Chromium Authors
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #include "url/url_canon_internal.h"
6 
7 #include <errno.h>
8 #include <stddef.h>
9 #include <stdlib.h>
10 #ifdef __SSE2__
11 #include <immintrin.h>
12 #elif defined(__aarch64__)
13 #include <arm_neon.h>
14 #endif
15 
16 #include <cstdio>
17 #include <string>
18 
19 #include "base/bits.h"
20 #include "base/numerics/safe_conversions.h"
21 #include "base/strings/utf_string_conversion_utils.h"
22 #include "url/url_features.h"
23 
24 namespace url {
25 
26 namespace {
27 
28 // Find the initial segment of the given string that consists solely
29 // of characters valid for CHAR_QUERY. (We can have false negatives in
30 // one specific case, namely the exclamation mark 0x21, but false negatives
31 // are fine, and it's not worth adding a separate test for.) This is
32 // a fast path to speed up checking of very long query strings that are
33 // already valid, which happen on some web pages.
34 //
35 // This has some startup cost to load the constants and such, so it's
36 // usually not worth it for short strings.
FindInitialQuerySafeString(const char * source,size_t length)37 size_t FindInitialQuerySafeString(const char* source, size_t length) {
38 #if defined(__SSE2__) || defined(__aarch64__)
39   constexpr size_t kChunkSize = 16;
40   size_t i;
41   for (i = 0; i < base::bits::AlignDown(length, kChunkSize); i += kChunkSize) {
42     char b __attribute__((vector_size(16)));
43     memcpy(&b, source + i, sizeof(b));
44 
45     // Compare each element with the ranges for CHAR_QUERY
46     // (see kSharedCharTypeTable), vectorized so that it creates
47     // a mask of which elements match. For completeness, we could
48     // have had (...) | b == 0x21 here, but exclamation marks are
49     // rare and the extra test costs us some time.
50     auto mask = b >= 0x24 && b <= 0x7e && b != 0x27 && b != 0x3c && b != 0x3e;
51 
52 #ifdef __SSE2__
53     if (_mm_movemask_epi8(reinterpret_cast<__m128i>(mask)) != 0xffff) {
54       return i;
55     }
56 #else
57     if (vminvq_u8(reinterpret_cast<uint8x16_t>(mask)) == 0) {
58       return i;
59     }
60 #endif
61   }
62   return i;
63 #else
64   // Need SIMD support (with fast reductions) for this to be efficient.
65   return 0;
66 #endif
67 }
68 
69 template <typename CHAR, typename UCHAR>
DoAppendStringOfType(const CHAR * source,size_t length,SharedCharTypes type,CanonOutput * output)70 void DoAppendStringOfType(const CHAR* source,
71                           size_t length,
72                           SharedCharTypes type,
73                           CanonOutput* output) {
74   size_t i = 0;
75   // We only instantiate this for char, to avoid a Clang crash
76   // (and because Append() does not support converting).
77   if constexpr (sizeof(CHAR) == 1) {
78     if (type == CHAR_QUERY && length >= kMinimumLengthForSIMD) {
79       i = FindInitialQuerySafeString(source, length);
80       output->Append(source, i);
81     }
82   }
83   for (; i < length; i++) {
84     if (static_cast<UCHAR>(source[i]) >= 0x80) {
85       // ReadUTFCharLossy will fill the code point with
86       // kUnicodeReplacementCharacter when the input is invalid, which is what
87       // we want.
88       base_icu::UChar32 code_point;
89       ReadUTFCharLossy(source, &i, length, &code_point);
90       AppendUTF8EscapedValue(code_point, output);
91     } else {
92       // Just append the 7-bit character, possibly escaping it.
93       unsigned char uch = static_cast<unsigned char>(source[i]);
94       if (!IsCharOfType(uch, type))
95         AppendEscapedChar(uch, output);
96       else
97         output->push_back(uch);
98     }
99   }
100 }
101 
102 // This function assumes the input values are all contained in 8-bit,
103 // although it allows any type. Returns true if input is valid, false if not.
104 template <typename CHAR, typename UCHAR>
DoAppendInvalidNarrowString(const CHAR * spec,size_t begin,size_t end,CanonOutput * output)105 void DoAppendInvalidNarrowString(const CHAR* spec,
106                                  size_t begin,
107                                  size_t end,
108                                  CanonOutput* output) {
109   for (size_t i = begin; i < end; i++) {
110     UCHAR uch = static_cast<UCHAR>(spec[i]);
111     if (uch >= 0x80) {
112       // Handle UTF-8/16 encodings. This call will correctly handle the error
113       // case by appending the invalid character.
114       AppendUTF8EscapedChar(spec, &i, end, output);
115     } else if (uch <= ' ' || uch == 0x7f) {
116       // This function is for error handling, so we escape all control
117       // characters and spaces, but not anything else since we lack
118       // context to do something more specific.
119       AppendEscapedChar(static_cast<unsigned char>(uch), output);
120     } else {
121       output->push_back(static_cast<char>(uch));
122     }
123   }
124 }
125 
126 // Overrides one component, see the Replacements structure for
127 // what the various combionations of source pointer and component mean.
DoOverrideComponent(const char * override_source,const Component & override_component,const char ** dest,Component * dest_component)128 void DoOverrideComponent(const char* override_source,
129                          const Component& override_component,
130                          const char** dest,
131                          Component* dest_component) {
132   if (override_source) {
133     *dest = override_source;
134     *dest_component = override_component;
135   }
136 }
137 
138 // Similar to DoOverrideComponent except that it takes a UTF-16 input and does
139 // not actually set the output character pointer.
140 //
141 // The input is converted to UTF-8 at the end of the given buffer as a temporary
142 // holding place. The component identifying the portion of the buffer used in
143 // the |utf8_buffer| will be specified in |*dest_component|.
144 //
145 // This will not actually set any |dest| pointer like DoOverrideComponent
146 // does because all of the pointers will point into the |utf8_buffer|, which
147 // may get resized while we're overriding a subsequent component. Instead, the
148 // caller should use the beginning of the |utf8_buffer| as the string pointer
149 // for all components once all overrides have been prepared.
PrepareUTF16OverrideComponent(const char16_t * override_source,const Component & override_component,CanonOutput * utf8_buffer,Component * dest_component)150 bool PrepareUTF16OverrideComponent(const char16_t* override_source,
151                                    const Component& override_component,
152                                    CanonOutput* utf8_buffer,
153                                    Component* dest_component) {
154   bool success = true;
155   if (override_source) {
156     if (!override_component.is_valid()) {
157       // Non-"valid" component (means delete), so we need to preserve that.
158       *dest_component = Component();
159     } else {
160       // Convert to UTF-8.
161       dest_component->begin = utf8_buffer->length();
162       success = ConvertUTF16ToUTF8(&override_source[override_component.begin],
163                                    static_cast<size_t>(override_component.len),
164                                    utf8_buffer);
165       dest_component->len = utf8_buffer->length() - dest_component->begin;
166     }
167   }
168   return success;
169 }
170 
171 }  // namespace
172 
173 // See the header file for this array's declaration.
//
// The table has one entry per byte value (the trailing comment on each row
// gives the value or range it covers). An entry is the bitwise OR of the
// CHAR_* class flags that the byte belongs to; 0 means it belongs to none of
// them. Every control character (0x00 - 0x1f and 0x7f) and every byte at or
// above 0x80 is 0. The vectorized range test in FindInitialQuerySafeString
// is derived from the CHAR_QUERY entries here, so the two must be kept in
// agreement.
174 // clang-format off
175 const unsigned char kSharedCharTypeTable[0x100] = {
176     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  // 0x00 - 0x0f
177     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  // 0x10 - 0x1f
178     0,                           // 0x20  ' ' (escape spaces in queries)
179     CHAR_QUERY | CHAR_USERINFO | CHAR_COMPONENT,  // 0x21  !
180     0,                           // 0x22  "
181     0,                           // 0x23  #  (invalid in query since it marks the ref)
182     CHAR_QUERY | CHAR_USERINFO,  // 0x24  $
183     CHAR_QUERY | CHAR_USERINFO,  // 0x25  %
184     CHAR_QUERY | CHAR_USERINFO,  // 0x26  &
185     0,                           // 0x27  '  (Try to prevent XSS.)
186     CHAR_QUERY | CHAR_USERINFO | CHAR_COMPONENT,  // 0x28  (
187     CHAR_QUERY | CHAR_USERINFO | CHAR_COMPONENT,  // 0x29  )
188     CHAR_QUERY | CHAR_USERINFO | CHAR_COMPONENT,  // 0x2a  *
189     CHAR_QUERY | CHAR_USERINFO,  // 0x2b  +
190     CHAR_QUERY | CHAR_USERINFO,  // 0x2c  ,
191     CHAR_QUERY | CHAR_USERINFO | CHAR_COMPONENT,  // 0x2d  -
192     CHAR_QUERY | CHAR_USERINFO | CHAR_IPV4 | CHAR_COMPONENT,  // 0x2e  .
193     CHAR_QUERY,                  // 0x2f  /
194     CHAR_QUERY | CHAR_USERINFO | CHAR_IPV4 | CHAR_HEX | CHAR_DEC | CHAR_OCT | CHAR_COMPONENT,  // 0x30  0
195     CHAR_QUERY | CHAR_USERINFO | CHAR_IPV4 | CHAR_HEX | CHAR_DEC | CHAR_OCT | CHAR_COMPONENT,  // 0x31  1
196     CHAR_QUERY | CHAR_USERINFO | CHAR_IPV4 | CHAR_HEX | CHAR_DEC | CHAR_OCT | CHAR_COMPONENT,  // 0x32  2
197     CHAR_QUERY | CHAR_USERINFO | CHAR_IPV4 | CHAR_HEX | CHAR_DEC | CHAR_OCT | CHAR_COMPONENT,  // 0x33  3
198     CHAR_QUERY | CHAR_USERINFO | CHAR_IPV4 | CHAR_HEX | CHAR_DEC | CHAR_OCT | CHAR_COMPONENT,  // 0x34  4
199     CHAR_QUERY | CHAR_USERINFO | CHAR_IPV4 | CHAR_HEX | CHAR_DEC | CHAR_OCT | CHAR_COMPONENT,  // 0x35  5
200     CHAR_QUERY | CHAR_USERINFO | CHAR_IPV4 | CHAR_HEX | CHAR_DEC | CHAR_OCT | CHAR_COMPONENT,  // 0x36  6
201     CHAR_QUERY | CHAR_USERINFO | CHAR_IPV4 | CHAR_HEX | CHAR_DEC | CHAR_OCT | CHAR_COMPONENT,  // 0x37  7
202     CHAR_QUERY | CHAR_USERINFO | CHAR_IPV4 | CHAR_HEX | CHAR_DEC | CHAR_COMPONENT,             // 0x38  8
203     CHAR_QUERY | CHAR_USERINFO | CHAR_IPV4 | CHAR_HEX | CHAR_DEC | CHAR_COMPONENT,             // 0x39  9
204     CHAR_QUERY,  // 0x3a  :
205     CHAR_QUERY,  // 0x3b  ;
206     0,           // 0x3c  <  (Try to prevent certain types of XSS.)
207     CHAR_QUERY,  // 0x3d  =
208     0,           // 0x3e  >  (Try to prevent certain types of XSS.)
209     CHAR_QUERY,  // 0x3f  ?
210     CHAR_QUERY,  // 0x40  @
211     CHAR_QUERY | CHAR_USERINFO | CHAR_IPV4 | CHAR_HEX | CHAR_COMPONENT,  // 0x41  A
212     CHAR_QUERY | CHAR_USERINFO | CHAR_IPV4 | CHAR_HEX | CHAR_COMPONENT,  // 0x42  B
213     CHAR_QUERY | CHAR_USERINFO | CHAR_IPV4 | CHAR_HEX | CHAR_COMPONENT,  // 0x43  C
214     CHAR_QUERY | CHAR_USERINFO | CHAR_IPV4 | CHAR_HEX | CHAR_COMPONENT,  // 0x44  D
215     CHAR_QUERY | CHAR_USERINFO | CHAR_IPV4 | CHAR_HEX | CHAR_COMPONENT,  // 0x45  E
216     CHAR_QUERY | CHAR_USERINFO | CHAR_IPV4 | CHAR_HEX | CHAR_COMPONENT,  // 0x46  F
217     CHAR_QUERY | CHAR_USERINFO | CHAR_COMPONENT,  // 0x47  G
218     CHAR_QUERY | CHAR_USERINFO | CHAR_COMPONENT,  // 0x48  H
219     CHAR_QUERY | CHAR_USERINFO | CHAR_COMPONENT,  // 0x49  I
220     CHAR_QUERY | CHAR_USERINFO | CHAR_COMPONENT,  // 0x4a  J
221     CHAR_QUERY | CHAR_USERINFO | CHAR_COMPONENT,  // 0x4b  K
222     CHAR_QUERY | CHAR_USERINFO | CHAR_COMPONENT,  // 0x4c  L
223     CHAR_QUERY | CHAR_USERINFO | CHAR_COMPONENT,  // 0x4d  M
224     CHAR_QUERY | CHAR_USERINFO | CHAR_COMPONENT,  // 0x4e  N
225     CHAR_QUERY | CHAR_USERINFO | CHAR_COMPONENT,  // 0x4f  O
226     CHAR_QUERY | CHAR_USERINFO | CHAR_COMPONENT,  // 0x50  P
227     CHAR_QUERY | CHAR_USERINFO | CHAR_COMPONENT,  // 0x51  Q
228     CHAR_QUERY | CHAR_USERINFO | CHAR_COMPONENT,  // 0x52  R
229     CHAR_QUERY | CHAR_USERINFO | CHAR_COMPONENT,  // 0x53  S
230     CHAR_QUERY | CHAR_USERINFO | CHAR_COMPONENT,  // 0x54  T
231     CHAR_QUERY | CHAR_USERINFO | CHAR_COMPONENT,  // 0x55  U
232     CHAR_QUERY | CHAR_USERINFO | CHAR_COMPONENT,  // 0x56  V
233     CHAR_QUERY | CHAR_USERINFO | CHAR_COMPONENT,  // 0x57  W
234     CHAR_QUERY | CHAR_USERINFO | CHAR_IPV4 | CHAR_COMPONENT, // 0x58  X
235     CHAR_QUERY | CHAR_USERINFO | CHAR_COMPONENT,  // 0x59  Y
236     CHAR_QUERY | CHAR_USERINFO | CHAR_COMPONENT,  // 0x5a  Z
237     CHAR_QUERY,  // 0x5b  [
238     CHAR_QUERY,  // 0x5c  '\'
239     CHAR_QUERY,  // 0x5d  ]
240     CHAR_QUERY,  // 0x5e  ^
241     CHAR_QUERY | CHAR_USERINFO | CHAR_COMPONENT,  // 0x5f  _
242     CHAR_QUERY,  // 0x60  `
243     CHAR_QUERY | CHAR_USERINFO | CHAR_IPV4 | CHAR_HEX | CHAR_COMPONENT,  // 0x61  a
244     CHAR_QUERY | CHAR_USERINFO | CHAR_IPV4 | CHAR_HEX | CHAR_COMPONENT,  // 0x62  b
245     CHAR_QUERY | CHAR_USERINFO | CHAR_IPV4 | CHAR_HEX | CHAR_COMPONENT,  // 0x63  c
246     CHAR_QUERY | CHAR_USERINFO | CHAR_IPV4 | CHAR_HEX | CHAR_COMPONENT,  // 0x64  d
247     CHAR_QUERY | CHAR_USERINFO | CHAR_IPV4 | CHAR_HEX | CHAR_COMPONENT,  // 0x65  e
248     CHAR_QUERY | CHAR_USERINFO | CHAR_IPV4 | CHAR_HEX | CHAR_COMPONENT,  // 0x66  f
249     CHAR_QUERY | CHAR_USERINFO | CHAR_COMPONENT,  // 0x67  g
250     CHAR_QUERY | CHAR_USERINFO | CHAR_COMPONENT,  // 0x68  h
251     CHAR_QUERY | CHAR_USERINFO | CHAR_COMPONENT,  // 0x69  i
252     CHAR_QUERY | CHAR_USERINFO | CHAR_COMPONENT,  // 0x6a  j
253     CHAR_QUERY | CHAR_USERINFO | CHAR_COMPONENT,  // 0x6b  k
254     CHAR_QUERY | CHAR_USERINFO | CHAR_COMPONENT,  // 0x6c  l
255     CHAR_QUERY | CHAR_USERINFO | CHAR_COMPONENT,  // 0x6d  m
256     CHAR_QUERY | CHAR_USERINFO | CHAR_COMPONENT,  // 0x6e  n
257     CHAR_QUERY | CHAR_USERINFO | CHAR_COMPONENT,  // 0x6f  o
258     CHAR_QUERY | CHAR_USERINFO | CHAR_COMPONENT,  // 0x70  p
259     CHAR_QUERY | CHAR_USERINFO | CHAR_COMPONENT,  // 0x71  q
260     CHAR_QUERY | CHAR_USERINFO | CHAR_COMPONENT,  // 0x72  r
261     CHAR_QUERY | CHAR_USERINFO | CHAR_COMPONENT,  // 0x73  s
262     CHAR_QUERY | CHAR_USERINFO | CHAR_COMPONENT,  // 0x74  t
263     CHAR_QUERY | CHAR_USERINFO | CHAR_COMPONENT,  // 0x75  u
264     CHAR_QUERY | CHAR_USERINFO | CHAR_COMPONENT,  // 0x76  v
265     CHAR_QUERY | CHAR_USERINFO | CHAR_COMPONENT,  // 0x77  w
266     CHAR_QUERY | CHAR_USERINFO | CHAR_IPV4 | CHAR_COMPONENT,  // 0x78  x
267     CHAR_QUERY | CHAR_USERINFO | CHAR_COMPONENT,  // 0x79  y
268     CHAR_QUERY | CHAR_USERINFO | CHAR_COMPONENT,  // 0x7a  z
269     CHAR_QUERY,  // 0x7b  {
270     CHAR_QUERY,  // 0x7c  |
271     CHAR_QUERY,  // 0x7d  }
272     CHAR_QUERY | CHAR_USERINFO | CHAR_COMPONENT,  // 0x7e  ~
273     0,           // 0x7f
274     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  // 0x80 - 0x8f
275     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  // 0x90 - 0x9f
276     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  // 0xa0 - 0xaf
277     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  // 0xb0 - 0xbf
278     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  // 0xc0 - 0xcf
279     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  // 0xd0 - 0xdf
280     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  // 0xe0 - 0xef
281     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  // 0xf0 - 0xff
282 };
283 // clang-format on
284 
// One entry for each 32-value band of byte values (0x00-0x1f, 0x20-0x3f, ...).
// For the three bands that contain hex digits, the entry is the amount that
// separates a digit's character code from its numeric value: '0' for the
// decimal digits, 'A' - 10 for upper-case A-F and 'a' - 10 for lower-case
// a-f. Bands without hex digits hold 0.
// NOTE(review): the consumer is presumably a char-to-hex helper declared in
// the header; confirm there how the entry is selected and applied.
const char kCharToHexLookup[8] = {
    /* 0x00 - 0x1f */ 0,
    /* 0x20 - 0x3f */ '0',       // digits 0 - 9 are 0x30 - 0x39
    /* 0x40 - 0x5f */ 'A' - 10,  // letters A - F are 0x41 - 0x46
    /* 0x60 - 0x7f */ 'a' - 10,  // letters a - f are 0x61 - 0x66
    /* 0x80 - 0x9f */ 0,
    /* 0xa0 - 0xbf */ 0,
    /* 0xc0 - 0xdf */ 0,
    /* 0xe0 - 0xff */ 0,
};
295 
296 const base_icu::UChar32 kUnicodeReplacementCharacter = 0xfffd;
297 
AppendStringOfType(const char * source,size_t length,SharedCharTypes type,CanonOutput * output)298 void AppendStringOfType(const char* source,
299                         size_t length,
300                         SharedCharTypes type,
301                         CanonOutput* output) {
302   DoAppendStringOfType<char, unsigned char>(source, length, type, output);
303 }
304 
AppendStringOfType(const char16_t * source,size_t length,SharedCharTypes type,CanonOutput * output)305 void AppendStringOfType(const char16_t* source,
306                         size_t length,
307                         SharedCharTypes type,
308                         CanonOutput* output) {
309   DoAppendStringOfType<char16_t, char16_t>(source, length, type, output);
310 }
311 
ReadUTFCharLossy(const char * str,size_t * begin,size_t length,base_icu::UChar32 * code_point_out)312 bool ReadUTFCharLossy(const char* str,
313                       size_t* begin,
314                       size_t length,
315                       base_icu::UChar32* code_point_out) {
316   if (!base::ReadUnicodeCharacter(str, length, begin, code_point_out)) {
317     *code_point_out = kUnicodeReplacementCharacter;
318     return false;
319   }
320   return true;
321 }
322 
ReadUTFCharLossy(const char16_t * str,size_t * begin,size_t length,base_icu::UChar32 * code_point_out)323 bool ReadUTFCharLossy(const char16_t* str,
324                       size_t* begin,
325                       size_t length,
326                       base_icu::UChar32* code_point_out) {
327   if (!base::ReadUnicodeCharacter(str, length, begin, code_point_out)) {
328     *code_point_out = kUnicodeReplacementCharacter;
329     return false;
330   }
331   return true;
332 }
333 
AppendInvalidNarrowString(const char * spec,size_t begin,size_t end,CanonOutput * output)334 void AppendInvalidNarrowString(const char* spec,
335                                size_t begin,
336                                size_t end,
337                                CanonOutput* output) {
338   DoAppendInvalidNarrowString<char, unsigned char>(spec, begin, end, output);
339 }
340 
AppendInvalidNarrowString(const char16_t * spec,size_t begin,size_t end,CanonOutput * output)341 void AppendInvalidNarrowString(const char16_t* spec,
342                                size_t begin,
343                                size_t end,
344                                CanonOutput* output) {
345   DoAppendInvalidNarrowString<char16_t, char16_t>(spec, begin, end, output);
346 }
347 
ConvertUTF16ToUTF8(const char16_t * input,size_t input_len,CanonOutput * output)348 bool ConvertUTF16ToUTF8(const char16_t* input,
349                         size_t input_len,
350                         CanonOutput* output) {
351   bool success = true;
352   for (size_t i = 0; i < input_len; i++) {
353     base_icu::UChar32 code_point;
354     success &= ReadUTFCharLossy(input, &i, input_len, &code_point);
355     AppendUTF8Value(code_point, output);
356   }
357   return success;
358 }
359 
ConvertUTF8ToUTF16(const char * input,size_t input_len,CanonOutputT<char16_t> * output)360 bool ConvertUTF8ToUTF16(const char* input,
361                         size_t input_len,
362                         CanonOutputT<char16_t>* output) {
363   bool success = true;
364   for (size_t i = 0; i < input_len; i++) {
365     base_icu::UChar32 code_point;
366     success &= ReadUTFCharLossy(input, &i, input_len, &code_point);
367     AppendUTF16Value(code_point, output);
368   }
369   return success;
370 }
371 
SetupOverrideComponents(const char * base,const Replacements<char> & repl,URLComponentSource<char> * source,Parsed * parsed)372 void SetupOverrideComponents(const char* base,
373                              const Replacements<char>& repl,
374                              URLComponentSource<char>* source,
375                              Parsed* parsed) {
376   // Get the source and parsed structures of the things we are replacing.
377   const URLComponentSource<char>& repl_source = repl.sources();
378   const Parsed& repl_parsed = repl.components();
379 
380   DoOverrideComponent(repl_source.scheme, repl_parsed.scheme, &source->scheme,
381                       &parsed->scheme);
382   DoOverrideComponent(repl_source.username, repl_parsed.username,
383                       &source->username, &parsed->username);
384   DoOverrideComponent(repl_source.password, repl_parsed.password,
385                       &source->password, &parsed->password);
386 
387   DoOverrideComponent(repl_source.host, repl_parsed.host, &source->host,
388                       &parsed->host);
389   if (!url::IsUsingStandardCompliantNonSpecialSchemeURLParsing()) {
390     // For backward compatibility, the following is probably required while the
391     // flag is disabled by default.
392     if (parsed->host.len == -1) {
393       parsed->host.len = 0;
394     }
395   }
396 
397   DoOverrideComponent(repl_source.port, repl_parsed.port, &source->port,
398                       &parsed->port);
399   DoOverrideComponent(repl_source.path, repl_parsed.path, &source->path,
400                       &parsed->path);
401   DoOverrideComponent(repl_source.query, repl_parsed.query, &source->query,
402                       &parsed->query);
403   DoOverrideComponent(repl_source.ref, repl_parsed.ref, &source->ref,
404                       &parsed->ref);
405 }
406 
SetupUTF16OverrideComponents(const char * base,const Replacements<char16_t> & repl,CanonOutput * utf8_buffer,URLComponentSource<char> * source,Parsed * parsed)407 bool SetupUTF16OverrideComponents(const char* base,
408                                   const Replacements<char16_t>& repl,
409                                   CanonOutput* utf8_buffer,
410                                   URLComponentSource<char>* source,
411                                   Parsed* parsed) {
412   bool success = true;
413 
414   // Get the source and parsed structures of the things we are replacing.
415   const URLComponentSource<char16_t>& repl_source = repl.sources();
416   const Parsed& repl_parsed = repl.components();
417 
418   success &= PrepareUTF16OverrideComponent(
419       repl_source.scheme, repl_parsed.scheme, utf8_buffer, &parsed->scheme);
420   success &=
421       PrepareUTF16OverrideComponent(repl_source.username, repl_parsed.username,
422                                     utf8_buffer, &parsed->username);
423   success &=
424       PrepareUTF16OverrideComponent(repl_source.password, repl_parsed.password,
425                                     utf8_buffer, &parsed->password);
426   success &= PrepareUTF16OverrideComponent(repl_source.host, repl_parsed.host,
427                                            utf8_buffer, &parsed->host);
428   success &= PrepareUTF16OverrideComponent(repl_source.port, repl_parsed.port,
429                                            utf8_buffer, &parsed->port);
430   success &= PrepareUTF16OverrideComponent(repl_source.path, repl_parsed.path,
431                                            utf8_buffer, &parsed->path);
432   success &= PrepareUTF16OverrideComponent(repl_source.query, repl_parsed.query,
433                                            utf8_buffer, &parsed->query);
434   success &= PrepareUTF16OverrideComponent(repl_source.ref, repl_parsed.ref,
435                                            utf8_buffer, &parsed->ref);
436 
437   // PrepareUTF16OverrideComponent will not have set the data pointer since the
438   // buffer could be resized, invalidating the pointers. We set the data
439   // pointers for affected components now that the buffer is finalized.
440   if (repl_source.scheme)
441     source->scheme = utf8_buffer->data();
442   if (repl_source.username)
443     source->username = utf8_buffer->data();
444   if (repl_source.password)
445     source->password = utf8_buffer->data();
446   if (repl_source.host)
447     source->host = utf8_buffer->data();
448   if (repl_source.port)
449     source->port = utf8_buffer->data();
450   if (repl_source.path)
451     source->path = utf8_buffer->data();
452   if (repl_source.query)
453     source->query = utf8_buffer->data();
454   if (repl_source.ref)
455     source->ref = utf8_buffer->data();
456 
457   return success;
458 }
459 
460 #ifndef WIN32
461 
// Stand-in for the Microsoft CRT function of the same name on platforms that
// do not provide it. Writes |value| as NUL-terminated text into |buffer|,
// which has room for |size_in_chars| characters including the terminator.
//
// Only radix 10 (signed decimal) and radix 16 (lower-case hexadecimal of the
// value's unsigned bit pattern) are supported.
//
// Returns 0 on success. Returns EINVAL for any other radix, or when the text
// plus its terminator does not fit in |size_in_chars|; in the latter case the
// contents of |buffer| should not be relied upon.
int _itoa_s(int value, char* buffer, size_t size_in_chars, int radix) {
  // Each call site uses a literal format string so the compiler can check it
  // against the argument that is actually passed.
  int written;
  switch (radix) {
    case 10:
      written = snprintf(buffer, size_in_chars, "%d", value);
      break;
    case 16:
      // %x consumes an unsigned int, so convert explicitly instead of handing
      // it a signed argument.
      written = snprintf(buffer, size_in_chars, "%x",
                         static_cast<unsigned int>(value));
      break;
    default:
      return EINVAL;
  }

  // A negative result becomes a huge size_t, so this single comparison
  // rejects both a formatting error and truncated output.
  if (static_cast<size_t>(written) >= size_in_chars) {
    return EINVAL;
  }
  return 0;
}
478 
// UTF-16 counterpart of _itoa_s for platforms without the Microsoft CRT.
// Writes the decimal text of |value|, NUL-terminated, into |buffer|, which
// has room for |size_in_chars| characters including the terminator.
//
// Only radix 10 is supported. Returns 0 on success and EINVAL for any other
// radix or when the text plus its terminator does not fit; in the failure
// cases |buffer| is not written to.
int _itow_s(int value, char16_t* buffer, size_t size_in_chars, int radix) {
  if (radix != 10)
    return EINVAL;

  // No more than 12 characters will be required for a 32-bit integer.
  // Add an extra byte for the terminating null.
  char narrow[13];
  const int digits = snprintf(narrow, sizeof(narrow), "%d", value);
  if (static_cast<size_t>(digits) >= size_in_chars) {
    // Output was truncated, or the count was negative.
    return EINVAL;
  }

  // Widen the formatted characters one by one, then terminate.
  char16_t* out = buffer;
  for (const char* in = narrow; in != narrow + digits; ++in) {
    *out++ = static_cast<char16_t>(*in);
  }
  *out = '\0';
  return 0;
}
498 
499 #endif  // !WIN32
500 
501 }  // namespace url
502