xref: /aosp_15_r20/external/angle/third_party/abseil-cpp/absl/strings/internal/escaping.cc (revision 8975f5c5ed3d1c378011245431ada316dfb6f244)
1 // Copyright 2020 The Abseil Authors.
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //      https://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 #include "absl/strings/internal/escaping.h"
16 
17 #include <limits>
18 
19 #include "absl/base/internal/endian.h"
20 #include "absl/base/internal/raw_logging.h"
21 
22 namespace absl {
23 ABSL_NAMESPACE_BEGIN
24 namespace strings_internal {
25 
26 // The two strings below provide maps from normal 6-bit characters to their
27 // base64-escaped equivalent.
28 // For the inverse case, see kUn(WebSafe)Base64 in the external
29 // escaping.cc.
30 ABSL_CONST_INIT const char kBase64Chars[] =
31     "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
32 
33 ABSL_CONST_INIT const char kWebSafeBase64Chars[] =
34     "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_";
35 
CalculateBase64EscapedLenInternal(size_t input_len,bool do_padding)36 size_t CalculateBase64EscapedLenInternal(size_t input_len, bool do_padding) {
37   // Base64 encodes three bytes of input at a time. If the input is not
38   // divisible by three, we pad as appropriate.
39   //
40   // Base64 encodes each three bytes of input into four bytes of output.
41   constexpr size_t kMaxSize = (std::numeric_limits<size_t>::max() - 1) / 4 * 3;
42   ABSL_INTERNAL_CHECK(input_len <= kMaxSize,
43                       "CalculateBase64EscapedLenInternal() overflow");
44   size_t len = (input_len / 3) * 4;
45 
46   // Since all base 64 input is an integral number of octets, only the following
47   // cases can arise:
48   if (input_len % 3 == 0) {
49     // (from https://tools.ietf.org/html/rfc3548)
50     // (1) the final quantum of encoding input is an integral multiple of 24
51     // bits; here, the final unit of encoded output will be an integral
52     // multiple of 4 characters with no "=" padding,
53   } else if (input_len % 3 == 1) {
54     // (from https://tools.ietf.org/html/rfc3548)
55     // (2) the final quantum of encoding input is exactly 8 bits; here, the
56     // final unit of encoded output will be two characters followed by two
57     // "=" padding characters, or
58     len += 2;
59     if (do_padding) {
60       len += 2;
61     }
62   } else {  // (input_len % 3 == 2)
63     // (from https://tools.ietf.org/html/rfc3548)
64     // (3) the final quantum of encoding input is exactly 16 bits; here, the
65     // final unit of encoded output will be three characters followed by one
66     // "=" padding character.
67     len += 3;
68     if (do_padding) {
69       len += 1;
70     }
71   }
72 
73   return len;
74 }
75 
76 // ----------------------------------------------------------------------
77 //   Take the input in groups of 4 characters and turn each
78 //   character into a code 0 to 63 thus:
79 //           A-Z map to 0 to 25
80 //           a-z map to 26 to 51
81 //           0-9 map to 52 to 61
82 //           +(- for WebSafe) maps to 62
83 //           /(_ for WebSafe) maps to 63
84 //   There will be four numbers, all less than 64 which can be represented
85 //   by a 6 digit binary number (aaaaaa, bbbbbb, cccccc, dddddd respectively).
86 //   Arrange the 6 digit binary numbers into three bytes as such:
87 //   aaaaaabb bbbbcccc ccdddddd
88 //   Equals signs (one or two) are used at the end of the encoded block to
89 //   indicate that the text was not an integer multiple of three bytes long.
90 // ----------------------------------------------------------------------
Base64EscapeInternal(const unsigned char * src,size_t szsrc,char * dest,size_t szdest,const char * base64,bool do_padding)91 size_t Base64EscapeInternal(const unsigned char* src, size_t szsrc, char* dest,
92                             size_t szdest, const char* base64,
93                             bool do_padding) {
94   static const char kPad64 = '=';
95 
96   if (szsrc * 4 > szdest * 3) return 0;
97 
98   char* cur_dest = dest;
99   const unsigned char* cur_src = src;
100 
101   char* const limit_dest = dest + szdest;
102   const unsigned char* const limit_src = src + szsrc;
103 
104   // (from https://tools.ietf.org/html/rfc3548)
105   // Special processing is performed if fewer than 24 bits are available
106   // at the end of the data being encoded.  A full encoding quantum is
107   // always completed at the end of a quantity.  When fewer than 24 input
108   // bits are available in an input group, zero bits are added (on the
109   // right) to form an integral number of 6-bit groups.
110   //
111   // If do_padding is true, padding at the end of the data is performed. This
112   // output padding uses the '=' character.
113 
114   // Three bytes of data encodes to four characters of cyphertext.
115   // So we can pump through three-byte chunks atomically.
116   if (szsrc >= 3) {                    // "limit_src - 3" is UB if szsrc < 3.
117     while (cur_src < limit_src - 3) {  // While we have >= 32 bits.
118       uint32_t in = absl::big_endian::Load32(cur_src) >> 8;
119 
120       cur_dest[0] = base64[in >> 18];
121       in &= 0x3FFFF;
122       cur_dest[1] = base64[in >> 12];
123       in &= 0xFFF;
124       cur_dest[2] = base64[in >> 6];
125       in &= 0x3F;
126       cur_dest[3] = base64[in];
127 
128       cur_dest += 4;
129       cur_src += 3;
130     }
131   }
132   // To save time, we didn't update szdest or szsrc in the loop.  So do it now.
133   szdest = static_cast<size_t>(limit_dest - cur_dest);
134   szsrc = static_cast<size_t>(limit_src - cur_src);
135 
136   /* now deal with the tail (<=3 bytes) */
137   switch (szsrc) {
138     case 0:
139       // Nothing left; nothing more to do.
140       break;
141     case 1: {
142       // One byte left: this encodes to two characters, and (optionally)
143       // two pad characters to round out the four-character cypherblock.
144       if (szdest < 2) return 0;
145       uint32_t in = cur_src[0];
146       cur_dest[0] = base64[in >> 2];
147       in &= 0x3;
148       cur_dest[1] = base64[in << 4];
149       cur_dest += 2;
150       szdest -= 2;
151       if (do_padding) {
152         if (szdest < 2) return 0;
153         cur_dest[0] = kPad64;
154         cur_dest[1] = kPad64;
155         cur_dest += 2;
156         szdest -= 2;
157       }
158       break;
159     }
160     case 2: {
161       // Two bytes left: this encodes to three characters, and (optionally)
162       // one pad character to round out the four-character cypherblock.
163       if (szdest < 3) return 0;
164       uint32_t in = absl::big_endian::Load16(cur_src);
165       cur_dest[0] = base64[in >> 10];
166       in &= 0x3FF;
167       cur_dest[1] = base64[in >> 4];
168       in &= 0x00F;
169       cur_dest[2] = base64[in << 2];
170       cur_dest += 3;
171       szdest -= 3;
172       if (do_padding) {
173         if (szdest < 1) return 0;
174         cur_dest[0] = kPad64;
175         cur_dest += 1;
176         szdest -= 1;
177       }
178       break;
179     }
180     case 3: {
181       // Three bytes left: same as in the big loop above.  We can't do this in
182       // the loop because the loop above always reads 4 bytes, and the fourth
183       // byte is past the end of the input.
184       if (szdest < 4) return 0;
185       uint32_t in =
186           (uint32_t{cur_src[0]} << 16) + absl::big_endian::Load16(cur_src + 1);
187       cur_dest[0] = base64[in >> 18];
188       in &= 0x3FFFF;
189       cur_dest[1] = base64[in >> 12];
190       in &= 0xFFF;
191       cur_dest[2] = base64[in >> 6];
192       in &= 0x3F;
193       cur_dest[3] = base64[in];
194       cur_dest += 4;
195       szdest -= 4;
196       break;
197     }
198     default:
199       // Should not be reached: blocks of 4 bytes are handled
200       // in the while loop before this switch statement.
201       ABSL_RAW_LOG(FATAL, "Logic problem? szsrc = %zu", szsrc);
202       break;
203   }
204   return static_cast<size_t>(cur_dest - dest);
205 }
206 
207 }  // namespace strings_internal
208 ABSL_NAMESPACE_END
209 }  // namespace absl
210