xref: /aosp_15_r20/external/cronet/base/json/string_escape.cc (revision 6777b5387eb2ff775bb5750e3f5d96f37fb7352b)
// Copyright 2006-2008 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
4*6777b538SAndroid Build Coastguard Worker 
5*6777b538SAndroid Build Coastguard Worker #include "base/json/string_escape.h"
6*6777b538SAndroid Build Coastguard Worker 
7*6777b538SAndroid Build Coastguard Worker #include <stddef.h>
8*6777b538SAndroid Build Coastguard Worker #include <stdint.h>
9*6777b538SAndroid Build Coastguard Worker 
10*6777b538SAndroid Build Coastguard Worker #include <limits>
11*6777b538SAndroid Build Coastguard Worker #include <string>
12*6777b538SAndroid Build Coastguard Worker #include <string_view>
13*6777b538SAndroid Build Coastguard Worker 
14*6777b538SAndroid Build Coastguard Worker #include "base/check_op.h"
15*6777b538SAndroid Build Coastguard Worker #include "base/strings/string_util.h"
16*6777b538SAndroid Build Coastguard Worker #include "base/strings/stringprintf.h"
17*6777b538SAndroid Build Coastguard Worker #include "base/strings/utf_string_conversion_utils.h"
18*6777b538SAndroid Build Coastguard Worker #include "base/strings/utf_string_conversions.h"
19*6777b538SAndroid Build Coastguard Worker #include "base/third_party/icu/icu_utf.h"
20*6777b538SAndroid Build Coastguard Worker 
21*6777b538SAndroid Build Coastguard Worker namespace base {
22*6777b538SAndroid Build Coastguard Worker 
23*6777b538SAndroid Build Coastguard Worker namespace {
24*6777b538SAndroid Build Coastguard Worker 
25*6777b538SAndroid Build Coastguard Worker // Format string for printing a \uXXXX escape sequence.
26*6777b538SAndroid Build Coastguard Worker const char kU16EscapeFormat[] = "\\u%04X";
27*6777b538SAndroid Build Coastguard Worker 
28*6777b538SAndroid Build Coastguard Worker // The code point to output for an invalid input code unit.
29*6777b538SAndroid Build Coastguard Worker const base_icu::UChar32 kReplacementCodePoint = 0xFFFD;
30*6777b538SAndroid Build Coastguard Worker 
31*6777b538SAndroid Build Coastguard Worker // Used below in EscapeSpecialCodePoint().
32*6777b538SAndroid Build Coastguard Worker static_assert('<' == 0x3C, "less than sign must be 0x3c");
33*6777b538SAndroid Build Coastguard Worker 
34*6777b538SAndroid Build Coastguard Worker // Try to escape the |code_point| if it is a known special character. If
35*6777b538SAndroid Build Coastguard Worker // successful, returns true and appends the escape sequence to |dest|. This
36*6777b538SAndroid Build Coastguard Worker // isn't required by the spec, but it's more readable by humans.
EscapeSpecialCodePoint(base_icu::UChar32 code_point,std::string * dest)37*6777b538SAndroid Build Coastguard Worker bool EscapeSpecialCodePoint(base_icu::UChar32 code_point, std::string* dest) {
38*6777b538SAndroid Build Coastguard Worker   // WARNING: if you add a new case here, you need to update the reader as well.
39*6777b538SAndroid Build Coastguard Worker   // Note: \v is in the reader, but not here since the JSON spec doesn't
40*6777b538SAndroid Build Coastguard Worker   // allow it.
41*6777b538SAndroid Build Coastguard Worker   switch (code_point) {
42*6777b538SAndroid Build Coastguard Worker     case '\b':
43*6777b538SAndroid Build Coastguard Worker       dest->append("\\b");
44*6777b538SAndroid Build Coastguard Worker       break;
45*6777b538SAndroid Build Coastguard Worker     case '\f':
46*6777b538SAndroid Build Coastguard Worker       dest->append("\\f");
47*6777b538SAndroid Build Coastguard Worker       break;
48*6777b538SAndroid Build Coastguard Worker     case '\n':
49*6777b538SAndroid Build Coastguard Worker       dest->append("\\n");
50*6777b538SAndroid Build Coastguard Worker       break;
51*6777b538SAndroid Build Coastguard Worker     case '\r':
52*6777b538SAndroid Build Coastguard Worker       dest->append("\\r");
53*6777b538SAndroid Build Coastguard Worker       break;
54*6777b538SAndroid Build Coastguard Worker     case '\t':
55*6777b538SAndroid Build Coastguard Worker       dest->append("\\t");
56*6777b538SAndroid Build Coastguard Worker       break;
57*6777b538SAndroid Build Coastguard Worker     case '\\':
58*6777b538SAndroid Build Coastguard Worker       dest->append("\\\\");
59*6777b538SAndroid Build Coastguard Worker       break;
60*6777b538SAndroid Build Coastguard Worker     case '"':
61*6777b538SAndroid Build Coastguard Worker       dest->append("\\\"");
62*6777b538SAndroid Build Coastguard Worker       break;
63*6777b538SAndroid Build Coastguard Worker     // Escape < to prevent script execution; escaping > is not necessary and
64*6777b538SAndroid Build Coastguard Worker     // not doing so save a few bytes.
65*6777b538SAndroid Build Coastguard Worker     case '<':
66*6777b538SAndroid Build Coastguard Worker       dest->append("\\u003C");
67*6777b538SAndroid Build Coastguard Worker       break;
68*6777b538SAndroid Build Coastguard Worker     // Escape the "Line Separator" and "Paragraph Separator" characters, since
69*6777b538SAndroid Build Coastguard Worker     // they should be treated like a new line \r or \n.
70*6777b538SAndroid Build Coastguard Worker     case 0x2028:
71*6777b538SAndroid Build Coastguard Worker       dest->append("\\u2028");
72*6777b538SAndroid Build Coastguard Worker       break;
73*6777b538SAndroid Build Coastguard Worker     case 0x2029:
74*6777b538SAndroid Build Coastguard Worker       dest->append("\\u2029");
75*6777b538SAndroid Build Coastguard Worker       break;
76*6777b538SAndroid Build Coastguard Worker     default:
77*6777b538SAndroid Build Coastguard Worker       return false;
78*6777b538SAndroid Build Coastguard Worker   }
79*6777b538SAndroid Build Coastguard Worker   return true;
80*6777b538SAndroid Build Coastguard Worker }
81*6777b538SAndroid Build Coastguard Worker 
82*6777b538SAndroid Build Coastguard Worker template <typename S>
EscapeJSONStringImpl(const S & str,bool put_in_quotes,std::string * dest)83*6777b538SAndroid Build Coastguard Worker bool EscapeJSONStringImpl(const S& str, bool put_in_quotes, std::string* dest) {
84*6777b538SAndroid Build Coastguard Worker   bool did_replacement = false;
85*6777b538SAndroid Build Coastguard Worker 
86*6777b538SAndroid Build Coastguard Worker   if (put_in_quotes)
87*6777b538SAndroid Build Coastguard Worker     dest->push_back('"');
88*6777b538SAndroid Build Coastguard Worker 
89*6777b538SAndroid Build Coastguard Worker   const size_t length = str.length();
90*6777b538SAndroid Build Coastguard Worker   for (size_t i = 0; i < length; ++i) {
91*6777b538SAndroid Build Coastguard Worker     base_icu::UChar32 code_point;
92*6777b538SAndroid Build Coastguard Worker     if (!ReadUnicodeCharacter(str.data(), length, &i, &code_point) ||
93*6777b538SAndroid Build Coastguard Worker         code_point == CBU_SENTINEL) {
94*6777b538SAndroid Build Coastguard Worker       code_point = kReplacementCodePoint;
95*6777b538SAndroid Build Coastguard Worker       did_replacement = true;
96*6777b538SAndroid Build Coastguard Worker     }
97*6777b538SAndroid Build Coastguard Worker 
98*6777b538SAndroid Build Coastguard Worker     if (EscapeSpecialCodePoint(code_point, dest))
99*6777b538SAndroid Build Coastguard Worker       continue;
100*6777b538SAndroid Build Coastguard Worker 
101*6777b538SAndroid Build Coastguard Worker     // Escape non-printing characters.
102*6777b538SAndroid Build Coastguard Worker     if (code_point < 32)
103*6777b538SAndroid Build Coastguard Worker       base::StringAppendF(dest, kU16EscapeFormat, code_point);
104*6777b538SAndroid Build Coastguard Worker     else
105*6777b538SAndroid Build Coastguard Worker       WriteUnicodeCharacter(code_point, dest);
106*6777b538SAndroid Build Coastguard Worker   }
107*6777b538SAndroid Build Coastguard Worker 
108*6777b538SAndroid Build Coastguard Worker   if (put_in_quotes)
109*6777b538SAndroid Build Coastguard Worker     dest->push_back('"');
110*6777b538SAndroid Build Coastguard Worker 
111*6777b538SAndroid Build Coastguard Worker   return !did_replacement;
112*6777b538SAndroid Build Coastguard Worker }
113*6777b538SAndroid Build Coastguard Worker 
114*6777b538SAndroid Build Coastguard Worker }  // namespace
115*6777b538SAndroid Build Coastguard Worker 
EscapeJSONString(std::string_view str,bool put_in_quotes,std::string * dest)116*6777b538SAndroid Build Coastguard Worker bool EscapeJSONString(std::string_view str,
117*6777b538SAndroid Build Coastguard Worker                       bool put_in_quotes,
118*6777b538SAndroid Build Coastguard Worker                       std::string* dest) {
119*6777b538SAndroid Build Coastguard Worker   return EscapeJSONStringImpl(str, put_in_quotes, dest);
120*6777b538SAndroid Build Coastguard Worker }
121*6777b538SAndroid Build Coastguard Worker 
EscapeJSONString(std::u16string_view str,bool put_in_quotes,std::string * dest)122*6777b538SAndroid Build Coastguard Worker bool EscapeJSONString(std::u16string_view str,
123*6777b538SAndroid Build Coastguard Worker                       bool put_in_quotes,
124*6777b538SAndroid Build Coastguard Worker                       std::string* dest) {
125*6777b538SAndroid Build Coastguard Worker   return EscapeJSONStringImpl(str, put_in_quotes, dest);
126*6777b538SAndroid Build Coastguard Worker }
127*6777b538SAndroid Build Coastguard Worker 
GetQuotedJSONString(std::string_view str)128*6777b538SAndroid Build Coastguard Worker std::string GetQuotedJSONString(std::string_view str) {
129*6777b538SAndroid Build Coastguard Worker   std::string dest;
130*6777b538SAndroid Build Coastguard Worker   EscapeJSONStringImpl(str, true, &dest);
131*6777b538SAndroid Build Coastguard Worker   return dest;
132*6777b538SAndroid Build Coastguard Worker }
133*6777b538SAndroid Build Coastguard Worker 
GetQuotedJSONString(std::u16string_view str)134*6777b538SAndroid Build Coastguard Worker std::string GetQuotedJSONString(std::u16string_view str) {
135*6777b538SAndroid Build Coastguard Worker   std::string dest;
136*6777b538SAndroid Build Coastguard Worker   EscapeJSONStringImpl(str, true, &dest);
137*6777b538SAndroid Build Coastguard Worker   return dest;
138*6777b538SAndroid Build Coastguard Worker }
139*6777b538SAndroid Build Coastguard Worker 
EscapeBytesAsInvalidJSONString(std::string_view str,bool put_in_quotes)140*6777b538SAndroid Build Coastguard Worker std::string EscapeBytesAsInvalidJSONString(std::string_view str,
141*6777b538SAndroid Build Coastguard Worker                                            bool put_in_quotes) {
142*6777b538SAndroid Build Coastguard Worker   std::string dest;
143*6777b538SAndroid Build Coastguard Worker 
144*6777b538SAndroid Build Coastguard Worker   if (put_in_quotes)
145*6777b538SAndroid Build Coastguard Worker     dest.push_back('"');
146*6777b538SAndroid Build Coastguard Worker 
147*6777b538SAndroid Build Coastguard Worker   for (char c : str) {
148*6777b538SAndroid Build Coastguard Worker     if (EscapeSpecialCodePoint(c, &dest))
149*6777b538SAndroid Build Coastguard Worker       continue;
150*6777b538SAndroid Build Coastguard Worker 
151*6777b538SAndroid Build Coastguard Worker     if (c < 32 || c > 126) {
152*6777b538SAndroid Build Coastguard Worker       base::StringAppendF(&dest, kU16EscapeFormat,
153*6777b538SAndroid Build Coastguard Worker                           static_cast<unsigned char>(c));
154*6777b538SAndroid Build Coastguard Worker     } else {
155*6777b538SAndroid Build Coastguard Worker       dest.push_back(c);
156*6777b538SAndroid Build Coastguard Worker     }
157*6777b538SAndroid Build Coastguard Worker   }
158*6777b538SAndroid Build Coastguard Worker 
159*6777b538SAndroid Build Coastguard Worker   if (put_in_quotes)
160*6777b538SAndroid Build Coastguard Worker     dest.push_back('"');
161*6777b538SAndroid Build Coastguard Worker 
162*6777b538SAndroid Build Coastguard Worker   return dest;
163*6777b538SAndroid Build Coastguard Worker }
164*6777b538SAndroid Build Coastguard Worker 
165*6777b538SAndroid Build Coastguard Worker }  // namespace base
166