xref: /aosp_15_r20/external/cronet/base/json/string_escape.cc (revision 6777b5387eb2ff775bb5750e3f5d96f37fb7352b)
1 // Copyright 2006-2008 The Chromium Authors
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #include "base/json/string_escape.h"
6 
7 #include <stddef.h>
8 #include <stdint.h>
9 
10 #include <limits>
11 #include <string>
12 #include <string_view>
13 
14 #include "base/check_op.h"
15 #include "base/strings/string_util.h"
16 #include "base/strings/stringprintf.h"
17 #include "base/strings/utf_string_conversion_utils.h"
18 #include "base/strings/utf_string_conversions.h"
19 #include "base/third_party/icu/icu_utf.h"
20 
21 namespace base {
22 
23 namespace {
24 
25 // Format string for printing a \uXXXX escape sequence.
26 const char kU16EscapeFormat[] = "\\u%04X";
27 
28 // The code point to output for an invalid input code unit.
29 const base_icu::UChar32 kReplacementCodePoint = 0xFFFD;
30 
31 // Used below in EscapeSpecialCodePoint().
32 static_assert('<' == 0x3C, "less than sign must be 0x3c");
33 
34 // Try to escape the |code_point| if it is a known special character. If
35 // successful, returns true and appends the escape sequence to |dest|. This
36 // isn't required by the spec, but it's more readable by humans.
EscapeSpecialCodePoint(base_icu::UChar32 code_point,std::string * dest)37 bool EscapeSpecialCodePoint(base_icu::UChar32 code_point, std::string* dest) {
38   // WARNING: if you add a new case here, you need to update the reader as well.
39   // Note: \v is in the reader, but not here since the JSON spec doesn't
40   // allow it.
41   switch (code_point) {
42     case '\b':
43       dest->append("\\b");
44       break;
45     case '\f':
46       dest->append("\\f");
47       break;
48     case '\n':
49       dest->append("\\n");
50       break;
51     case '\r':
52       dest->append("\\r");
53       break;
54     case '\t':
55       dest->append("\\t");
56       break;
57     case '\\':
58       dest->append("\\\\");
59       break;
60     case '"':
61       dest->append("\\\"");
62       break;
63     // Escape < to prevent script execution; escaping > is not necessary and
64     // not doing so save a few bytes.
65     case '<':
66       dest->append("\\u003C");
67       break;
68     // Escape the "Line Separator" and "Paragraph Separator" characters, since
69     // they should be treated like a new line \r or \n.
70     case 0x2028:
71       dest->append("\\u2028");
72       break;
73     case 0x2029:
74       dest->append("\\u2029");
75       break;
76     default:
77       return false;
78   }
79   return true;
80 }
81 
82 template <typename S>
EscapeJSONStringImpl(const S & str,bool put_in_quotes,std::string * dest)83 bool EscapeJSONStringImpl(const S& str, bool put_in_quotes, std::string* dest) {
84   bool did_replacement = false;
85 
86   if (put_in_quotes)
87     dest->push_back('"');
88 
89   const size_t length = str.length();
90   for (size_t i = 0; i < length; ++i) {
91     base_icu::UChar32 code_point;
92     if (!ReadUnicodeCharacter(str.data(), length, &i, &code_point) ||
93         code_point == CBU_SENTINEL) {
94       code_point = kReplacementCodePoint;
95       did_replacement = true;
96     }
97 
98     if (EscapeSpecialCodePoint(code_point, dest))
99       continue;
100 
101     // Escape non-printing characters.
102     if (code_point < 32)
103       base::StringAppendF(dest, kU16EscapeFormat, code_point);
104     else
105       WriteUnicodeCharacter(code_point, dest);
106   }
107 
108   if (put_in_quotes)
109     dest->push_back('"');
110 
111   return !did_replacement;
112 }
113 
114 }  // namespace
115 
EscapeJSONString(std::string_view str,bool put_in_quotes,std::string * dest)116 bool EscapeJSONString(std::string_view str,
117                       bool put_in_quotes,
118                       std::string* dest) {
119   return EscapeJSONStringImpl(str, put_in_quotes, dest);
120 }
121 
EscapeJSONString(std::u16string_view str,bool put_in_quotes,std::string * dest)122 bool EscapeJSONString(std::u16string_view str,
123                       bool put_in_quotes,
124                       std::string* dest) {
125   return EscapeJSONStringImpl(str, put_in_quotes, dest);
126 }
127 
GetQuotedJSONString(std::string_view str)128 std::string GetQuotedJSONString(std::string_view str) {
129   std::string dest;
130   EscapeJSONStringImpl(str, true, &dest);
131   return dest;
132 }
133 
GetQuotedJSONString(std::u16string_view str)134 std::string GetQuotedJSONString(std::u16string_view str) {
135   std::string dest;
136   EscapeJSONStringImpl(str, true, &dest);
137   return dest;
138 }
139 
EscapeBytesAsInvalidJSONString(std::string_view str,bool put_in_quotes)140 std::string EscapeBytesAsInvalidJSONString(std::string_view str,
141                                            bool put_in_quotes) {
142   std::string dest;
143 
144   if (put_in_quotes)
145     dest.push_back('"');
146 
147   for (char c : str) {
148     if (EscapeSpecialCodePoint(c, &dest))
149       continue;
150 
151     if (c < 32 || c > 126) {
152       base::StringAppendF(&dest, kU16EscapeFormat,
153                           static_cast<unsigned char>(c));
154     } else {
155       dest.push_back(c);
156     }
157   }
158 
159   if (put_in_quotes)
160     dest.push_back('"');
161 
162   return dest;
163 }
164 
165 }  // namespace base
166