1*635a8641SAndroid Build Coastguard Worker // Copyright (c) 2006-2008 The Chromium Authors. All rights reserved.
2*635a8641SAndroid Build Coastguard Worker // Use of this source code is governed by a BSD-style license that can be
3*635a8641SAndroid Build Coastguard Worker // found in the LICENSE file.
4*635a8641SAndroid Build Coastguard Worker
5*635a8641SAndroid Build Coastguard Worker #include "base/json/string_escape.h"
6*635a8641SAndroid Build Coastguard Worker
7*635a8641SAndroid Build Coastguard Worker #include <stddef.h>
8*635a8641SAndroid Build Coastguard Worker #include <stdint.h>
9*635a8641SAndroid Build Coastguard Worker
10*635a8641SAndroid Build Coastguard Worker #include <limits>
11*635a8641SAndroid Build Coastguard Worker #include <string>
12*635a8641SAndroid Build Coastguard Worker
13*635a8641SAndroid Build Coastguard Worker #include "base/strings/string_util.h"
14*635a8641SAndroid Build Coastguard Worker #include "base/strings/stringprintf.h"
15*635a8641SAndroid Build Coastguard Worker #include "base/strings/utf_string_conversion_utils.h"
16*635a8641SAndroid Build Coastguard Worker #include "base/strings/utf_string_conversions.h"
17*635a8641SAndroid Build Coastguard Worker #include "base/third_party/icu/icu_utf.h"
18*635a8641SAndroid Build Coastguard Worker
19*635a8641SAndroid Build Coastguard Worker namespace base {
20*635a8641SAndroid Build Coastguard Worker
21*635a8641SAndroid Build Coastguard Worker namespace {
22*635a8641SAndroid Build Coastguard Worker
// printf-style template used to emit a code unit as a JSON "\uXXXX" escape
// (four upper-case hexadecimal digits, zero padded).
constexpr char kU16EscapeFormat[] = "\\u%04X";

// Code point substituted for any input that cannot be decoded or is not a
// valid character (U+FFFD).
constexpr uint32_t kReplacementCodePoint = 0xFFFD;

// EscapeSpecialCodePoint() hard-codes the escape for '<' as "\u003C", so the
// execution character set must map '<' to 0x3C.
static_assert('<' == 0x3C, "less than sign must be 0x3c");
31*635a8641SAndroid Build Coastguard Worker
// Appends a short, human-readable escape sequence to |dest| when |code_point|
// is one of the specially handled characters, and returns true. For every
// other code point |dest| is left untouched and false is returned so that the
// caller can decide how to emit it. These escapes are not mandated by the
// spec; they simply make the output easier to read.
//
// WARNING: if you add a new case here, you need to update the reader as well.
// Note: \v is in the reader, but not here since the JSON spec doesn't allow
// it.
bool EscapeSpecialCodePoint(uint32_t code_point, std::string* dest) {
  // Escape text selected for |code_point|; stays null when the code point is
  // not special.
  const char* escape_sequence = nullptr;

  switch (code_point) {
    case '\b':
      escape_sequence = "\\b";
      break;
    case '\f':
      escape_sequence = "\\f";
      break;
    case '\n':
      escape_sequence = "\\n";
      break;
    case '\r':
      escape_sequence = "\\r";
      break;
    case '\t':
      escape_sequence = "\\t";
      break;
    case '\\':
      escape_sequence = "\\\\";
      break;
    case '"':
      escape_sequence = "\\\"";
      break;
    // '<' is escaped to prevent script execution. Escaping '>' is not
    // necessary, and leaving it alone saves a few bytes.
    case '<':
      escape_sequence = "\\u003C";
      break;
    // "Line Separator" and "Paragraph Separator" should be treated like the
    // new line characters \r and \n, so they are always escaped.
    case 0x2028:
      escape_sequence = "\\u2028";
      break;
    case 0x2029:
      escape_sequence = "\\u2029";
      break;
    default:
      break;
  }

  if (escape_sequence == nullptr)
    return false;

  dest->append(escape_sequence);
  return true;
}
79*635a8641SAndroid Build Coastguard Worker
80*635a8641SAndroid Build Coastguard Worker template <typename S>
EscapeJSONStringImpl(const S & str,bool put_in_quotes,std::string * dest)81*635a8641SAndroid Build Coastguard Worker bool EscapeJSONStringImpl(const S& str, bool put_in_quotes, std::string* dest) {
82*635a8641SAndroid Build Coastguard Worker bool did_replacement = false;
83*635a8641SAndroid Build Coastguard Worker
84*635a8641SAndroid Build Coastguard Worker if (put_in_quotes)
85*635a8641SAndroid Build Coastguard Worker dest->push_back('"');
86*635a8641SAndroid Build Coastguard Worker
87*635a8641SAndroid Build Coastguard Worker // Casting is necessary because ICU uses int32_t. Try and do so safely.
88*635a8641SAndroid Build Coastguard Worker CHECK_LE(str.length(),
89*635a8641SAndroid Build Coastguard Worker static_cast<size_t>(std::numeric_limits<int32_t>::max()));
90*635a8641SAndroid Build Coastguard Worker const int32_t length = static_cast<int32_t>(str.length());
91*635a8641SAndroid Build Coastguard Worker
92*635a8641SAndroid Build Coastguard Worker for (int32_t i = 0; i < length; ++i) {
93*635a8641SAndroid Build Coastguard Worker uint32_t code_point;
94*635a8641SAndroid Build Coastguard Worker if (!ReadUnicodeCharacter(str.data(), length, &i, &code_point) ||
95*635a8641SAndroid Build Coastguard Worker code_point == static_cast<decltype(code_point)>(CBU_SENTINEL) ||
96*635a8641SAndroid Build Coastguard Worker !IsValidCharacter(code_point)) {
97*635a8641SAndroid Build Coastguard Worker code_point = kReplacementCodePoint;
98*635a8641SAndroid Build Coastguard Worker did_replacement = true;
99*635a8641SAndroid Build Coastguard Worker }
100*635a8641SAndroid Build Coastguard Worker
101*635a8641SAndroid Build Coastguard Worker if (EscapeSpecialCodePoint(code_point, dest))
102*635a8641SAndroid Build Coastguard Worker continue;
103*635a8641SAndroid Build Coastguard Worker
104*635a8641SAndroid Build Coastguard Worker // Escape non-printing characters.
105*635a8641SAndroid Build Coastguard Worker if (code_point < 32)
106*635a8641SAndroid Build Coastguard Worker base::StringAppendF(dest, kU16EscapeFormat, code_point);
107*635a8641SAndroid Build Coastguard Worker else
108*635a8641SAndroid Build Coastguard Worker WriteUnicodeCharacter(code_point, dest);
109*635a8641SAndroid Build Coastguard Worker }
110*635a8641SAndroid Build Coastguard Worker
111*635a8641SAndroid Build Coastguard Worker if (put_in_quotes)
112*635a8641SAndroid Build Coastguard Worker dest->push_back('"');
113*635a8641SAndroid Build Coastguard Worker
114*635a8641SAndroid Build Coastguard Worker return !did_replacement;
115*635a8641SAndroid Build Coastguard Worker }
116*635a8641SAndroid Build Coastguard Worker
117*635a8641SAndroid Build Coastguard Worker } // namespace
118*635a8641SAndroid Build Coastguard Worker
EscapeJSONString(StringPiece str,bool put_in_quotes,std::string * dest)119*635a8641SAndroid Build Coastguard Worker bool EscapeJSONString(StringPiece str, bool put_in_quotes, std::string* dest) {
120*635a8641SAndroid Build Coastguard Worker return EscapeJSONStringImpl(str, put_in_quotes, dest);
121*635a8641SAndroid Build Coastguard Worker }
122*635a8641SAndroid Build Coastguard Worker
EscapeJSONString(StringPiece16 str,bool put_in_quotes,std::string * dest)123*635a8641SAndroid Build Coastguard Worker bool EscapeJSONString(StringPiece16 str,
124*635a8641SAndroid Build Coastguard Worker bool put_in_quotes,
125*635a8641SAndroid Build Coastguard Worker std::string* dest) {
126*635a8641SAndroid Build Coastguard Worker return EscapeJSONStringImpl(str, put_in_quotes, dest);
127*635a8641SAndroid Build Coastguard Worker }
128*635a8641SAndroid Build Coastguard Worker
GetQuotedJSONString(StringPiece str)129*635a8641SAndroid Build Coastguard Worker std::string GetQuotedJSONString(StringPiece str) {
130*635a8641SAndroid Build Coastguard Worker std::string dest;
131*635a8641SAndroid Build Coastguard Worker bool ok = EscapeJSONStringImpl(str, true, &dest);
132*635a8641SAndroid Build Coastguard Worker DCHECK(ok);
133*635a8641SAndroid Build Coastguard Worker return dest;
134*635a8641SAndroid Build Coastguard Worker }
135*635a8641SAndroid Build Coastguard Worker
GetQuotedJSONString(StringPiece16 str)136*635a8641SAndroid Build Coastguard Worker std::string GetQuotedJSONString(StringPiece16 str) {
137*635a8641SAndroid Build Coastguard Worker std::string dest;
138*635a8641SAndroid Build Coastguard Worker bool ok = EscapeJSONStringImpl(str, true, &dest);
139*635a8641SAndroid Build Coastguard Worker DCHECK(ok);
140*635a8641SAndroid Build Coastguard Worker return dest;
141*635a8641SAndroid Build Coastguard Worker }
142*635a8641SAndroid Build Coastguard Worker
EscapeBytesAsInvalidJSONString(StringPiece str,bool put_in_quotes)143*635a8641SAndroid Build Coastguard Worker std::string EscapeBytesAsInvalidJSONString(StringPiece str,
144*635a8641SAndroid Build Coastguard Worker bool put_in_quotes) {
145*635a8641SAndroid Build Coastguard Worker std::string dest;
146*635a8641SAndroid Build Coastguard Worker
147*635a8641SAndroid Build Coastguard Worker if (put_in_quotes)
148*635a8641SAndroid Build Coastguard Worker dest.push_back('"');
149*635a8641SAndroid Build Coastguard Worker
150*635a8641SAndroid Build Coastguard Worker for (StringPiece::const_iterator it = str.begin(); it != str.end(); ++it) {
151*635a8641SAndroid Build Coastguard Worker unsigned char c = *it;
152*635a8641SAndroid Build Coastguard Worker if (EscapeSpecialCodePoint(c, &dest))
153*635a8641SAndroid Build Coastguard Worker continue;
154*635a8641SAndroid Build Coastguard Worker
155*635a8641SAndroid Build Coastguard Worker if (c < 32 || c > 126)
156*635a8641SAndroid Build Coastguard Worker base::StringAppendF(&dest, kU16EscapeFormat, c);
157*635a8641SAndroid Build Coastguard Worker else
158*635a8641SAndroid Build Coastguard Worker dest.push_back(*it);
159*635a8641SAndroid Build Coastguard Worker }
160*635a8641SAndroid Build Coastguard Worker
161*635a8641SAndroid Build Coastguard Worker if (put_in_quotes)
162*635a8641SAndroid Build Coastguard Worker dest.push_back('"');
163*635a8641SAndroid Build Coastguard Worker
164*635a8641SAndroid Build Coastguard Worker return dest;
165*635a8641SAndroid Build Coastguard Worker }
166*635a8641SAndroid Build Coastguard Worker
167*635a8641SAndroid Build Coastguard Worker } // namespace base
168