// Copyright 1999-2005 The RE2 Authors.  All Rights Reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
4*ccdc9c3eSSadaf Ebrahimi
#include <limits.h>
#include <stdarg.h>
#include <stdio.h>

#include "util/strutil.h"
9*ccdc9c3eSSadaf Ebrahimi
10*ccdc9c3eSSadaf Ebrahimi #ifdef _WIN32
11*ccdc9c3eSSadaf Ebrahimi #define snprintf _snprintf
12*ccdc9c3eSSadaf Ebrahimi #define vsnprintf _vsnprintf
13*ccdc9c3eSSadaf Ebrahimi #endif
14*ccdc9c3eSSadaf Ebrahimi
15*ccdc9c3eSSadaf Ebrahimi namespace re2 {
16*ccdc9c3eSSadaf Ebrahimi
// ----------------------------------------------------------------------
// CEscapeString()
//    Copies 'src' to 'dest', escaping dangerous characters using
//    C-style escape sequences.  'src' and 'dest' should not overlap.
//
//    Newline, carriage return, tab, double quote, single quote and
//    backslash become two-character escapes.  Any other byte outside
//    the printable ASCII range ' '..'~' becomes a backslash followed by
//    exactly three octal digits.  Everything else is copied unchanged.
//
//    Returns the number of bytes written to 'dest' (not including the \0)
//    or (size_t)-1 if there was insufficient space.  On failure the
//    contents of 'dest' are unspecified.
// ----------------------------------------------------------------------
static size_t CEscapeString(const char* src, size_t src_len,
                            char* dest, size_t dest_len) {
  const size_t kFailure = static_cast<size_t>(-1);
  const char* src_end = src + src_len;
  size_t used = 0;

  for (; src < src_end; src++) {
    // Every iteration needs at least two free bytes: either a
    // two-character escape, or one literal character plus the final \0.
    if (dest_len - used < 2)
      return kFailure;

    const unsigned char c = static_cast<unsigned char>(*src);
    switch (c) {
      case '\n': dest[used++] = '\\'; dest[used++] = 'n';  break;
      case '\r': dest[used++] = '\\'; dest[used++] = 'r';  break;
      case '\t': dest[used++] = '\\'; dest[used++] = 't';  break;
      case '\"': dest[used++] = '\\'; dest[used++] = '\"'; break;
      case '\'': dest[used++] = '\\'; dest[used++] = '\''; break;
      case '\\': dest[used++] = '\\'; dest[used++] = '\\'; break;
      default:
        if (c < ' ' || c > '~') {
          // Non-printable byte: emit \NNN.  The escape is always exactly
          // three octal digits, so a digit that follows it in 'src'
          // cannot be absorbed into the escape and needs no special
          // treatment.
          if (dest_len - used < 5)   // four-character escape + \0
            return kFailure;
          dest[used++] = '\\';
          dest[used++] = static_cast<char>('0' + (c >> 6));
          dest[used++] = static_cast<char>('0' + ((c >> 3) & 7));
          dest[used++] = static_cast<char>('0' + (c & 7));
        } else {
          dest[used++] = static_cast<char>(c);
        }
        break;
    }
  }

  if (dest_len - used < 1)   // make sure that there is room for \0
    return kFailure;

  dest[used] = '\0';   // doesn't count towards return value though
  return used;
}
62*ccdc9c3eSSadaf Ebrahimi
63*ccdc9c3eSSadaf Ebrahimi // ----------------------------------------------------------------------
64*ccdc9c3eSSadaf Ebrahimi // CEscape()
65*ccdc9c3eSSadaf Ebrahimi // Copies 'src' to result, escaping dangerous characters using
66*ccdc9c3eSSadaf Ebrahimi // C-style escape sequences. 'src' and 'dest' should not overlap.
67*ccdc9c3eSSadaf Ebrahimi // ----------------------------------------------------------------------
CEscape(const StringPiece & src)68*ccdc9c3eSSadaf Ebrahimi string CEscape(const StringPiece& src) {
69*ccdc9c3eSSadaf Ebrahimi const size_t dest_len = src.size() * 4 + 1; // Maximum possible expansion
70*ccdc9c3eSSadaf Ebrahimi char* dest = new char[dest_len];
71*ccdc9c3eSSadaf Ebrahimi const size_t used = CEscapeString(src.data(), src.size(),
72*ccdc9c3eSSadaf Ebrahimi dest, dest_len);
73*ccdc9c3eSSadaf Ebrahimi string s = string(dest, used);
74*ccdc9c3eSSadaf Ebrahimi delete[] dest;
75*ccdc9c3eSSadaf Ebrahimi return s;
76*ccdc9c3eSSadaf Ebrahimi }
77*ccdc9c3eSSadaf Ebrahimi
// ----------------------------------------------------------------------
// PrefixSuccessor()
//    Modifies '*prefix' in place: trailing bytes equal to 255 are
//    removed, and then the last remaining byte is incremented by one.
//    If the string is empty or consists entirely of 255's, the result
//    is the empty string.
// ----------------------------------------------------------------------
void PrefixSuccessor(string* prefix) {
  // We can increment the last character in the string and be done
  // unless that character is 255, in which case we have to erase the
  // last character and increment the previous character, unless that
  // is 255, etc.
  while (!prefix->empty()) {
    char& last = prefix->back();
    if (last != '\xff') {   // char literal avoids signed/unsigned.
      ++last;
      return;
    }
    prefix->pop_back();
  }
}
94*ccdc9c3eSSadaf Ebrahimi
// ----------------------------------------------------------------------
// StringAppendV()
//    Formats 'format' with the arguments in 'ap' (printf-style) and
//    appends the result to '*dst'.  'ap' itself is never consumed; each
//    formatting attempt works on a va_copy of it.
//
//    If the output cannot be produced (vsnprintf keeps reporting failure
//    until the buffer size can no longer be represented as an int),
//    '*dst' is left unchanged.
// ----------------------------------------------------------------------
static void StringAppendV(string* dst, const char* format, va_list ap) {
  // First try with a small fixed size buffer
  char space[1024];

  // It's possible for methods that use a va_list to invalidate
  // the data in it upon use.  The fix is to make a copy
  // of the structure before using it and use that copy instead.
  va_list backup_ap;
  va_copy(backup_ap, ap);
  int result = vsnprintf(space, sizeof(space), format, backup_ap);
  va_end(backup_ap);

  if ((result >= 0) && (static_cast<size_t>(result) < sizeof(space))) {
    // It fit
    dst->append(space, static_cast<size_t>(result));
    return;
  }

  // Repeatedly increase buffer size until it fits
  int length = sizeof(space);
  while (true) {
    if (result < 0) {
      // Older behavior: just try doubling the buffer size.  Stop once
      // doubling would overflow 'int' instead of invoking undefined
      // behavior; a vsnprintf that still fails at that size will not
      // succeed with a larger one.
      if (length > INT_MAX / 2)
        return;
      length *= 2;
    } else {
      // We need exactly "result+1" characters
      if (result == INT_MAX)   // result+1 would overflow
        return;
      length = result + 1;
    }

    // Owning buffer: released automatically on every exit path.
    string buf(static_cast<size_t>(length), '\0');

    // Restore the va_list before we use it again
    va_copy(backup_ap, ap);
    result = vsnprintf(&buf[0], static_cast<size_t>(length), format,
                       backup_ap);
    va_end(backup_ap);

    if ((result >= 0) && (result < length)) {
      // It fit
      dst->append(buf.data(), static_cast<size_t>(result));
      return;
    }
  }
}
139*ccdc9c3eSSadaf Ebrahimi
StringPrintf(const char * format,...)140*ccdc9c3eSSadaf Ebrahimi string StringPrintf(const char* format, ...) {
141*ccdc9c3eSSadaf Ebrahimi va_list ap;
142*ccdc9c3eSSadaf Ebrahimi va_start(ap, format);
143*ccdc9c3eSSadaf Ebrahimi string result;
144*ccdc9c3eSSadaf Ebrahimi StringAppendV(&result, format, ap);
145*ccdc9c3eSSadaf Ebrahimi va_end(ap);
146*ccdc9c3eSSadaf Ebrahimi return result;
147*ccdc9c3eSSadaf Ebrahimi }
148*ccdc9c3eSSadaf Ebrahimi
SStringPrintf(string * dst,const char * format,...)149*ccdc9c3eSSadaf Ebrahimi void SStringPrintf(string* dst, const char* format, ...) {
150*ccdc9c3eSSadaf Ebrahimi va_list ap;
151*ccdc9c3eSSadaf Ebrahimi va_start(ap, format);
152*ccdc9c3eSSadaf Ebrahimi dst->clear();
153*ccdc9c3eSSadaf Ebrahimi StringAppendV(dst, format, ap);
154*ccdc9c3eSSadaf Ebrahimi va_end(ap);
155*ccdc9c3eSSadaf Ebrahimi }
156*ccdc9c3eSSadaf Ebrahimi
StringAppendF(string * dst,const char * format,...)157*ccdc9c3eSSadaf Ebrahimi void StringAppendF(string* dst, const char* format, ...) {
158*ccdc9c3eSSadaf Ebrahimi va_list ap;
159*ccdc9c3eSSadaf Ebrahimi va_start(ap, format);
160*ccdc9c3eSSadaf Ebrahimi StringAppendV(dst, format, ap);
161*ccdc9c3eSSadaf Ebrahimi va_end(ap);
162*ccdc9c3eSSadaf Ebrahimi }
163*ccdc9c3eSSadaf Ebrahimi
164*ccdc9c3eSSadaf Ebrahimi } // namespace re2
165