xref: /aosp_15_r20/external/regex-re2/util/strutil.cc (revision ccdc9c3e24c519bfa4832a66aa2e83a52c19f295)
1*ccdc9c3eSSadaf Ebrahimi // Copyright 1999-2005 The RE2 Authors.  All Rights Reserved.
2*ccdc9c3eSSadaf Ebrahimi // Use of this source code is governed by a BSD-style
3*ccdc9c3eSSadaf Ebrahimi // license that can be found in the LICENSE file.
4*ccdc9c3eSSadaf Ebrahimi 
5*ccdc9c3eSSadaf Ebrahimi #include <stdarg.h>
6*ccdc9c3eSSadaf Ebrahimi #include <stdio.h>
7*ccdc9c3eSSadaf Ebrahimi 
8*ccdc9c3eSSadaf Ebrahimi #include "util/strutil.h"
9*ccdc9c3eSSadaf Ebrahimi 
10*ccdc9c3eSSadaf Ebrahimi #ifdef _WIN32
11*ccdc9c3eSSadaf Ebrahimi #define snprintf _snprintf
12*ccdc9c3eSSadaf Ebrahimi #define vsnprintf _vsnprintf
13*ccdc9c3eSSadaf Ebrahimi #endif
14*ccdc9c3eSSadaf Ebrahimi 
15*ccdc9c3eSSadaf Ebrahimi namespace re2 {
16*ccdc9c3eSSadaf Ebrahimi 
// ----------------------------------------------------------------------
// CEscapeString()
//    Writes an escaped copy of the 'src_len' bytes at 'src' into 'dest',
//    which has room for 'dest_len' bytes.  Newline, carriage return, tab,
//    both quote characters and backslash become two-character escapes;
//    any other byte outside the range ' '..'~' becomes a four-character
//    octal escape; everything else is copied unchanged.
//    'src' and 'dest' should not overlap.
//    Returns the number of bytes written to 'dest' (not including the
//    terminating \0) or (size_t)-1 if there was insufficient space.
// ----------------------------------------------------------------------
static size_t CEscapeString(const char* src, size_t src_len,
                            char* dest, size_t dest_len) {
  size_t used = 0;

  for (size_t i = 0; i < src_len; i++) {
    // Every iteration demands room for a two-character escape, even if
    // the byte turns out to be copied verbatim.
    if (dest_len - used < 2)
      return (size_t)-1;

    const unsigned char c = src[i];

    // Second character of the two-character escape, or \0 if 'c' has none.
    char short_escape = '\0';
    switch (c) {
      case '\n': short_escape = 'n';  break;
      case '\r': short_escape = 'r';  break;
      case '\t': short_escape = 't';  break;
      case '\"': short_escape = '\"'; break;
      case '\'': short_escape = '\''; break;
      case '\\': short_escape = '\\'; break;
      default:   break;
    }

    if (short_escape != '\0') {
      dest[used++] = '\\';
      dest[used++] = short_escape;
      continue;
    }

    if (c >= ' ' && c <= '~') {
      // Printable ASCII is copied through unchanged.
      dest[used++] = c;
      continue;
    }

    // Everything else is emitted as a backslash plus exactly three octal
    // digits.  snprintf also writes a \0 after the digits, hence the
    // requirement for five free bytes rather than four.
    if (dest_len - used < 5)
      return (size_t)-1;
    snprintf(dest + used, 5, "\\%03o", c);
    used += 4;
  }

  // The terminator needs one more byte; it is not counted in the result.
  if (dest_len - used < 1)
    return (size_t)-1;

  dest[used] = '\0';
  return used;
}
62*ccdc9c3eSSadaf Ebrahimi 
63*ccdc9c3eSSadaf Ebrahimi // ----------------------------------------------------------------------
64*ccdc9c3eSSadaf Ebrahimi // CEscape()
65*ccdc9c3eSSadaf Ebrahimi //    Copies 'src' to result, escaping dangerous characters using
66*ccdc9c3eSSadaf Ebrahimi //    C-style escape sequences.  'src' and 'dest' should not overlap.
67*ccdc9c3eSSadaf Ebrahimi // ----------------------------------------------------------------------
CEscape(const StringPiece & src)68*ccdc9c3eSSadaf Ebrahimi string CEscape(const StringPiece& src) {
69*ccdc9c3eSSadaf Ebrahimi   const size_t dest_len = src.size() * 4 + 1; // Maximum possible expansion
70*ccdc9c3eSSadaf Ebrahimi   char* dest = new char[dest_len];
71*ccdc9c3eSSadaf Ebrahimi   const size_t used = CEscapeString(src.data(), src.size(),
72*ccdc9c3eSSadaf Ebrahimi                                     dest, dest_len);
73*ccdc9c3eSSadaf Ebrahimi   string s = string(dest, used);
74*ccdc9c3eSSadaf Ebrahimi   delete[] dest;
75*ccdc9c3eSSadaf Ebrahimi   return s;
76*ccdc9c3eSSadaf Ebrahimi }
77*ccdc9c3eSSadaf Ebrahimi 
// Modifies '*prefix' in place: strips every trailing 255 ('\xff') byte,
// then increments the last remaining byte.  If the string is empty or
// consists entirely of 255's, the result is the empty string.
void PrefixSuccessor(string* prefix) {
  // A 255 byte cannot be incremented, so find how much of the string
  // survives once all trailing 255's are dropped.
  size_t keep = prefix->size();
  while (keep > 0 &&
         (*prefix)[keep - 1] == '\xff') {  // char literal avoids signed/unsigned.
    --keep;
  }
  prefix->resize(keep);

  // Bump the last surviving byte, if there is one.
  if (keep > 0)
    ++(*prefix)[keep - 1];
}
94*ccdc9c3eSSadaf Ebrahimi 
// ----------------------------------------------------------------------
// StringAppendV()
//    Formats 'format' printf-style with the arguments in 'ap' and appends
//    the result to '*dst'.  'ap' itself is never consumed: every
//    formatting attempt runs on a va_copy of it.
//    If formatting fails, '*dst' is left unchanged.
// ----------------------------------------------------------------------
static void StringAppendV(string* dst, const char* format, va_list ap) {
  // First try with a small fixed size buffer
  char space[1024];

  // It's possible for methods that use a va_list to invalidate
  // the data in it upon use.  The fix is to make a copy
  // of the structure before using it and use that copy instead.
  va_list backup_ap;
  va_copy(backup_ap, ap);
  int result = vsnprintf(space, sizeof(space), format, backup_ap);
  va_end(backup_ap);

  if ((result >= 0) && (static_cast<size_t>(result) < sizeof(space))) {
    // It fit
    dst->append(space, result);
    return;
  }

  // Repeatedly increase buffer size until it fits
  int length = sizeof(space);
  while (true) {
    if (result < 0) {
#ifdef _WIN32
      // _vsnprintf reports truncation as a negative value without saying
      // how much space is needed, so all we can do is double the buffer.
      // Stop once another doubling would overflow 'int'; at that point
      // the failure is treated as a genuine formatting error.
      const int kMaxLength = 1 << 30;
      if (length >= kMaxLength)
        return;
      length *= 2;
#else
      // A conforming vsnprintf returns a negative value only for a real
      // formatting error, which a larger buffer cannot cure.  Give up
      // instead of doubling forever.
      return;
#endif
    } else {
      // We need exactly "result+1" characters
      length = result+1;
    }
    char* buf = new char[length];

    // Restore the va_list before we use it again
    va_copy(backup_ap, ap);
    result = vsnprintf(buf, length, format, backup_ap);
    va_end(backup_ap);

    if ((result >= 0) && (result < length)) {
      // It fit
      dst->append(buf, result);
      delete[] buf;
      return;
    }
    delete[] buf;
  }
}
139*ccdc9c3eSSadaf Ebrahimi 
StringPrintf(const char * format,...)140*ccdc9c3eSSadaf Ebrahimi string StringPrintf(const char* format, ...) {
141*ccdc9c3eSSadaf Ebrahimi   va_list ap;
142*ccdc9c3eSSadaf Ebrahimi   va_start(ap, format);
143*ccdc9c3eSSadaf Ebrahimi   string result;
144*ccdc9c3eSSadaf Ebrahimi   StringAppendV(&result, format, ap);
145*ccdc9c3eSSadaf Ebrahimi   va_end(ap);
146*ccdc9c3eSSadaf Ebrahimi   return result;
147*ccdc9c3eSSadaf Ebrahimi }
148*ccdc9c3eSSadaf Ebrahimi 
SStringPrintf(string * dst,const char * format,...)149*ccdc9c3eSSadaf Ebrahimi void SStringPrintf(string* dst, const char* format, ...) {
150*ccdc9c3eSSadaf Ebrahimi   va_list ap;
151*ccdc9c3eSSadaf Ebrahimi   va_start(ap, format);
152*ccdc9c3eSSadaf Ebrahimi   dst->clear();
153*ccdc9c3eSSadaf Ebrahimi   StringAppendV(dst, format, ap);
154*ccdc9c3eSSadaf Ebrahimi   va_end(ap);
155*ccdc9c3eSSadaf Ebrahimi }
156*ccdc9c3eSSadaf Ebrahimi 
StringAppendF(string * dst,const char * format,...)157*ccdc9c3eSSadaf Ebrahimi void StringAppendF(string* dst, const char* format, ...) {
158*ccdc9c3eSSadaf Ebrahimi   va_list ap;
159*ccdc9c3eSSadaf Ebrahimi   va_start(ap, format);
160*ccdc9c3eSSadaf Ebrahimi   StringAppendV(dst, format, ap);
161*ccdc9c3eSSadaf Ebrahimi   va_end(ap);
162*ccdc9c3eSSadaf Ebrahimi }
163*ccdc9c3eSSadaf Ebrahimi 
164*ccdc9c3eSSadaf Ebrahimi }  // namespace re2
165