1*9356374aSAndroid Build Coastguard Worker //
2*9356374aSAndroid Build Coastguard Worker // Copyright 2017 The Abseil Authors.
3*9356374aSAndroid Build Coastguard Worker //
4*9356374aSAndroid Build Coastguard Worker // Licensed under the Apache License, Version 2.0 (the "License");
5*9356374aSAndroid Build Coastguard Worker // you may not use this file except in compliance with the License.
6*9356374aSAndroid Build Coastguard Worker // You may obtain a copy of the License at
7*9356374aSAndroid Build Coastguard Worker //
8*9356374aSAndroid Build Coastguard Worker // https://www.apache.org/licenses/LICENSE-2.0
9*9356374aSAndroid Build Coastguard Worker //
10*9356374aSAndroid Build Coastguard Worker // Unless required by applicable law or agreed to in writing, software
11*9356374aSAndroid Build Coastguard Worker // distributed under the License is distributed on an "AS IS" BASIS,
12*9356374aSAndroid Build Coastguard Worker // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13*9356374aSAndroid Build Coastguard Worker // See the License for the specific language governing permissions and
14*9356374aSAndroid Build Coastguard Worker // limitations under the License.
15*9356374aSAndroid Build Coastguard Worker //
// -----------------------------------------------------------------------------
// File: ascii.h
// -----------------------------------------------------------------------------
//
// This package contains functions operating on characters and strings
// restricted to standard ASCII. These include character classification
// functions analogous to those found in the ANSI C Standard Library <ctype.h>
// header file.
//
// C++ implementations provide <ctype.h> functionality based on their
// C environment locale. In general, reliance on such a locale is not ideal, as
// the locale standard is problematic (and may not return invariant information
// for the same character set, for example). These `ascii_*()` functions are
// hard-wired for standard ASCII, much faster, and guaranteed to behave
// consistently. They will never be overloaded, nor will their function
// signature change.
//
// `ascii_isalnum()`, `ascii_isalpha()`, `ascii_isascii()`, `ascii_isblank()`,
// `ascii_iscntrl()`, `ascii_isdigit()`, `ascii_isgraph()`, `ascii_islower()`,
// `ascii_isprint()`, `ascii_ispunct()`, `ascii_isspace()`, `ascii_isupper()`,
// `ascii_isxdigit()`
//   Analogous to the <ctype.h> functions with similar names, these
//   functions take an unsigned char and return a bool, based on whether the
//   character matches the condition specified.
//
//   If the input character has a numerical value greater than 127, these
//   functions return `false`.
//
// `ascii_tolower()`, `ascii_toupper()`
//   Analogous to the <ctype.h> functions with similar names, these functions
//   take an unsigned char and return a char.
//
//   If the input character is not an ASCII {lower,upper}-case letter (including
//   numerical values greater than 127) then the functions return the same value
//   as the input character.
51*9356374aSAndroid Build Coastguard Worker
52*9356374aSAndroid Build Coastguard Worker #ifndef ABSL_STRINGS_ASCII_H_
53*9356374aSAndroid Build Coastguard Worker #define ABSL_STRINGS_ASCII_H_
54*9356374aSAndroid Build Coastguard Worker
55*9356374aSAndroid Build Coastguard Worker #include <algorithm>
56*9356374aSAndroid Build Coastguard Worker #include <cstddef>
57*9356374aSAndroid Build Coastguard Worker #include <string>
58*9356374aSAndroid Build Coastguard Worker
59*9356374aSAndroid Build Coastguard Worker #include "absl/base/attributes.h"
60*9356374aSAndroid Build Coastguard Worker #include "absl/base/config.h"
61*9356374aSAndroid Build Coastguard Worker #include "absl/base/nullability.h"
62*9356374aSAndroid Build Coastguard Worker #include "absl/strings/string_view.h"
63*9356374aSAndroid Build Coastguard Worker
64*9356374aSAndroid Build Coastguard Worker namespace absl {
65*9356374aSAndroid Build Coastguard Worker ABSL_NAMESPACE_BEGIN
66*9356374aSAndroid Build Coastguard Worker namespace ascii_internal {
67*9356374aSAndroid Build Coastguard Worker
68*9356374aSAndroid Build Coastguard Worker // Declaration for an array of bitfields holding character information.
69*9356374aSAndroid Build Coastguard Worker ABSL_DLL extern const unsigned char kPropertyBits[256];
70*9356374aSAndroid Build Coastguard Worker
71*9356374aSAndroid Build Coastguard Worker // Declaration for the array of characters to upper-case characters.
72*9356374aSAndroid Build Coastguard Worker ABSL_DLL extern const char kToUpper[256];
73*9356374aSAndroid Build Coastguard Worker
74*9356374aSAndroid Build Coastguard Worker // Declaration for the array of characters to lower-case characters.
75*9356374aSAndroid Build Coastguard Worker ABSL_DLL extern const char kToLower[256];
76*9356374aSAndroid Build Coastguard Worker
77*9356374aSAndroid Build Coastguard Worker } // namespace ascii_internal
78*9356374aSAndroid Build Coastguard Worker
79*9356374aSAndroid Build Coastguard Worker // ascii_isalpha()
80*9356374aSAndroid Build Coastguard Worker //
81*9356374aSAndroid Build Coastguard Worker // Determines whether the given character is an alphabetic character.
ascii_isalpha(unsigned char c)82*9356374aSAndroid Build Coastguard Worker inline bool ascii_isalpha(unsigned char c) {
83*9356374aSAndroid Build Coastguard Worker return (ascii_internal::kPropertyBits[c] & 0x01) != 0;
84*9356374aSAndroid Build Coastguard Worker }
85*9356374aSAndroid Build Coastguard Worker
86*9356374aSAndroid Build Coastguard Worker // ascii_isalnum()
87*9356374aSAndroid Build Coastguard Worker //
88*9356374aSAndroid Build Coastguard Worker // Determines whether the given character is an alphanumeric character.
ascii_isalnum(unsigned char c)89*9356374aSAndroid Build Coastguard Worker inline bool ascii_isalnum(unsigned char c) {
90*9356374aSAndroid Build Coastguard Worker return (ascii_internal::kPropertyBits[c] & 0x04) != 0;
91*9356374aSAndroid Build Coastguard Worker }
92*9356374aSAndroid Build Coastguard Worker
93*9356374aSAndroid Build Coastguard Worker // ascii_isspace()
94*9356374aSAndroid Build Coastguard Worker //
95*9356374aSAndroid Build Coastguard Worker // Determines whether the given character is a whitespace character (space,
96*9356374aSAndroid Build Coastguard Worker // tab, vertical tab, formfeed, linefeed, or carriage return).
ascii_isspace(unsigned char c)97*9356374aSAndroid Build Coastguard Worker inline bool ascii_isspace(unsigned char c) {
98*9356374aSAndroid Build Coastguard Worker return (ascii_internal::kPropertyBits[c] & 0x08) != 0;
99*9356374aSAndroid Build Coastguard Worker }
100*9356374aSAndroid Build Coastguard Worker
101*9356374aSAndroid Build Coastguard Worker // ascii_ispunct()
102*9356374aSAndroid Build Coastguard Worker //
103*9356374aSAndroid Build Coastguard Worker // Determines whether the given character is a punctuation character.
ascii_ispunct(unsigned char c)104*9356374aSAndroid Build Coastguard Worker inline bool ascii_ispunct(unsigned char c) {
105*9356374aSAndroid Build Coastguard Worker return (ascii_internal::kPropertyBits[c] & 0x10) != 0;
106*9356374aSAndroid Build Coastguard Worker }
107*9356374aSAndroid Build Coastguard Worker
108*9356374aSAndroid Build Coastguard Worker // ascii_isblank()
109*9356374aSAndroid Build Coastguard Worker //
110*9356374aSAndroid Build Coastguard Worker // Determines whether the given character is a blank character (tab or space).
ascii_isblank(unsigned char c)111*9356374aSAndroid Build Coastguard Worker inline bool ascii_isblank(unsigned char c) {
112*9356374aSAndroid Build Coastguard Worker return (ascii_internal::kPropertyBits[c] & 0x20) != 0;
113*9356374aSAndroid Build Coastguard Worker }
114*9356374aSAndroid Build Coastguard Worker
115*9356374aSAndroid Build Coastguard Worker // ascii_iscntrl()
116*9356374aSAndroid Build Coastguard Worker //
117*9356374aSAndroid Build Coastguard Worker // Determines whether the given character is a control character.
ascii_iscntrl(unsigned char c)118*9356374aSAndroid Build Coastguard Worker inline bool ascii_iscntrl(unsigned char c) {
119*9356374aSAndroid Build Coastguard Worker return (ascii_internal::kPropertyBits[c] & 0x40) != 0;
120*9356374aSAndroid Build Coastguard Worker }
121*9356374aSAndroid Build Coastguard Worker
122*9356374aSAndroid Build Coastguard Worker // ascii_isxdigit()
123*9356374aSAndroid Build Coastguard Worker //
124*9356374aSAndroid Build Coastguard Worker // Determines whether the given character can be represented as a hexadecimal
125*9356374aSAndroid Build Coastguard Worker // digit character (i.e. {0-9} or {A-F}).
ascii_isxdigit(unsigned char c)126*9356374aSAndroid Build Coastguard Worker inline bool ascii_isxdigit(unsigned char c) {
127*9356374aSAndroid Build Coastguard Worker return (ascii_internal::kPropertyBits[c] & 0x80) != 0;
128*9356374aSAndroid Build Coastguard Worker }
129*9356374aSAndroid Build Coastguard Worker
// ascii_isdigit()
//
// Reports whether `c` can be represented as a decimal digit character
// (i.e. {0-9}). Implemented as a direct range comparison.
inline bool ascii_isdigit(unsigned char c) {
  if (c < '0') return false;
  return c <= '9';
}
135*9356374aSAndroid Build Coastguard Worker
// ascii_isprint()
//
// Reports whether `c` is printable, including spaces: true exactly for
// values in the half-open range [32, 127).
inline bool ascii_isprint(unsigned char c) {
  if (c < 32) return false;  // below the space character
  return c < 127;            // 127 and above are excluded
}
140*9356374aSAndroid Build Coastguard Worker
// ascii_isgraph()
//
// Reports whether `c` has a graphical representation: true exactly for
// values strictly between 32 and 127, which excludes the space character.
inline bool ascii_isgraph(unsigned char c) {
  if (c <= 32) return false;  // space (32) and everything below it
  return c < 127;             // 127 and above are excluded
}
145*9356374aSAndroid Build Coastguard Worker
// ascii_isupper()
//
// Reports whether `c` is an uppercase letter, i.e. lies in ['A', 'Z'].
inline bool ascii_isupper(unsigned char c) {
  const bool below_range = c < 'A';
  const bool above_range = c > 'Z';
  return !(below_range || above_range);
}
150*9356374aSAndroid Build Coastguard Worker
// ascii_islower()
//
// Reports whether `c` is a lowercase letter, i.e. lies in ['a', 'z'].
inline bool ascii_islower(unsigned char c) {
  return 'a' <= c && c <= 'z';
}
155*9356374aSAndroid Build Coastguard Worker
// ascii_isascii()
//
// Reports whether `c` is ASCII, i.e. its numerical value is at most 127.
inline bool ascii_isascii(unsigned char c) {
  return c <= 127;
}
160*9356374aSAndroid Build Coastguard Worker
161*9356374aSAndroid Build Coastguard Worker // ascii_tolower()
162*9356374aSAndroid Build Coastguard Worker //
163*9356374aSAndroid Build Coastguard Worker // Returns an ASCII character, converting to lowercase if uppercase is
164*9356374aSAndroid Build Coastguard Worker // passed. Note that character values > 127 are simply returned.
ascii_tolower(unsigned char c)165*9356374aSAndroid Build Coastguard Worker inline char ascii_tolower(unsigned char c) {
166*9356374aSAndroid Build Coastguard Worker return ascii_internal::kToLower[c];
167*9356374aSAndroid Build Coastguard Worker }
168*9356374aSAndroid Build Coastguard Worker
169*9356374aSAndroid Build Coastguard Worker // Converts the characters in `s` to lowercase, changing the contents of `s`.
170*9356374aSAndroid Build Coastguard Worker void AsciiStrToLower(absl::Nonnull<std::string*> s);
171*9356374aSAndroid Build Coastguard Worker
172*9356374aSAndroid Build Coastguard Worker // Creates a lowercase string from a given absl::string_view.
AsciiStrToLower(absl::string_view s)173*9356374aSAndroid Build Coastguard Worker ABSL_MUST_USE_RESULT inline std::string AsciiStrToLower(absl::string_view s) {
174*9356374aSAndroid Build Coastguard Worker std::string result(s);
175*9356374aSAndroid Build Coastguard Worker absl::AsciiStrToLower(&result);
176*9356374aSAndroid Build Coastguard Worker return result;
177*9356374aSAndroid Build Coastguard Worker }
178*9356374aSAndroid Build Coastguard Worker
179*9356374aSAndroid Build Coastguard Worker // ascii_toupper()
180*9356374aSAndroid Build Coastguard Worker //
181*9356374aSAndroid Build Coastguard Worker // Returns the ASCII character, converting to upper-case if lower-case is
182*9356374aSAndroid Build Coastguard Worker // passed. Note that characters values > 127 are simply returned.
ascii_toupper(unsigned char c)183*9356374aSAndroid Build Coastguard Worker inline char ascii_toupper(unsigned char c) {
184*9356374aSAndroid Build Coastguard Worker return ascii_internal::kToUpper[c];
185*9356374aSAndroid Build Coastguard Worker }
186*9356374aSAndroid Build Coastguard Worker
187*9356374aSAndroid Build Coastguard Worker // Converts the characters in `s` to uppercase, changing the contents of `s`.
188*9356374aSAndroid Build Coastguard Worker void AsciiStrToUpper(absl::Nonnull<std::string*> s);
189*9356374aSAndroid Build Coastguard Worker
190*9356374aSAndroid Build Coastguard Worker // Creates an uppercase string from a given absl::string_view.
AsciiStrToUpper(absl::string_view s)191*9356374aSAndroid Build Coastguard Worker ABSL_MUST_USE_RESULT inline std::string AsciiStrToUpper(absl::string_view s) {
192*9356374aSAndroid Build Coastguard Worker std::string result(s);
193*9356374aSAndroid Build Coastguard Worker absl::AsciiStrToUpper(&result);
194*9356374aSAndroid Build Coastguard Worker return result;
195*9356374aSAndroid Build Coastguard Worker }
196*9356374aSAndroid Build Coastguard Worker
197*9356374aSAndroid Build Coastguard Worker // Returns absl::string_view with whitespace stripped from the beginning of the
198*9356374aSAndroid Build Coastguard Worker // given string_view.
StripLeadingAsciiWhitespace(absl::string_view str)199*9356374aSAndroid Build Coastguard Worker ABSL_MUST_USE_RESULT inline absl::string_view StripLeadingAsciiWhitespace(
200*9356374aSAndroid Build Coastguard Worker absl::string_view str) {
201*9356374aSAndroid Build Coastguard Worker auto it = std::find_if_not(str.begin(), str.end(), absl::ascii_isspace);
202*9356374aSAndroid Build Coastguard Worker return str.substr(static_cast<size_t>(it - str.begin()));
203*9356374aSAndroid Build Coastguard Worker }
204*9356374aSAndroid Build Coastguard Worker
205*9356374aSAndroid Build Coastguard Worker // Strips in place whitespace from the beginning of the given string.
StripLeadingAsciiWhitespace(absl::Nonnull<std::string * > str)206*9356374aSAndroid Build Coastguard Worker inline void StripLeadingAsciiWhitespace(absl::Nonnull<std::string*> str) {
207*9356374aSAndroid Build Coastguard Worker auto it = std::find_if_not(str->begin(), str->end(), absl::ascii_isspace);
208*9356374aSAndroid Build Coastguard Worker str->erase(str->begin(), it);
209*9356374aSAndroid Build Coastguard Worker }
210*9356374aSAndroid Build Coastguard Worker
211*9356374aSAndroid Build Coastguard Worker // Returns absl::string_view with whitespace stripped from the end of the given
212*9356374aSAndroid Build Coastguard Worker // string_view.
StripTrailingAsciiWhitespace(absl::string_view str)213*9356374aSAndroid Build Coastguard Worker ABSL_MUST_USE_RESULT inline absl::string_view StripTrailingAsciiWhitespace(
214*9356374aSAndroid Build Coastguard Worker absl::string_view str) {
215*9356374aSAndroid Build Coastguard Worker auto it = std::find_if_not(str.rbegin(), str.rend(), absl::ascii_isspace);
216*9356374aSAndroid Build Coastguard Worker return str.substr(0, static_cast<size_t>(str.rend() - it));
217*9356374aSAndroid Build Coastguard Worker }
218*9356374aSAndroid Build Coastguard Worker
219*9356374aSAndroid Build Coastguard Worker // Strips in place whitespace from the end of the given string
StripTrailingAsciiWhitespace(absl::Nonnull<std::string * > str)220*9356374aSAndroid Build Coastguard Worker inline void StripTrailingAsciiWhitespace(absl::Nonnull<std::string*> str) {
221*9356374aSAndroid Build Coastguard Worker auto it = std::find_if_not(str->rbegin(), str->rend(), absl::ascii_isspace);
222*9356374aSAndroid Build Coastguard Worker str->erase(static_cast<size_t>(str->rend() - it));
223*9356374aSAndroid Build Coastguard Worker }
224*9356374aSAndroid Build Coastguard Worker
225*9356374aSAndroid Build Coastguard Worker // Returns absl::string_view with whitespace stripped from both ends of the
226*9356374aSAndroid Build Coastguard Worker // given string_view.
StripAsciiWhitespace(absl::string_view str)227*9356374aSAndroid Build Coastguard Worker ABSL_MUST_USE_RESULT inline absl::string_view StripAsciiWhitespace(
228*9356374aSAndroid Build Coastguard Worker absl::string_view str) {
229*9356374aSAndroid Build Coastguard Worker return StripTrailingAsciiWhitespace(StripLeadingAsciiWhitespace(str));
230*9356374aSAndroid Build Coastguard Worker }
231*9356374aSAndroid Build Coastguard Worker
232*9356374aSAndroid Build Coastguard Worker // Strips in place whitespace from both ends of the given string
StripAsciiWhitespace(absl::Nonnull<std::string * > str)233*9356374aSAndroid Build Coastguard Worker inline void StripAsciiWhitespace(absl::Nonnull<std::string*> str) {
234*9356374aSAndroid Build Coastguard Worker StripTrailingAsciiWhitespace(str);
235*9356374aSAndroid Build Coastguard Worker StripLeadingAsciiWhitespace(str);
236*9356374aSAndroid Build Coastguard Worker }
237*9356374aSAndroid Build Coastguard Worker
238*9356374aSAndroid Build Coastguard Worker // Removes leading, trailing, and consecutive internal whitespace.
239*9356374aSAndroid Build Coastguard Worker void RemoveExtraAsciiWhitespace(absl::Nonnull<std::string*> str);
240*9356374aSAndroid Build Coastguard Worker
241*9356374aSAndroid Build Coastguard Worker ABSL_NAMESPACE_END
242*9356374aSAndroid Build Coastguard Worker } // namespace absl
243*9356374aSAndroid Build Coastguard Worker
244*9356374aSAndroid Build Coastguard Worker #endif // ABSL_STRINGS_ASCII_H_
245