// xref: /aosp_15_r20/external/abseil-cpp/absl/strings/charset.h (revision 9356374a3709195abf420251b3e825997ff56c0f)
// Copyright 2022 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// -----------------------------------------------------------------------------
// File: charset.h
// -----------------------------------------------------------------------------
//
// This file contains absl::CharSet, a fast, bit-vector set of 8-bit unsigned
// characters.
//
// Instances can be initialized as constexpr constants. For example:
//
//   constexpr absl::CharSet kJustX = absl::CharSet::Char('x');
//   constexpr absl::CharSet kMySymbols = absl::CharSet("$@!");
//   constexpr absl::CharSet kLetters = absl::CharSet::Range('a', 'z');
//
// Multiple instances can be combined, and the result still forms a constexpr
// expression. For example:
//
//   constexpr absl::CharSet kLettersAndNumbers =
//       absl::CharSet::Range('a', 'z') | absl::CharSet::Range('0', '9');
//
// Several pre-defined character classes are available that mirror the methods
// from <cctype>. For example:
//
//   constexpr absl::CharSet kLettersAndWhitespace =
//       absl::CharSet::AsciiAlphabet() | absl::CharSet::AsciiWhitespace();
//
// To check membership, use the `contains` method, e.g.
//
//   absl::CharSet hex_letters("abcdef");
//   hex_letters.contains('a');  // true
//   hex_letters.contains('g');  // false
45*9356374aSAndroid Build Coastguard Worker 
46*9356374aSAndroid Build Coastguard Worker #ifndef ABSL_STRINGS_CHARSET_H_
47*9356374aSAndroid Build Coastguard Worker #define ABSL_STRINGS_CHARSET_H_
48*9356374aSAndroid Build Coastguard Worker 
49*9356374aSAndroid Build Coastguard Worker #include <cstddef>
50*9356374aSAndroid Build Coastguard Worker #include <cstdint>
51*9356374aSAndroid Build Coastguard Worker #include <cstring>
52*9356374aSAndroid Build Coastguard Worker 
53*9356374aSAndroid Build Coastguard Worker #include "absl/base/macros.h"
54*9356374aSAndroid Build Coastguard Worker #include "absl/base/port.h"
55*9356374aSAndroid Build Coastguard Worker #include "absl/strings/string_view.h"
56*9356374aSAndroid Build Coastguard Worker 
57*9356374aSAndroid Build Coastguard Worker namespace absl {
58*9356374aSAndroid Build Coastguard Worker 
59*9356374aSAndroid Build Coastguard Worker class CharSet {
60*9356374aSAndroid Build Coastguard Worker  public:
CharSet()61*9356374aSAndroid Build Coastguard Worker   constexpr CharSet() : m_() {}
62*9356374aSAndroid Build Coastguard Worker 
63*9356374aSAndroid Build Coastguard Worker   // Initializes with a given string_view.
CharSet(absl::string_view str)64*9356374aSAndroid Build Coastguard Worker   constexpr explicit CharSet(absl::string_view str) : m_() {
65*9356374aSAndroid Build Coastguard Worker     for (char c : str) {
66*9356374aSAndroid Build Coastguard Worker       SetChar(static_cast<unsigned char>(c));
67*9356374aSAndroid Build Coastguard Worker     }
68*9356374aSAndroid Build Coastguard Worker   }
69*9356374aSAndroid Build Coastguard Worker 
contains(char c)70*9356374aSAndroid Build Coastguard Worker   constexpr bool contains(char c) const {
71*9356374aSAndroid Build Coastguard Worker     return ((m_[static_cast<unsigned char>(c) / 64] >>
72*9356374aSAndroid Build Coastguard Worker              (static_cast<unsigned char>(c) % 64)) &
73*9356374aSAndroid Build Coastguard Worker             0x1) == 0x1;
74*9356374aSAndroid Build Coastguard Worker   }
75*9356374aSAndroid Build Coastguard Worker 
empty()76*9356374aSAndroid Build Coastguard Worker   constexpr bool empty() const {
77*9356374aSAndroid Build Coastguard Worker     for (uint64_t c : m_) {
78*9356374aSAndroid Build Coastguard Worker       if (c != 0) return false;
79*9356374aSAndroid Build Coastguard Worker     }
80*9356374aSAndroid Build Coastguard Worker     return true;
81*9356374aSAndroid Build Coastguard Worker   }
82*9356374aSAndroid Build Coastguard Worker 
83*9356374aSAndroid Build Coastguard Worker   // Containing only a single specified char.
Char(char x)84*9356374aSAndroid Build Coastguard Worker   static constexpr CharSet Char(char x) {
85*9356374aSAndroid Build Coastguard Worker     return CharSet(CharMaskForWord(x, 0), CharMaskForWord(x, 1),
86*9356374aSAndroid Build Coastguard Worker                    CharMaskForWord(x, 2), CharMaskForWord(x, 3));
87*9356374aSAndroid Build Coastguard Worker   }
88*9356374aSAndroid Build Coastguard Worker 
89*9356374aSAndroid Build Coastguard Worker   // Containing all the chars in the closed interval [lo,hi].
Range(char lo,char hi)90*9356374aSAndroid Build Coastguard Worker   static constexpr CharSet Range(char lo, char hi) {
91*9356374aSAndroid Build Coastguard Worker     return CharSet(RangeForWord(lo, hi, 0), RangeForWord(lo, hi, 1),
92*9356374aSAndroid Build Coastguard Worker                    RangeForWord(lo, hi, 2), RangeForWord(lo, hi, 3));
93*9356374aSAndroid Build Coastguard Worker   }
94*9356374aSAndroid Build Coastguard Worker 
95*9356374aSAndroid Build Coastguard Worker   friend constexpr CharSet operator&(const CharSet& a, const CharSet& b) {
96*9356374aSAndroid Build Coastguard Worker     return CharSet(a.m_[0] & b.m_[0], a.m_[1] & b.m_[1], a.m_[2] & b.m_[2],
97*9356374aSAndroid Build Coastguard Worker                    a.m_[3] & b.m_[3]);
98*9356374aSAndroid Build Coastguard Worker   }
99*9356374aSAndroid Build Coastguard Worker 
100*9356374aSAndroid Build Coastguard Worker   friend constexpr CharSet operator|(const CharSet& a, const CharSet& b) {
101*9356374aSAndroid Build Coastguard Worker     return CharSet(a.m_[0] | b.m_[0], a.m_[1] | b.m_[1], a.m_[2] | b.m_[2],
102*9356374aSAndroid Build Coastguard Worker                    a.m_[3] | b.m_[3]);
103*9356374aSAndroid Build Coastguard Worker   }
104*9356374aSAndroid Build Coastguard Worker 
105*9356374aSAndroid Build Coastguard Worker   friend constexpr CharSet operator~(const CharSet& a) {
106*9356374aSAndroid Build Coastguard Worker     return CharSet(~a.m_[0], ~a.m_[1], ~a.m_[2], ~a.m_[3]);
107*9356374aSAndroid Build Coastguard Worker   }
108*9356374aSAndroid Build Coastguard Worker 
109*9356374aSAndroid Build Coastguard Worker   // Mirrors the char-classifying predicates in <cctype>.
AsciiUppercase()110*9356374aSAndroid Build Coastguard Worker   static constexpr CharSet AsciiUppercase() { return CharSet::Range('A', 'Z'); }
AsciiLowercase()111*9356374aSAndroid Build Coastguard Worker   static constexpr CharSet AsciiLowercase() { return CharSet::Range('a', 'z'); }
AsciiDigits()112*9356374aSAndroid Build Coastguard Worker   static constexpr CharSet AsciiDigits() { return CharSet::Range('0', '9'); }
AsciiAlphabet()113*9356374aSAndroid Build Coastguard Worker   static constexpr CharSet AsciiAlphabet() {
114*9356374aSAndroid Build Coastguard Worker     return AsciiLowercase() | AsciiUppercase();
115*9356374aSAndroid Build Coastguard Worker   }
AsciiAlphanumerics()116*9356374aSAndroid Build Coastguard Worker   static constexpr CharSet AsciiAlphanumerics() {
117*9356374aSAndroid Build Coastguard Worker     return AsciiDigits() | AsciiAlphabet();
118*9356374aSAndroid Build Coastguard Worker   }
AsciiHexDigits()119*9356374aSAndroid Build Coastguard Worker   static constexpr CharSet AsciiHexDigits() {
120*9356374aSAndroid Build Coastguard Worker     return AsciiDigits() | CharSet::Range('A', 'F') | CharSet::Range('a', 'f');
121*9356374aSAndroid Build Coastguard Worker   }
AsciiPrintable()122*9356374aSAndroid Build Coastguard Worker   static constexpr CharSet AsciiPrintable() {
123*9356374aSAndroid Build Coastguard Worker     return CharSet::Range(0x20, 0x7e);
124*9356374aSAndroid Build Coastguard Worker   }
AsciiWhitespace()125*9356374aSAndroid Build Coastguard Worker   static constexpr CharSet AsciiWhitespace() { return CharSet("\t\n\v\f\r "); }
AsciiPunctuation()126*9356374aSAndroid Build Coastguard Worker   static constexpr CharSet AsciiPunctuation() {
127*9356374aSAndroid Build Coastguard Worker     return AsciiPrintable() & ~AsciiWhitespace() & ~AsciiAlphanumerics();
128*9356374aSAndroid Build Coastguard Worker   }
129*9356374aSAndroid Build Coastguard Worker 
130*9356374aSAndroid Build Coastguard Worker  private:
CharSet(uint64_t b0,uint64_t b1,uint64_t b2,uint64_t b3)131*9356374aSAndroid Build Coastguard Worker   constexpr CharSet(uint64_t b0, uint64_t b1, uint64_t b2, uint64_t b3)
132*9356374aSAndroid Build Coastguard Worker       : m_{b0, b1, b2, b3} {}
133*9356374aSAndroid Build Coastguard Worker 
RangeForWord(char lo,char hi,uint64_t word)134*9356374aSAndroid Build Coastguard Worker   static constexpr uint64_t RangeForWord(char lo, char hi, uint64_t word) {
135*9356374aSAndroid Build Coastguard Worker     return OpenRangeFromZeroForWord(static_cast<unsigned char>(hi) + 1, word) &
136*9356374aSAndroid Build Coastguard Worker            ~OpenRangeFromZeroForWord(static_cast<unsigned char>(lo), word);
137*9356374aSAndroid Build Coastguard Worker   }
138*9356374aSAndroid Build Coastguard Worker 
139*9356374aSAndroid Build Coastguard Worker   // All the chars in the specified word of the range [0, upper).
OpenRangeFromZeroForWord(uint64_t upper,uint64_t word)140*9356374aSAndroid Build Coastguard Worker   static constexpr uint64_t OpenRangeFromZeroForWord(uint64_t upper,
141*9356374aSAndroid Build Coastguard Worker                                                      uint64_t word) {
142*9356374aSAndroid Build Coastguard Worker     return (upper <= 64 * word) ? 0
143*9356374aSAndroid Build Coastguard Worker            : (upper >= 64 * (word + 1))
144*9356374aSAndroid Build Coastguard Worker                ? ~static_cast<uint64_t>(0)
145*9356374aSAndroid Build Coastguard Worker                : (~static_cast<uint64_t>(0) >> (64 - upper % 64));
146*9356374aSAndroid Build Coastguard Worker   }
147*9356374aSAndroid Build Coastguard Worker 
CharMaskForWord(char x,uint64_t word)148*9356374aSAndroid Build Coastguard Worker   static constexpr uint64_t CharMaskForWord(char x, uint64_t word) {
149*9356374aSAndroid Build Coastguard Worker     return (static_cast<unsigned char>(x) / 64 == word)
150*9356374aSAndroid Build Coastguard Worker                ? (static_cast<uint64_t>(1)
151*9356374aSAndroid Build Coastguard Worker                   << (static_cast<unsigned char>(x) % 64))
152*9356374aSAndroid Build Coastguard Worker                : 0;
153*9356374aSAndroid Build Coastguard Worker   }
154*9356374aSAndroid Build Coastguard Worker 
SetChar(unsigned char c)155*9356374aSAndroid Build Coastguard Worker   constexpr void SetChar(unsigned char c) {
156*9356374aSAndroid Build Coastguard Worker     m_[c / 64] |= static_cast<uint64_t>(1) << (c % 64);
157*9356374aSAndroid Build Coastguard Worker   }
158*9356374aSAndroid Build Coastguard Worker 
159*9356374aSAndroid Build Coastguard Worker   uint64_t m_[4];
160*9356374aSAndroid Build Coastguard Worker };
161*9356374aSAndroid Build Coastguard Worker 
162*9356374aSAndroid Build Coastguard Worker }  // namespace absl
163*9356374aSAndroid Build Coastguard Worker 
164*9356374aSAndroid Build Coastguard Worker #endif  // ABSL_STRINGS_CHARSET_H_
165