xref: /aosp_15_r20/external/angle/third_party/abseil-cpp/absl/strings/charset.h (revision 8975f5c5ed3d1c378011245431ada316dfb6f244)
1 // Copyright 2022 The Abseil Authors.
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //      https://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 //
15 // -----------------------------------------------------------------------------
16 // File: charset.h
17 // -----------------------------------------------------------------------------
18 //
19 // This file contains absl::CharSet, a fast, bit-vector set of 8-bit unsigned
20 // characters.
21 //
22 // Instances can be initialized as constexpr constants. For example:
23 //
24 //   constexpr absl::CharSet kJustX = absl::CharSet::Char('x');
25 //   constexpr absl::CharSet kMySymbols = absl::CharSet("$@!");
26 //   constexpr absl::CharSet kLetters = absl::CharSet::Range('a', 'z');
27 //
28 // Multiple instances can be combined into a single constexpr expression.
29 // For example:
30 //
31 //   constexpr absl::CharSet kLettersAndNumbers =
32 //       absl::CharSet::Range('a', 'z') | absl::CharSet::Range('0', '9');
33 //
34 // Several pre-defined character classes are available that mirror the methods
35 // from <cctype>. For example:
36 //
37 //   constexpr absl::CharSet kLettersAndWhitespace =
38 //       absl::CharSet::AsciiAlphabet() | absl::CharSet::AsciiWhitespace();
39 //
40 // To check membership, use the .contains method, e.g.
41 //
42 //   absl::CharSet hex_letters("abcdef");
43 //   hex_letters.contains('a');  // true
44 //   hex_letters.contains('g');  // false
45 
46 #ifndef ABSL_STRINGS_CHARSET_H_
47 #define ABSL_STRINGS_CHARSET_H_
48 
49 #include <cstdint>
50 
51 #include "absl/base/config.h"
52 #include "absl/strings/string_view.h"
53 
54 namespace absl {
55 ABSL_NAMESPACE_BEGIN
56 
57 class CharSet {
58  public:
CharSet()59   constexpr CharSet() : m_() {}
60 
61   // Initializes with a given string_view.
CharSet(absl::string_view str)62   constexpr explicit CharSet(absl::string_view str) : m_() {
63     for (char c : str) {
64       SetChar(static_cast<unsigned char>(c));
65     }
66   }
67 
contains(char c)68   constexpr bool contains(char c) const {
69     return ((m_[static_cast<unsigned char>(c) / 64] >>
70              (static_cast<unsigned char>(c) % 64)) &
71             0x1) == 0x1;
72   }
73 
empty()74   constexpr bool empty() const {
75     for (uint64_t c : m_) {
76       if (c != 0) return false;
77     }
78     return true;
79   }
80 
81   // Containing only a single specified char.
Char(char x)82   static constexpr CharSet Char(char x) {
83     return CharSet(CharMaskForWord(x, 0), CharMaskForWord(x, 1),
84                    CharMaskForWord(x, 2), CharMaskForWord(x, 3));
85   }
86 
87   // Containing all the chars in the closed interval [lo,hi].
Range(char lo,char hi)88   static constexpr CharSet Range(char lo, char hi) {
89     return CharSet(RangeForWord(lo, hi, 0), RangeForWord(lo, hi, 1),
90                    RangeForWord(lo, hi, 2), RangeForWord(lo, hi, 3));
91   }
92 
93   friend constexpr CharSet operator&(const CharSet& a, const CharSet& b) {
94     return CharSet(a.m_[0] & b.m_[0], a.m_[1] & b.m_[1], a.m_[2] & b.m_[2],
95                    a.m_[3] & b.m_[3]);
96   }
97 
98   friend constexpr CharSet operator|(const CharSet& a, const CharSet& b) {
99     return CharSet(a.m_[0] | b.m_[0], a.m_[1] | b.m_[1], a.m_[2] | b.m_[2],
100                    a.m_[3] | b.m_[3]);
101   }
102 
103   friend constexpr CharSet operator~(const CharSet& a) {
104     return CharSet(~a.m_[0], ~a.m_[1], ~a.m_[2], ~a.m_[3]);
105   }
106 
107   // Mirrors the char-classifying predicates in <cctype>.
AsciiUppercase()108   static constexpr CharSet AsciiUppercase() { return CharSet::Range('A', 'Z'); }
AsciiLowercase()109   static constexpr CharSet AsciiLowercase() { return CharSet::Range('a', 'z'); }
AsciiDigits()110   static constexpr CharSet AsciiDigits() { return CharSet::Range('0', '9'); }
AsciiAlphabet()111   static constexpr CharSet AsciiAlphabet() {
112     return AsciiLowercase() | AsciiUppercase();
113   }
AsciiAlphanumerics()114   static constexpr CharSet AsciiAlphanumerics() {
115     return AsciiDigits() | AsciiAlphabet();
116   }
AsciiHexDigits()117   static constexpr CharSet AsciiHexDigits() {
118     return AsciiDigits() | CharSet::Range('A', 'F') | CharSet::Range('a', 'f');
119   }
AsciiPrintable()120   static constexpr CharSet AsciiPrintable() {
121     return CharSet::Range(0x20, 0x7e);
122   }
AsciiWhitespace()123   static constexpr CharSet AsciiWhitespace() { return CharSet("\t\n\v\f\r "); }
AsciiPunctuation()124   static constexpr CharSet AsciiPunctuation() {
125     return AsciiPrintable() & ~AsciiWhitespace() & ~AsciiAlphanumerics();
126   }
127 
128  private:
CharSet(uint64_t b0,uint64_t b1,uint64_t b2,uint64_t b3)129   constexpr CharSet(uint64_t b0, uint64_t b1, uint64_t b2, uint64_t b3)
130       : m_{b0, b1, b2, b3} {}
131 
RangeForWord(char lo,char hi,uint64_t word)132   static constexpr uint64_t RangeForWord(char lo, char hi, uint64_t word) {
133     return OpenRangeFromZeroForWord(static_cast<unsigned char>(hi) + 1, word) &
134            ~OpenRangeFromZeroForWord(static_cast<unsigned char>(lo), word);
135   }
136 
137   // All the chars in the specified word of the range [0, upper).
OpenRangeFromZeroForWord(uint64_t upper,uint64_t word)138   static constexpr uint64_t OpenRangeFromZeroForWord(uint64_t upper,
139                                                      uint64_t word) {
140     return (upper <= 64 * word) ? 0
141            : (upper >= 64 * (word + 1))
142                ? ~static_cast<uint64_t>(0)
143                : (~static_cast<uint64_t>(0) >> (64 - upper % 64));
144   }
145 
CharMaskForWord(char x,uint64_t word)146   static constexpr uint64_t CharMaskForWord(char x, uint64_t word) {
147     return (static_cast<unsigned char>(x) / 64 == word)
148                ? (static_cast<uint64_t>(1)
149                   << (static_cast<unsigned char>(x) % 64))
150                : 0;
151   }
152 
SetChar(unsigned char c)153   constexpr void SetChar(unsigned char c) {
154     m_[c / 64] |= static_cast<uint64_t>(1) << (c % 64);
155   }
156 
157   uint64_t m_[4];
158 };
159 
160 ABSL_NAMESPACE_END
161 }  // namespace absl
162 
163 #endif  // ABSL_STRINGS_CHARSET_H_
164