// Copyright 2022 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// -----------------------------------------------------------------------------
// File: charset.h
// -----------------------------------------------------------------------------
//
// This file contains absl::CharSet, a fast, bit-vector set of 8-bit unsigned
// characters.
//
// Instances can be initialized as constexpr constants. For example:
//
//   constexpr absl::CharSet kJustX = absl::CharSet::Char('x');
//   constexpr absl::CharSet kMySymbols = absl::CharSet("$@!");
//   constexpr absl::CharSet kLetters = absl::CharSet::Range('a', 'z');
//
// Multiple instances can be combined, and the result is still a constexpr
// expression. For example:
//
//   constexpr absl::CharSet kLettersAndNumbers =
//       absl::CharSet::Range('a', 'z') | absl::CharSet::Range('0', '9');
//
// Several pre-defined character classes are available that mirror the methods
// from <cctype>. For example:
//
//   constexpr absl::CharSet kLettersAndWhitespace =
//       absl::CharSet::AsciiAlphabet() | absl::CharSet::AsciiWhitespace();
//
// To check membership, use the .contains method, e.g.
41*9356374aSAndroid Build Coastguard Worker // 42*9356374aSAndroid Build Coastguard Worker // absl::CharSet hex_letters("abcdef"); 43*9356374aSAndroid Build Coastguard Worker // hex_letters.contains('a'); // true 44*9356374aSAndroid Build Coastguard Worker // hex_letters.contains('g'); // false 45*9356374aSAndroid Build Coastguard Worker 46*9356374aSAndroid Build Coastguard Worker #ifndef ABSL_STRINGS_CHARSET_H_ 47*9356374aSAndroid Build Coastguard Worker #define ABSL_STRINGS_CHARSET_H_ 48*9356374aSAndroid Build Coastguard Worker 49*9356374aSAndroid Build Coastguard Worker #include <cstddef> 50*9356374aSAndroid Build Coastguard Worker #include <cstdint> 51*9356374aSAndroid Build Coastguard Worker #include <cstring> 52*9356374aSAndroid Build Coastguard Worker 53*9356374aSAndroid Build Coastguard Worker #include "absl/base/macros.h" 54*9356374aSAndroid Build Coastguard Worker #include "absl/base/port.h" 55*9356374aSAndroid Build Coastguard Worker #include "absl/strings/string_view.h" 56*9356374aSAndroid Build Coastguard Worker 57*9356374aSAndroid Build Coastguard Worker namespace absl { 58*9356374aSAndroid Build Coastguard Worker 59*9356374aSAndroid Build Coastguard Worker class CharSet { 60*9356374aSAndroid Build Coastguard Worker public: CharSet()61*9356374aSAndroid Build Coastguard Worker constexpr CharSet() : m_() {} 62*9356374aSAndroid Build Coastguard Worker 63*9356374aSAndroid Build Coastguard Worker // Initializes with a given string_view. 
CharSet(absl::string_view str)64*9356374aSAndroid Build Coastguard Worker constexpr explicit CharSet(absl::string_view str) : m_() { 65*9356374aSAndroid Build Coastguard Worker for (char c : str) { 66*9356374aSAndroid Build Coastguard Worker SetChar(static_cast<unsigned char>(c)); 67*9356374aSAndroid Build Coastguard Worker } 68*9356374aSAndroid Build Coastguard Worker } 69*9356374aSAndroid Build Coastguard Worker contains(char c)70*9356374aSAndroid Build Coastguard Worker constexpr bool contains(char c) const { 71*9356374aSAndroid Build Coastguard Worker return ((m_[static_cast<unsigned char>(c) / 64] >> 72*9356374aSAndroid Build Coastguard Worker (static_cast<unsigned char>(c) % 64)) & 73*9356374aSAndroid Build Coastguard Worker 0x1) == 0x1; 74*9356374aSAndroid Build Coastguard Worker } 75*9356374aSAndroid Build Coastguard Worker empty()76*9356374aSAndroid Build Coastguard Worker constexpr bool empty() const { 77*9356374aSAndroid Build Coastguard Worker for (uint64_t c : m_) { 78*9356374aSAndroid Build Coastguard Worker if (c != 0) return false; 79*9356374aSAndroid Build Coastguard Worker } 80*9356374aSAndroid Build Coastguard Worker return true; 81*9356374aSAndroid Build Coastguard Worker } 82*9356374aSAndroid Build Coastguard Worker 83*9356374aSAndroid Build Coastguard Worker // Containing only a single specified char. Char(char x)84*9356374aSAndroid Build Coastguard Worker static constexpr CharSet Char(char x) { 85*9356374aSAndroid Build Coastguard Worker return CharSet(CharMaskForWord(x, 0), CharMaskForWord(x, 1), 86*9356374aSAndroid Build Coastguard Worker CharMaskForWord(x, 2), CharMaskForWord(x, 3)); 87*9356374aSAndroid Build Coastguard Worker } 88*9356374aSAndroid Build Coastguard Worker 89*9356374aSAndroid Build Coastguard Worker // Containing all the chars in the closed interval [lo,hi]. 
Range(char lo,char hi)90*9356374aSAndroid Build Coastguard Worker static constexpr CharSet Range(char lo, char hi) { 91*9356374aSAndroid Build Coastguard Worker return CharSet(RangeForWord(lo, hi, 0), RangeForWord(lo, hi, 1), 92*9356374aSAndroid Build Coastguard Worker RangeForWord(lo, hi, 2), RangeForWord(lo, hi, 3)); 93*9356374aSAndroid Build Coastguard Worker } 94*9356374aSAndroid Build Coastguard Worker 95*9356374aSAndroid Build Coastguard Worker friend constexpr CharSet operator&(const CharSet& a, const CharSet& b) { 96*9356374aSAndroid Build Coastguard Worker return CharSet(a.m_[0] & b.m_[0], a.m_[1] & b.m_[1], a.m_[2] & b.m_[2], 97*9356374aSAndroid Build Coastguard Worker a.m_[3] & b.m_[3]); 98*9356374aSAndroid Build Coastguard Worker } 99*9356374aSAndroid Build Coastguard Worker 100*9356374aSAndroid Build Coastguard Worker friend constexpr CharSet operator|(const CharSet& a, const CharSet& b) { 101*9356374aSAndroid Build Coastguard Worker return CharSet(a.m_[0] | b.m_[0], a.m_[1] | b.m_[1], a.m_[2] | b.m_[2], 102*9356374aSAndroid Build Coastguard Worker a.m_[3] | b.m_[3]); 103*9356374aSAndroid Build Coastguard Worker } 104*9356374aSAndroid Build Coastguard Worker 105*9356374aSAndroid Build Coastguard Worker friend constexpr CharSet operator~(const CharSet& a) { 106*9356374aSAndroid Build Coastguard Worker return CharSet(~a.m_[0], ~a.m_[1], ~a.m_[2], ~a.m_[3]); 107*9356374aSAndroid Build Coastguard Worker } 108*9356374aSAndroid Build Coastguard Worker 109*9356374aSAndroid Build Coastguard Worker // Mirrors the char-classifying predicates in <cctype>. 
AsciiUppercase()110*9356374aSAndroid Build Coastguard Worker static constexpr CharSet AsciiUppercase() { return CharSet::Range('A', 'Z'); } AsciiLowercase()111*9356374aSAndroid Build Coastguard Worker static constexpr CharSet AsciiLowercase() { return CharSet::Range('a', 'z'); } AsciiDigits()112*9356374aSAndroid Build Coastguard Worker static constexpr CharSet AsciiDigits() { return CharSet::Range('0', '9'); } AsciiAlphabet()113*9356374aSAndroid Build Coastguard Worker static constexpr CharSet AsciiAlphabet() { 114*9356374aSAndroid Build Coastguard Worker return AsciiLowercase() | AsciiUppercase(); 115*9356374aSAndroid Build Coastguard Worker } AsciiAlphanumerics()116*9356374aSAndroid Build Coastguard Worker static constexpr CharSet AsciiAlphanumerics() { 117*9356374aSAndroid Build Coastguard Worker return AsciiDigits() | AsciiAlphabet(); 118*9356374aSAndroid Build Coastguard Worker } AsciiHexDigits()119*9356374aSAndroid Build Coastguard Worker static constexpr CharSet AsciiHexDigits() { 120*9356374aSAndroid Build Coastguard Worker return AsciiDigits() | CharSet::Range('A', 'F') | CharSet::Range('a', 'f'); 121*9356374aSAndroid Build Coastguard Worker } AsciiPrintable()122*9356374aSAndroid Build Coastguard Worker static constexpr CharSet AsciiPrintable() { 123*9356374aSAndroid Build Coastguard Worker return CharSet::Range(0x20, 0x7e); 124*9356374aSAndroid Build Coastguard Worker } AsciiWhitespace()125*9356374aSAndroid Build Coastguard Worker static constexpr CharSet AsciiWhitespace() { return CharSet("\t\n\v\f\r "); } AsciiPunctuation()126*9356374aSAndroid Build Coastguard Worker static constexpr CharSet AsciiPunctuation() { 127*9356374aSAndroid Build Coastguard Worker return AsciiPrintable() & ~AsciiWhitespace() & ~AsciiAlphanumerics(); 128*9356374aSAndroid Build Coastguard Worker } 129*9356374aSAndroid Build Coastguard Worker 130*9356374aSAndroid Build Coastguard Worker private: CharSet(uint64_t b0,uint64_t b1,uint64_t b2,uint64_t b3)131*9356374aSAndroid Build 
Coastguard Worker constexpr CharSet(uint64_t b0, uint64_t b1, uint64_t b2, uint64_t b3) 132*9356374aSAndroid Build Coastguard Worker : m_{b0, b1, b2, b3} {} 133*9356374aSAndroid Build Coastguard Worker RangeForWord(char lo,char hi,uint64_t word)134*9356374aSAndroid Build Coastguard Worker static constexpr uint64_t RangeForWord(char lo, char hi, uint64_t word) { 135*9356374aSAndroid Build Coastguard Worker return OpenRangeFromZeroForWord(static_cast<unsigned char>(hi) + 1, word) & 136*9356374aSAndroid Build Coastguard Worker ~OpenRangeFromZeroForWord(static_cast<unsigned char>(lo), word); 137*9356374aSAndroid Build Coastguard Worker } 138*9356374aSAndroid Build Coastguard Worker 139*9356374aSAndroid Build Coastguard Worker // All the chars in the specified word of the range [0, upper). OpenRangeFromZeroForWord(uint64_t upper,uint64_t word)140*9356374aSAndroid Build Coastguard Worker static constexpr uint64_t OpenRangeFromZeroForWord(uint64_t upper, 141*9356374aSAndroid Build Coastguard Worker uint64_t word) { 142*9356374aSAndroid Build Coastguard Worker return (upper <= 64 * word) ? 0 143*9356374aSAndroid Build Coastguard Worker : (upper >= 64 * (word + 1)) 144*9356374aSAndroid Build Coastguard Worker ? ~static_cast<uint64_t>(0) 145*9356374aSAndroid Build Coastguard Worker : (~static_cast<uint64_t>(0) >> (64 - upper % 64)); 146*9356374aSAndroid Build Coastguard Worker } 147*9356374aSAndroid Build Coastguard Worker CharMaskForWord(char x,uint64_t word)148*9356374aSAndroid Build Coastguard Worker static constexpr uint64_t CharMaskForWord(char x, uint64_t word) { 149*9356374aSAndroid Build Coastguard Worker return (static_cast<unsigned char>(x) / 64 == word) 150*9356374aSAndroid Build Coastguard Worker ? 
(static_cast<uint64_t>(1) 151*9356374aSAndroid Build Coastguard Worker << (static_cast<unsigned char>(x) % 64)) 152*9356374aSAndroid Build Coastguard Worker : 0; 153*9356374aSAndroid Build Coastguard Worker } 154*9356374aSAndroid Build Coastguard Worker SetChar(unsigned char c)155*9356374aSAndroid Build Coastguard Worker constexpr void SetChar(unsigned char c) { 156*9356374aSAndroid Build Coastguard Worker m_[c / 64] |= static_cast<uint64_t>(1) << (c % 64); 157*9356374aSAndroid Build Coastguard Worker } 158*9356374aSAndroid Build Coastguard Worker 159*9356374aSAndroid Build Coastguard Worker uint64_t m_[4]; 160*9356374aSAndroid Build Coastguard Worker }; 161*9356374aSAndroid Build Coastguard Worker 162*9356374aSAndroid Build Coastguard Worker } // namespace absl 163*9356374aSAndroid Build Coastguard Worker 164*9356374aSAndroid Build Coastguard Worker #endif // ABSL_STRINGS_CHARSET_H_ 165