// Copyright 2022 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// -----------------------------------------------------------------------------
// File: charset.h
// -----------------------------------------------------------------------------
//
// This file contains absl::CharSet, a fast, bit-vector set of 8-bit unsigned
// characters.
//
// Instances can be initialized as constexpr constants. For example:
//
//   constexpr absl::CharSet kJustX = absl::CharSet::Char('x');
//   constexpr absl::CharSet kMySymbols = absl::CharSet("$@!");
//   constexpr absl::CharSet kLetters = absl::CharSet::Range('a', 'z');
//
// Multiple instances can be combined, and the result is still a constexpr
// expression. For example:
//
//   constexpr absl::CharSet kLettersAndNumbers =
//       absl::CharSet::Range('a', 'z') | absl::CharSet::Range('0', '9');
//
// Several pre-defined character classes are available that mirror the methods
// from <cctype>. For example:
//
//   constexpr absl::CharSet kLettersAndWhitespace =
//       absl::CharSet::AsciiAlphabet() | absl::CharSet::AsciiWhitespace();
//
// To check membership, use the .contains method, e.g.
41 // 42 // absl::CharSet hex_letters("abcdef"); 43 // hex_letters.contains('a'); // true 44 // hex_letters.contains('g'); // false 45 46 #ifndef ABSL_STRINGS_CHARSET_H_ 47 #define ABSL_STRINGS_CHARSET_H_ 48 49 #include <cstdint> 50 51 #include "absl/base/config.h" 52 #include "absl/strings/string_view.h" 53 54 namespace absl { 55 ABSL_NAMESPACE_BEGIN 56 57 class CharSet { 58 public: CharSet()59 constexpr CharSet() : m_() {} 60 61 // Initializes with a given string_view. CharSet(absl::string_view str)62 constexpr explicit CharSet(absl::string_view str) : m_() { 63 for (char c : str) { 64 SetChar(static_cast<unsigned char>(c)); 65 } 66 } 67 contains(char c)68 constexpr bool contains(char c) const { 69 return ((m_[static_cast<unsigned char>(c) / 64] >> 70 (static_cast<unsigned char>(c) % 64)) & 71 0x1) == 0x1; 72 } 73 empty()74 constexpr bool empty() const { 75 for (uint64_t c : m_) { 76 if (c != 0) return false; 77 } 78 return true; 79 } 80 81 // Containing only a single specified char. Char(char x)82 static constexpr CharSet Char(char x) { 83 return CharSet(CharMaskForWord(x, 0), CharMaskForWord(x, 1), 84 CharMaskForWord(x, 2), CharMaskForWord(x, 3)); 85 } 86 87 // Containing all the chars in the closed interval [lo,hi]. 
Range(char lo,char hi)88 static constexpr CharSet Range(char lo, char hi) { 89 return CharSet(RangeForWord(lo, hi, 0), RangeForWord(lo, hi, 1), 90 RangeForWord(lo, hi, 2), RangeForWord(lo, hi, 3)); 91 } 92 93 friend constexpr CharSet operator&(const CharSet& a, const CharSet& b) { 94 return CharSet(a.m_[0] & b.m_[0], a.m_[1] & b.m_[1], a.m_[2] & b.m_[2], 95 a.m_[3] & b.m_[3]); 96 } 97 98 friend constexpr CharSet operator|(const CharSet& a, const CharSet& b) { 99 return CharSet(a.m_[0] | b.m_[0], a.m_[1] | b.m_[1], a.m_[2] | b.m_[2], 100 a.m_[3] | b.m_[3]); 101 } 102 103 friend constexpr CharSet operator~(const CharSet& a) { 104 return CharSet(~a.m_[0], ~a.m_[1], ~a.m_[2], ~a.m_[3]); 105 } 106 107 // Mirrors the char-classifying predicates in <cctype>. AsciiUppercase()108 static constexpr CharSet AsciiUppercase() { return CharSet::Range('A', 'Z'); } AsciiLowercase()109 static constexpr CharSet AsciiLowercase() { return CharSet::Range('a', 'z'); } AsciiDigits()110 static constexpr CharSet AsciiDigits() { return CharSet::Range('0', '9'); } AsciiAlphabet()111 static constexpr CharSet AsciiAlphabet() { 112 return AsciiLowercase() | AsciiUppercase(); 113 } AsciiAlphanumerics()114 static constexpr CharSet AsciiAlphanumerics() { 115 return AsciiDigits() | AsciiAlphabet(); 116 } AsciiHexDigits()117 static constexpr CharSet AsciiHexDigits() { 118 return AsciiDigits() | CharSet::Range('A', 'F') | CharSet::Range('a', 'f'); 119 } AsciiPrintable()120 static constexpr CharSet AsciiPrintable() { 121 return CharSet::Range(0x20, 0x7e); 122 } AsciiWhitespace()123 static constexpr CharSet AsciiWhitespace() { return CharSet("\t\n\v\f\r "); } AsciiPunctuation()124 static constexpr CharSet AsciiPunctuation() { 125 return AsciiPrintable() & ~AsciiWhitespace() & ~AsciiAlphanumerics(); 126 } 127 128 private: CharSet(uint64_t b0,uint64_t b1,uint64_t b2,uint64_t b3)129 constexpr CharSet(uint64_t b0, uint64_t b1, uint64_t b2, uint64_t b3) 130 : m_{b0, b1, b2, b3} {} 131 RangeForWord(char 
lo,char hi,uint64_t word)132 static constexpr uint64_t RangeForWord(char lo, char hi, uint64_t word) { 133 return OpenRangeFromZeroForWord(static_cast<unsigned char>(hi) + 1, word) & 134 ~OpenRangeFromZeroForWord(static_cast<unsigned char>(lo), word); 135 } 136 137 // All the chars in the specified word of the range [0, upper). OpenRangeFromZeroForWord(uint64_t upper,uint64_t word)138 static constexpr uint64_t OpenRangeFromZeroForWord(uint64_t upper, 139 uint64_t word) { 140 return (upper <= 64 * word) ? 0 141 : (upper >= 64 * (word + 1)) 142 ? ~static_cast<uint64_t>(0) 143 : (~static_cast<uint64_t>(0) >> (64 - upper % 64)); 144 } 145 CharMaskForWord(char x,uint64_t word)146 static constexpr uint64_t CharMaskForWord(char x, uint64_t word) { 147 return (static_cast<unsigned char>(x) / 64 == word) 148 ? (static_cast<uint64_t>(1) 149 << (static_cast<unsigned char>(x) % 64)) 150 : 0; 151 } 152 SetChar(unsigned char c)153 constexpr void SetChar(unsigned char c) { 154 m_[c / 64] |= static_cast<uint64_t>(1) << (c % 64); 155 } 156 157 uint64_t m_[4]; 158 }; 159 160 ABSL_NAMESPACE_END 161 } // namespace absl 162 163 #endif // ABSL_STRINGS_CHARSET_H_ 164