// Copyright 2022 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// -----------------------------------------------------------------------------
// File: charset.h
// -----------------------------------------------------------------------------
//
// This file contains absl::CharSet, a fast, bit-vector set of 8-bit unsigned
// characters.
//
// Instances can be initialized as constexpr constants. For example:
//
//   constexpr absl::CharSet kJustX = absl::CharSet::Char('x');
//   constexpr absl::CharSet kMySymbols = absl::CharSet("$@!");
//   constexpr absl::CharSet kLetters = absl::CharSet::Range('a', 'z');
//
// Multiple instances can be combined into an expression that is still
// constexpr. For example:
//
//   constexpr absl::CharSet kLettersAndNumbers =
//       absl::CharSet::Range('a', 'z') | absl::CharSet::Range('0', '9');
//
// Several pre-defined character classes are available that mirror the methods
// from <cctype>. For example:
//
//   constexpr absl::CharSet kLettersAndWhitespace =
//       absl::CharSet::AsciiAlphabet() | absl::CharSet::AsciiWhitespace();
//
// To check membership, use the .contains method, e.g.
41 // 42 // absl::CharSet hex_letters("abcdef"); 43 // hex_letters.contains('a'); // true 44 // hex_letters.contains('g'); // false 45 46 #ifndef ABSL_STRINGS_CHARSET_H_ 47 #define ABSL_STRINGS_CHARSET_H_ 48 49 #include <cstddef> 50 #include <cstdint> 51 #include <cstring> 52 53 #include "absl/base/macros.h" 54 #include "absl/base/port.h" 55 #include "absl/strings/string_view.h" 56 57 namespace absl { 58 59 class CharSet { 60 public: CharSet()61 constexpr CharSet() : m_() {} 62 63 // Initializes with a given string_view. CharSet(absl::string_view str)64 constexpr explicit CharSet(absl::string_view str) : m_() { 65 for (char c : str) { 66 SetChar(static_cast<unsigned char>(c)); 67 } 68 } 69 contains(char c)70 constexpr bool contains(char c) const { 71 return ((m_[static_cast<unsigned char>(c) / 64] >> 72 (static_cast<unsigned char>(c) % 64)) & 73 0x1) == 0x1; 74 } 75 empty()76 constexpr bool empty() const { 77 for (uint64_t c : m_) { 78 if (c != 0) return false; 79 } 80 return true; 81 } 82 83 // Containing only a single specified char. Char(char x)84 static constexpr CharSet Char(char x) { 85 return CharSet(CharMaskForWord(x, 0), CharMaskForWord(x, 1), 86 CharMaskForWord(x, 2), CharMaskForWord(x, 3)); 87 } 88 89 // Containing all the chars in the closed interval [lo,hi]. 
Range(char lo,char hi)90 static constexpr CharSet Range(char lo, char hi) { 91 return CharSet(RangeForWord(lo, hi, 0), RangeForWord(lo, hi, 1), 92 RangeForWord(lo, hi, 2), RangeForWord(lo, hi, 3)); 93 } 94 95 friend constexpr CharSet operator&(const CharSet& a, const CharSet& b) { 96 return CharSet(a.m_[0] & b.m_[0], a.m_[1] & b.m_[1], a.m_[2] & b.m_[2], 97 a.m_[3] & b.m_[3]); 98 } 99 100 friend constexpr CharSet operator|(const CharSet& a, const CharSet& b) { 101 return CharSet(a.m_[0] | b.m_[0], a.m_[1] | b.m_[1], a.m_[2] | b.m_[2], 102 a.m_[3] | b.m_[3]); 103 } 104 105 friend constexpr CharSet operator~(const CharSet& a) { 106 return CharSet(~a.m_[0], ~a.m_[1], ~a.m_[2], ~a.m_[3]); 107 } 108 109 // Mirrors the char-classifying predicates in <cctype>. AsciiUppercase()110 static constexpr CharSet AsciiUppercase() { return CharSet::Range('A', 'Z'); } AsciiLowercase()111 static constexpr CharSet AsciiLowercase() { return CharSet::Range('a', 'z'); } AsciiDigits()112 static constexpr CharSet AsciiDigits() { return CharSet::Range('0', '9'); } AsciiAlphabet()113 static constexpr CharSet AsciiAlphabet() { 114 return AsciiLowercase() | AsciiUppercase(); 115 } AsciiAlphanumerics()116 static constexpr CharSet AsciiAlphanumerics() { 117 return AsciiDigits() | AsciiAlphabet(); 118 } AsciiHexDigits()119 static constexpr CharSet AsciiHexDigits() { 120 return AsciiDigits() | CharSet::Range('A', 'F') | CharSet::Range('a', 'f'); 121 } AsciiPrintable()122 static constexpr CharSet AsciiPrintable() { 123 return CharSet::Range(0x20, 0x7e); 124 } AsciiWhitespace()125 static constexpr CharSet AsciiWhitespace() { return CharSet("\t\n\v\f\r "); } AsciiPunctuation()126 static constexpr CharSet AsciiPunctuation() { 127 return AsciiPrintable() & ~AsciiWhitespace() & ~AsciiAlphanumerics(); 128 } 129 130 private: CharSet(uint64_t b0,uint64_t b1,uint64_t b2,uint64_t b3)131 constexpr CharSet(uint64_t b0, uint64_t b1, uint64_t b2, uint64_t b3) 132 : m_{b0, b1, b2, b3} {} 133 RangeForWord(char 
lo,char hi,uint64_t word)134 static constexpr uint64_t RangeForWord(char lo, char hi, uint64_t word) { 135 return OpenRangeFromZeroForWord(static_cast<unsigned char>(hi) + 1, word) & 136 ~OpenRangeFromZeroForWord(static_cast<unsigned char>(lo), word); 137 } 138 139 // All the chars in the specified word of the range [0, upper). OpenRangeFromZeroForWord(uint64_t upper,uint64_t word)140 static constexpr uint64_t OpenRangeFromZeroForWord(uint64_t upper, 141 uint64_t word) { 142 return (upper <= 64 * word) ? 0 143 : (upper >= 64 * (word + 1)) 144 ? ~static_cast<uint64_t>(0) 145 : (~static_cast<uint64_t>(0) >> (64 - upper % 64)); 146 } 147 CharMaskForWord(char x,uint64_t word)148 static constexpr uint64_t CharMaskForWord(char x, uint64_t word) { 149 return (static_cast<unsigned char>(x) / 64 == word) 150 ? (static_cast<uint64_t>(1) 151 << (static_cast<unsigned char>(x) % 64)) 152 : 0; 153 } 154 SetChar(unsigned char c)155 constexpr void SetChar(unsigned char c) { 156 m_[c / 64] |= static_cast<uint64_t>(1) << (c % 64); 157 } 158 159 uint64_t m_[4]; 160 }; 161 162 } // namespace absl 163 164 #endif // ABSL_STRINGS_CHARSET_H_ 165