1*9880d681SAndroid Build Coastguard Worker //===--- UnicodeCharRanges.h - Types and functions for character ranges ---===// 2*9880d681SAndroid Build Coastguard Worker // 3*9880d681SAndroid Build Coastguard Worker // The LLVM Compiler Infrastructure 4*9880d681SAndroid Build Coastguard Worker // 5*9880d681SAndroid Build Coastguard Worker // This file is distributed under the University of Illinois Open Source 6*9880d681SAndroid Build Coastguard Worker // License. See LICENSE.TXT for details. 7*9880d681SAndroid Build Coastguard Worker // 8*9880d681SAndroid Build Coastguard Worker //===----------------------------------------------------------------------===// 9*9880d681SAndroid Build Coastguard Worker #ifndef LLVM_SUPPORT_UNICODECHARRANGES_H 10*9880d681SAndroid Build Coastguard Worker #define LLVM_SUPPORT_UNICODECHARRANGES_H 11*9880d681SAndroid Build Coastguard Worker 12*9880d681SAndroid Build Coastguard Worker #include "llvm/ADT/ArrayRef.h" 13*9880d681SAndroid Build Coastguard Worker #include "llvm/ADT/SmallPtrSet.h" 14*9880d681SAndroid Build Coastguard Worker #include "llvm/Support/Compiler.h" 15*9880d681SAndroid Build Coastguard Worker #include "llvm/Support/Debug.h" 16*9880d681SAndroid Build Coastguard Worker #include "llvm/Support/Mutex.h" 17*9880d681SAndroid Build Coastguard Worker #include "llvm/Support/MutexGuard.h" 18*9880d681SAndroid Build Coastguard Worker #include "llvm/Support/raw_ostream.h" 19*9880d681SAndroid Build Coastguard Worker #include <algorithm> 20*9880d681SAndroid Build Coastguard Worker 21*9880d681SAndroid Build Coastguard Worker namespace llvm { 22*9880d681SAndroid Build Coastguard Worker namespace sys { 23*9880d681SAndroid Build Coastguard Worker 24*9880d681SAndroid Build Coastguard Worker #define DEBUG_TYPE "unicode" 25*9880d681SAndroid Build Coastguard Worker 26*9880d681SAndroid Build Coastguard Worker /// \brief Represents a closed range of Unicode code points [Lower, Upper]. 27*9880d681SAndroid Build Coastguard Worker struct UnicodeCharRange { 28*9880d681SAndroid Build Coastguard Worker uint32_t Lower; 29*9880d681SAndroid Build Coastguard Worker uint32_t Upper; 30*9880d681SAndroid Build Coastguard Worker }; 31*9880d681SAndroid Build Coastguard Worker 32*9880d681SAndroid Build Coastguard Worker inline bool operator<(uint32_t Value, UnicodeCharRange Range) { 33*9880d681SAndroid Build Coastguard Worker return Value < Range.Lower; 34*9880d681SAndroid Build Coastguard Worker } 35*9880d681SAndroid Build Coastguard Worker inline bool operator<(UnicodeCharRange Range, uint32_t Value) { 36*9880d681SAndroid Build Coastguard Worker return Range.Upper < Value; 37*9880d681SAndroid Build Coastguard Worker } 38*9880d681SAndroid Build Coastguard Worker 39*9880d681SAndroid Build Coastguard Worker /// \brief Holds a reference to an ordered array of UnicodeCharRange and allows 40*9880d681SAndroid Build Coastguard Worker /// to quickly check if a code point is contained in the set represented by this 41*9880d681SAndroid Build Coastguard Worker /// array. 42*9880d681SAndroid Build Coastguard Worker class UnicodeCharSet { 43*9880d681SAndroid Build Coastguard Worker public: 44*9880d681SAndroid Build Coastguard Worker typedef ArrayRef<UnicodeCharRange> CharRanges; 45*9880d681SAndroid Build Coastguard Worker 46*9880d681SAndroid Build Coastguard Worker /// \brief Constructs a UnicodeCharSet instance from an array of 47*9880d681SAndroid Build Coastguard Worker /// UnicodeCharRanges. 48*9880d681SAndroid Build Coastguard Worker /// 49*9880d681SAndroid Build Coastguard Worker /// Array pointed by \p Ranges should have the lifetime at least as long as 50*9880d681SAndroid Build Coastguard Worker /// the UnicodeCharSet instance, and should not change. Array is validated by 51*9880d681SAndroid Build Coastguard Worker /// the constructor, so it makes sense to create as few UnicodeCharSet 52*9880d681SAndroid Build Coastguard Worker /// instances per each array of ranges, as possible. 53*9880d681SAndroid Build Coastguard Worker #ifdef NDEBUG 54*9880d681SAndroid Build Coastguard Worker 55*9880d681SAndroid Build Coastguard Worker // FIXME: This could use constexpr + static_assert. This way we 56*9880d681SAndroid Build Coastguard Worker // may get rid of NDEBUG in this header. Unfortunately there are some 57*9880d681SAndroid Build Coastguard Worker // problems to get this working with MSVC 2013. Change this when 58*9880d681SAndroid Build Coastguard Worker // the support for MSVC 2013 is dropped. UnicodeCharSet(CharRanges Ranges)59*9880d681SAndroid Build Coastguard Worker LLVM_CONSTEXPR UnicodeCharSet(CharRanges Ranges) : Ranges(Ranges) {} 60*9880d681SAndroid Build Coastguard Worker #else UnicodeCharSet(CharRanges Ranges)61*9880d681SAndroid Build Coastguard Worker UnicodeCharSet(CharRanges Ranges) : Ranges(Ranges) { 62*9880d681SAndroid Build Coastguard Worker assert(rangesAreValid()); 63*9880d681SAndroid Build Coastguard Worker } 64*9880d681SAndroid Build Coastguard Worker #endif 65*9880d681SAndroid Build Coastguard Worker 66*9880d681SAndroid Build Coastguard Worker /// \brief Returns true if the character set contains the Unicode code point 67*9880d681SAndroid Build Coastguard Worker /// \p C. contains(uint32_t C)68*9880d681SAndroid Build Coastguard Worker bool contains(uint32_t C) const { 69*9880d681SAndroid Build Coastguard Worker return std::binary_search(Ranges.begin(), Ranges.end(), C); 70*9880d681SAndroid Build Coastguard Worker } 71*9880d681SAndroid Build Coastguard Worker 72*9880d681SAndroid Build Coastguard Worker private: 73*9880d681SAndroid Build Coastguard Worker /// \brief Returns true if each of the ranges is a proper closed range 74*9880d681SAndroid Build Coastguard Worker /// [min, max], and if the ranges themselves are ordered and non-overlapping. rangesAreValid()75*9880d681SAndroid Build Coastguard Worker bool rangesAreValid() const { 76*9880d681SAndroid Build Coastguard Worker uint32_t Prev = 0; 77*9880d681SAndroid Build Coastguard Worker for (CharRanges::const_iterator I = Ranges.begin(), E = Ranges.end(); 78*9880d681SAndroid Build Coastguard Worker I != E; ++I) { 79*9880d681SAndroid Build Coastguard Worker if (I != Ranges.begin() && Prev >= I->Lower) { 80*9880d681SAndroid Build Coastguard Worker DEBUG(dbgs() << "Upper bound 0x"); 81*9880d681SAndroid Build Coastguard Worker DEBUG(dbgs().write_hex(Prev)); 82*9880d681SAndroid Build Coastguard Worker DEBUG(dbgs() << " should be less than succeeding lower bound 0x"); 83*9880d681SAndroid Build Coastguard Worker DEBUG(dbgs().write_hex(I->Lower) << "\n"); 84*9880d681SAndroid Build Coastguard Worker return false; 85*9880d681SAndroid Build Coastguard Worker } 86*9880d681SAndroid Build Coastguard Worker if (I->Upper < I->Lower) { 87*9880d681SAndroid Build Coastguard Worker DEBUG(dbgs() << "Upper bound 0x"); 88*9880d681SAndroid Build Coastguard Worker DEBUG(dbgs().write_hex(I->Lower)); 89*9880d681SAndroid Build Coastguard Worker DEBUG(dbgs() << " should not be less than lower bound 0x"); 90*9880d681SAndroid Build Coastguard Worker DEBUG(dbgs().write_hex(I->Upper) << "\n"); 91*9880d681SAndroid Build Coastguard Worker return false; 92*9880d681SAndroid Build Coastguard Worker } 93*9880d681SAndroid Build Coastguard Worker Prev = I->Upper; 94*9880d681SAndroid Build Coastguard Worker } 95*9880d681SAndroid Build Coastguard Worker 96*9880d681SAndroid Build Coastguard Worker return true; 97*9880d681SAndroid Build Coastguard Worker } 98*9880d681SAndroid Build Coastguard Worker 99*9880d681SAndroid Build Coastguard Worker const CharRanges Ranges; 100*9880d681SAndroid Build Coastguard Worker }; 101*9880d681SAndroid Build Coastguard Worker 102*9880d681SAndroid Build Coastguard Worker #undef DEBUG_TYPE // "unicode" 103*9880d681SAndroid Build Coastguard Worker 104*9880d681SAndroid Build Coastguard Worker } // namespace sys 105*9880d681SAndroid Build Coastguard Worker } // namespace llvm 106*9880d681SAndroid Build Coastguard Worker 107*9880d681SAndroid Build Coastguard Worker 108*9880d681SAndroid Build Coastguard Worker #endif // LLVM_SUPPORT_UNICODECHARRANGES_H 109