xref: /aosp_15_r20/external/llvm/include/llvm/Support/UnicodeCharRanges.h (revision 9880d6810fe72a1726cb53787c6711e909410d58)
1*9880d681SAndroid Build Coastguard Worker //===--- UnicodeCharRanges.h - Types and functions for character ranges ---===//
2*9880d681SAndroid Build Coastguard Worker //
3*9880d681SAndroid Build Coastguard Worker //                     The LLVM Compiler Infrastructure
4*9880d681SAndroid Build Coastguard Worker //
5*9880d681SAndroid Build Coastguard Worker // This file is distributed under the University of Illinois Open Source
6*9880d681SAndroid Build Coastguard Worker // License. See LICENSE.TXT for details.
7*9880d681SAndroid Build Coastguard Worker //
8*9880d681SAndroid Build Coastguard Worker //===----------------------------------------------------------------------===//
9*9880d681SAndroid Build Coastguard Worker #ifndef LLVM_SUPPORT_UNICODECHARRANGES_H
10*9880d681SAndroid Build Coastguard Worker #define LLVM_SUPPORT_UNICODECHARRANGES_H
11*9880d681SAndroid Build Coastguard Worker 
12*9880d681SAndroid Build Coastguard Worker #include "llvm/ADT/ArrayRef.h"
13*9880d681SAndroid Build Coastguard Worker #include "llvm/ADT/SmallPtrSet.h"
14*9880d681SAndroid Build Coastguard Worker #include "llvm/Support/Compiler.h"
15*9880d681SAndroid Build Coastguard Worker #include "llvm/Support/Debug.h"
16*9880d681SAndroid Build Coastguard Worker #include "llvm/Support/Mutex.h"
17*9880d681SAndroid Build Coastguard Worker #include "llvm/Support/MutexGuard.h"
18*9880d681SAndroid Build Coastguard Worker #include "llvm/Support/raw_ostream.h"
19*9880d681SAndroid Build Coastguard Worker #include <algorithm>
20*9880d681SAndroid Build Coastguard Worker 
21*9880d681SAndroid Build Coastguard Worker namespace llvm {
22*9880d681SAndroid Build Coastguard Worker namespace sys {
23*9880d681SAndroid Build Coastguard Worker 
24*9880d681SAndroid Build Coastguard Worker #define DEBUG_TYPE "unicode"
25*9880d681SAndroid Build Coastguard Worker 
/// \brief A closed interval [Lower, Upper] of Unicode code points.
///
/// Both end points belong to the interval. This is a plain aggregate; it does
/// not itself enforce Lower <= Upper.
struct UnicodeCharRange {
  /// First code point in the interval (inclusive).
  uint32_t Lower;
  /// Last code point in the interval (inclusive).
  uint32_t Upper;
};
31*9880d681SAndroid Build Coastguard Worker 
32*9880d681SAndroid Build Coastguard Worker inline bool operator<(uint32_t Value, UnicodeCharRange Range) {
33*9880d681SAndroid Build Coastguard Worker   return Value < Range.Lower;
34*9880d681SAndroid Build Coastguard Worker }
35*9880d681SAndroid Build Coastguard Worker inline bool operator<(UnicodeCharRange Range, uint32_t Value) {
36*9880d681SAndroid Build Coastguard Worker   return Range.Upper < Value;
37*9880d681SAndroid Build Coastguard Worker }
38*9880d681SAndroid Build Coastguard Worker 
39*9880d681SAndroid Build Coastguard Worker /// \brief Holds a reference to an ordered array of UnicodeCharRange and allows
40*9880d681SAndroid Build Coastguard Worker /// to quickly check if a code point is contained in the set represented by this
41*9880d681SAndroid Build Coastguard Worker /// array.
42*9880d681SAndroid Build Coastguard Worker class UnicodeCharSet {
43*9880d681SAndroid Build Coastguard Worker public:
44*9880d681SAndroid Build Coastguard Worker   typedef ArrayRef<UnicodeCharRange> CharRanges;
45*9880d681SAndroid Build Coastguard Worker 
46*9880d681SAndroid Build Coastguard Worker   /// \brief Constructs a UnicodeCharSet instance from an array of
47*9880d681SAndroid Build Coastguard Worker   /// UnicodeCharRanges.
48*9880d681SAndroid Build Coastguard Worker   ///
49*9880d681SAndroid Build Coastguard Worker   /// Array pointed by \p Ranges should have the lifetime at least as long as
50*9880d681SAndroid Build Coastguard Worker   /// the UnicodeCharSet instance, and should not change. Array is validated by
51*9880d681SAndroid Build Coastguard Worker   /// the constructor, so it makes sense to create as few UnicodeCharSet
52*9880d681SAndroid Build Coastguard Worker   /// instances per each array of ranges, as possible.
53*9880d681SAndroid Build Coastguard Worker #ifdef NDEBUG
54*9880d681SAndroid Build Coastguard Worker 
55*9880d681SAndroid Build Coastguard Worker   // FIXME: This could use constexpr + static_assert. This way we
56*9880d681SAndroid Build Coastguard Worker   // may get rid of NDEBUG in this header. Unfortunately there are some
57*9880d681SAndroid Build Coastguard Worker   // problems to get this working with MSVC 2013. Change this when
58*9880d681SAndroid Build Coastguard Worker   // the support for MSVC 2013 is dropped.
UnicodeCharSet(CharRanges Ranges)59*9880d681SAndroid Build Coastguard Worker   LLVM_CONSTEXPR UnicodeCharSet(CharRanges Ranges) : Ranges(Ranges) {}
60*9880d681SAndroid Build Coastguard Worker #else
UnicodeCharSet(CharRanges Ranges)61*9880d681SAndroid Build Coastguard Worker   UnicodeCharSet(CharRanges Ranges) : Ranges(Ranges) {
62*9880d681SAndroid Build Coastguard Worker     assert(rangesAreValid());
63*9880d681SAndroid Build Coastguard Worker   }
64*9880d681SAndroid Build Coastguard Worker #endif
65*9880d681SAndroid Build Coastguard Worker 
66*9880d681SAndroid Build Coastguard Worker   /// \brief Returns true if the character set contains the Unicode code point
67*9880d681SAndroid Build Coastguard Worker   /// \p C.
contains(uint32_t C)68*9880d681SAndroid Build Coastguard Worker   bool contains(uint32_t C) const {
69*9880d681SAndroid Build Coastguard Worker     return std::binary_search(Ranges.begin(), Ranges.end(), C);
70*9880d681SAndroid Build Coastguard Worker   }
71*9880d681SAndroid Build Coastguard Worker 
72*9880d681SAndroid Build Coastguard Worker private:
73*9880d681SAndroid Build Coastguard Worker   /// \brief Returns true if each of the ranges is a proper closed range
74*9880d681SAndroid Build Coastguard Worker   /// [min, max], and if the ranges themselves are ordered and non-overlapping.
rangesAreValid()75*9880d681SAndroid Build Coastguard Worker   bool rangesAreValid() const {
76*9880d681SAndroid Build Coastguard Worker     uint32_t Prev = 0;
77*9880d681SAndroid Build Coastguard Worker     for (CharRanges::const_iterator I = Ranges.begin(), E = Ranges.end();
78*9880d681SAndroid Build Coastguard Worker          I != E; ++I) {
79*9880d681SAndroid Build Coastguard Worker       if (I != Ranges.begin() && Prev >= I->Lower) {
80*9880d681SAndroid Build Coastguard Worker         DEBUG(dbgs() << "Upper bound 0x");
81*9880d681SAndroid Build Coastguard Worker         DEBUG(dbgs().write_hex(Prev));
82*9880d681SAndroid Build Coastguard Worker         DEBUG(dbgs() << " should be less than succeeding lower bound 0x");
83*9880d681SAndroid Build Coastguard Worker         DEBUG(dbgs().write_hex(I->Lower) << "\n");
84*9880d681SAndroid Build Coastguard Worker         return false;
85*9880d681SAndroid Build Coastguard Worker       }
86*9880d681SAndroid Build Coastguard Worker       if (I->Upper < I->Lower) {
87*9880d681SAndroid Build Coastguard Worker         DEBUG(dbgs() << "Upper bound 0x");
88*9880d681SAndroid Build Coastguard Worker         DEBUG(dbgs().write_hex(I->Lower));
89*9880d681SAndroid Build Coastguard Worker         DEBUG(dbgs() << " should not be less than lower bound 0x");
90*9880d681SAndroid Build Coastguard Worker         DEBUG(dbgs().write_hex(I->Upper) << "\n");
91*9880d681SAndroid Build Coastguard Worker         return false;
92*9880d681SAndroid Build Coastguard Worker       }
93*9880d681SAndroid Build Coastguard Worker       Prev = I->Upper;
94*9880d681SAndroid Build Coastguard Worker     }
95*9880d681SAndroid Build Coastguard Worker 
96*9880d681SAndroid Build Coastguard Worker     return true;
97*9880d681SAndroid Build Coastguard Worker   }
98*9880d681SAndroid Build Coastguard Worker 
99*9880d681SAndroid Build Coastguard Worker   const CharRanges Ranges;
100*9880d681SAndroid Build Coastguard Worker };
101*9880d681SAndroid Build Coastguard Worker 
102*9880d681SAndroid Build Coastguard Worker #undef DEBUG_TYPE // "unicode"
103*9880d681SAndroid Build Coastguard Worker 
104*9880d681SAndroid Build Coastguard Worker } // namespace sys
105*9880d681SAndroid Build Coastguard Worker } // namespace llvm
106*9880d681SAndroid Build Coastguard Worker 
107*9880d681SAndroid Build Coastguard Worker 
108*9880d681SAndroid Build Coastguard Worker #endif // LLVM_SUPPORT_UNICODECHARRANGES_H
109