1*0e209d39SAndroid Build Coastguard Worker // © 2018 and later: Unicode, Inc. and others. 2*0e209d39SAndroid Build Coastguard Worker // License & terms of use: http://www.unicode.org/copyright.html 3*0e209d39SAndroid Build Coastguard Worker 4*0e209d39SAndroid Build Coastguard Worker #include "unicode/utypes.h" 5*0e209d39SAndroid Build Coastguard Worker 6*0e209d39SAndroid Build Coastguard Worker #if !UCONFIG_NO_FORMATTING 7*0e209d39SAndroid Build Coastguard Worker #ifndef __NUMPARSE_STRINGSEGMENT_H__ 8*0e209d39SAndroid Build Coastguard Worker #define __NUMPARSE_STRINGSEGMENT_H__ 9*0e209d39SAndroid Build Coastguard Worker 10*0e209d39SAndroid Build Coastguard Worker #include "unicode/unistr.h" 11*0e209d39SAndroid Build Coastguard Worker #include "unicode/uniset.h" 12*0e209d39SAndroid Build Coastguard Worker 13*0e209d39SAndroid Build Coastguard Worker U_NAMESPACE_BEGIN 14*0e209d39SAndroid Build Coastguard Worker 15*0e209d39SAndroid Build Coastguard Worker 16*0e209d39SAndroid Build Coastguard Worker /** 17*0e209d39SAndroid Build Coastguard Worker * A mutable UnicodeString wrapper with a variable offset and length and 18*0e209d39SAndroid Build Coastguard Worker * support for case folding. The charAt, length, and subSequence methods all 19*0e209d39SAndroid Build Coastguard Worker * operate relative to the fixed offset into the UnicodeString. 20*0e209d39SAndroid Build Coastguard Worker * 21*0e209d39SAndroid Build Coastguard Worker * Intended to be useful for parsing. 22*0e209d39SAndroid Build Coastguard Worker * 23*0e209d39SAndroid Build Coastguard Worker * CAUTION: Since this class is mutable, it must not be used anywhere that an 24*0e209d39SAndroid Build Coastguard Worker * immutable object is required, like in a cache or as the key of a hash map. 25*0e209d39SAndroid Build Coastguard Worker * 26*0e209d39SAndroid Build Coastguard Worker * @author sffc (Shane Carr) 27*0e209d39SAndroid Build Coastguard Worker */ 28*0e209d39SAndroid Build Coastguard Worker // Exported as U_I18N_API for tests 29*0e209d39SAndroid Build Coastguard Worker class U_I18N_API StringSegment : public UMemory { 30*0e209d39SAndroid Build Coastguard Worker public: 31*0e209d39SAndroid Build Coastguard Worker StringSegment(const UnicodeString& str, bool ignoreCase); 32*0e209d39SAndroid Build Coastguard Worker 33*0e209d39SAndroid Build Coastguard Worker int32_t getOffset() const; 34*0e209d39SAndroid Build Coastguard Worker 35*0e209d39SAndroid Build Coastguard Worker void setOffset(int32_t start); 36*0e209d39SAndroid Build Coastguard Worker 37*0e209d39SAndroid Build Coastguard Worker /** 38*0e209d39SAndroid Build Coastguard Worker * Equivalent to <code>setOffset(getOffset()+delta)</code>. 39*0e209d39SAndroid Build Coastguard Worker * 40*0e209d39SAndroid Build Coastguard Worker * <p> 41*0e209d39SAndroid Build Coastguard Worker * This method is usually called by a Matcher to register that a char was consumed. If the char is 42*0e209d39SAndroid Build Coastguard Worker * strong (it usually is, except for things like whitespace), follow this with a call to 43*0e209d39SAndroid Build Coastguard Worker * {@link ParsedNumber#setCharsConsumed}. For more information on strong chars, see that method. 44*0e209d39SAndroid Build Coastguard Worker */ 45*0e209d39SAndroid Build Coastguard Worker void adjustOffset(int32_t delta); 46*0e209d39SAndroid Build Coastguard Worker 47*0e209d39SAndroid Build Coastguard Worker /** 48*0e209d39SAndroid Build Coastguard Worker * Adjusts the offset by the width of the current code point, either 1 or 2 chars. 49*0e209d39SAndroid Build Coastguard Worker */ 50*0e209d39SAndroid Build Coastguard Worker void adjustOffsetByCodePoint(); 51*0e209d39SAndroid Build Coastguard Worker 52*0e209d39SAndroid Build Coastguard Worker void setLength(int32_t length); 53*0e209d39SAndroid Build Coastguard Worker 54*0e209d39SAndroid Build Coastguard Worker void resetLength(); 55*0e209d39SAndroid Build Coastguard Worker 56*0e209d39SAndroid Build Coastguard Worker int32_t length() const; 57*0e209d39SAndroid Build Coastguard Worker 58*0e209d39SAndroid Build Coastguard Worker char16_t charAt(int32_t index) const; 59*0e209d39SAndroid Build Coastguard Worker 60*0e209d39SAndroid Build Coastguard Worker UChar32 codePointAt(int32_t index) const; 61*0e209d39SAndroid Build Coastguard Worker 62*0e209d39SAndroid Build Coastguard Worker UnicodeString toUnicodeString() const; 63*0e209d39SAndroid Build Coastguard Worker 64*0e209d39SAndroid Build Coastguard Worker UnicodeString toTempUnicodeString() const; 65*0e209d39SAndroid Build Coastguard Worker 66*0e209d39SAndroid Build Coastguard Worker /** 67*0e209d39SAndroid Build Coastguard Worker * Returns the first code point in the string segment, or -1 if the string starts with an invalid 68*0e209d39SAndroid Build Coastguard Worker * code point. 69*0e209d39SAndroid Build Coastguard Worker * 70*0e209d39SAndroid Build Coastguard Worker * <p> 71*0e209d39SAndroid Build Coastguard Worker * <strong>Important:</strong> Most of the time, you should use {@link #startsWith}, which handles case 72*0e209d39SAndroid Build Coastguard Worker * folding logic, instead of this method. 73*0e209d39SAndroid Build Coastguard Worker */ 74*0e209d39SAndroid Build Coastguard Worker UChar32 getCodePoint() const; 75*0e209d39SAndroid Build Coastguard Worker 76*0e209d39SAndroid Build Coastguard Worker /** 77*0e209d39SAndroid Build Coastguard Worker * Returns true if the first code point of this StringSegment equals the given code point. 78*0e209d39SAndroid Build Coastguard Worker * 79*0e209d39SAndroid Build Coastguard Worker * <p> 80*0e209d39SAndroid Build Coastguard Worker * This method will perform case folding if case folding is enabled for the parser. 81*0e209d39SAndroid Build Coastguard Worker */ 82*0e209d39SAndroid Build Coastguard Worker bool startsWith(UChar32 otherCp) const; 83*0e209d39SAndroid Build Coastguard Worker 84*0e209d39SAndroid Build Coastguard Worker /** 85*0e209d39SAndroid Build Coastguard Worker * Returns true if the first code point of this StringSegment is in the given UnicodeSet. 86*0e209d39SAndroid Build Coastguard Worker */ 87*0e209d39SAndroid Build Coastguard Worker bool startsWith(const UnicodeSet& uniset) const; 88*0e209d39SAndroid Build Coastguard Worker 89*0e209d39SAndroid Build Coastguard Worker /** 90*0e209d39SAndroid Build Coastguard Worker * Returns true if there is at least one code point of overlap between this StringSegment and the 91*0e209d39SAndroid Build Coastguard Worker * given UnicodeString. 92*0e209d39SAndroid Build Coastguard Worker */ 93*0e209d39SAndroid Build Coastguard Worker bool startsWith(const UnicodeString& other) const; 94*0e209d39SAndroid Build Coastguard Worker 95*0e209d39SAndroid Build Coastguard Worker /** 96*0e209d39SAndroid Build Coastguard Worker * Returns the length of the prefix shared by this StringSegment and the given UnicodeString. For 97*0e209d39SAndroid Build Coastguard Worker * example, if this string segment is "aab", and the char sequence is "aac", this method returns 2, 98*0e209d39SAndroid Build Coastguard Worker * since the first 2 characters are the same. 99*0e209d39SAndroid Build Coastguard Worker * 100*0e209d39SAndroid Build Coastguard Worker * <p> 101*0e209d39SAndroid Build Coastguard Worker * This method only returns offsets along code point boundaries. 102*0e209d39SAndroid Build Coastguard Worker * 103*0e209d39SAndroid Build Coastguard Worker * <p> 104*0e209d39SAndroid Build Coastguard Worker * This method will perform case folding if case folding was enabled in the constructor. 105*0e209d39SAndroid Build Coastguard Worker * 106*0e209d39SAndroid Build Coastguard Worker * <p> 107*0e209d39SAndroid Build Coastguard Worker * IMPORTANT: The given UnicodeString must not be empty! It is the caller's responsibility to check. 108*0e209d39SAndroid Build Coastguard Worker */ 109*0e209d39SAndroid Build Coastguard Worker int32_t getCommonPrefixLength(const UnicodeString& other); 110*0e209d39SAndroid Build Coastguard Worker 111*0e209d39SAndroid Build Coastguard Worker /** 112*0e209d39SAndroid Build Coastguard Worker * Like {@link #getCommonPrefixLength}, but never performs case folding, even if case folding is 113*0e209d39SAndroid Build Coastguard Worker * enabled for the parser. 114*0e209d39SAndroid Build Coastguard Worker */ 115*0e209d39SAndroid Build Coastguard Worker int32_t getCaseSensitivePrefixLength(const UnicodeString& other); 116*0e209d39SAndroid Build Coastguard Worker 117*0e209d39SAndroid Build Coastguard Worker bool operator==(const UnicodeString& other) const; 118*0e209d39SAndroid Build Coastguard Worker 119*0e209d39SAndroid Build Coastguard Worker private: 120*0e209d39SAndroid Build Coastguard Worker const UnicodeString& fStr; 121*0e209d39SAndroid Build Coastguard Worker int32_t fStart; 122*0e209d39SAndroid Build Coastguard Worker int32_t fEnd; 123*0e209d39SAndroid Build Coastguard Worker bool fFoldCase; 124*0e209d39SAndroid Build Coastguard Worker 125*0e209d39SAndroid Build Coastguard Worker int32_t getPrefixLengthInternal(const UnicodeString& other, bool foldCase); 126*0e209d39SAndroid Build Coastguard Worker 127*0e209d39SAndroid Build Coastguard Worker static bool codePointsEqual(UChar32 cp1, UChar32 cp2, bool foldCase); 128*0e209d39SAndroid Build Coastguard Worker }; 129*0e209d39SAndroid Build Coastguard Worker 130*0e209d39SAndroid Build Coastguard Worker 131*0e209d39SAndroid Build Coastguard Worker U_NAMESPACE_END 132*0e209d39SAndroid Build Coastguard Worker 133*0e209d39SAndroid Build Coastguard Worker #endif //__NUMPARSE_STRINGSEGMENT_H__ 134*0e209d39SAndroid Build Coastguard Worker #endif /* #if !UCONFIG_NO_FORMATTING */ 135