// Copyright (c) 2011 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#ifndef BASE_STRINGS_UTF_OFFSET_STRING_CONVERSIONS_H_
#define BASE_STRINGS_UTF_OFFSET_STRING_CONVERSIONS_H_

#include <stddef.h>

#include <string>
#include <vector>

#include "base/base_export.h"
#include "base/strings/string16.h"
#include "base/strings/string_piece.h"

namespace base {

// A helper class and associated data structures to adjust offsets into a
// string in response to various adjustments one might do to that string
// (e.g., eliminating a range).  For details on offsets, see the comments by
// the AdjustOffsets() function below.
//
// Every member function declared here is static and the class declares no
// data members, so it is used purely as a scope for the Adjustment type and
// the offset-manipulation helpers.
class BASE_EXPORT OffsetAdjuster {
 public:
  // A record of a single alteration made to a string.
  //
  // NOTE(review): the field meanings below are inferred from the field names
  // only; confirm against the implementation file before relying on them.
  struct BASE_EXPORT Adjustment {
    Adjustment(size_t original_offset,
               size_t original_length,
               size_t output_length);

    // Presumably the offset, in the original string, at which the altered
    // range begins.
    size_t original_offset;
    // Presumably the length of the altered range in the original string.
    size_t original_length;
    // Presumably the length of the text that replaces that range in the
    // output string.
    size_t output_length;
  };
  // A list of Adjustment records.  MergeSequentialAdjustments() below assumes
  // such lists are sorted by increasing offset.
  typedef std::vector<Adjustment> Adjustments;

  // Adjusts all offsets in |offsets_for_adjustment| to reflect the adjustments
  // recorded in |adjustments|.  Adjusted offsets greater than |limit| will be
  // set to string16::npos.  |limit| defaults to string16::npos.
  //
  // Offsets represent insertion/selection points between characters: if |src|
  // is "abcd", then 0 is before 'a', 2 is between 'b' and 'c', and 4 is at the
  // end of the string.  Valid input offsets range from 0 to |src_len|.  On
  // exit, each offset will have been modified to point at the same logical
  // position in the output string.  If an offset cannot be successfully
  // adjusted (e.g., because it points into the middle of a multibyte sequence),
  // it will be set to string16::npos.
  //
  // NOTE(review): |src| and |src_len| are not parameters of this function;
  // they appear to refer to the original (pre-adjustment) string and its
  // length -- confirm.
  static void AdjustOffsets(const Adjustments& adjustments,
                            std::vector<size_t>* offsets_for_adjustment,
                            size_t limit = string16::npos);

  // Adjusts the single |offset| to reflect the adjustments recorded in
  // |adjustments|.  |offset| is an in/out parameter; see AdjustOffsets() above
  // for what offsets mean and for the role of |limit|.
  static void AdjustOffset(const Adjustments& adjustments,
                           size_t* offset,
                           size_t limit = string16::npos);

  // Adjusts all offsets in |offsets_for_unadjustment| to reflect the reverse
  // of the adjustments recorded in |adjustments|.  In other words, the offsets
  // provided represent offsets into an adjusted string and the caller wants
  // to know the offsets they correspond to in the original string.  If an
  // offset cannot be successfully unadjusted (e.g., because it points into
  // the middle of a multibyte sequence), it will be set to string16::npos.
  static void UnadjustOffsets(const Adjustments& adjustments,
                              std::vector<size_t>* offsets_for_unadjustment);

  // Adjusts the single |offset| to reflect the reverse of the adjustments
  // recorded in |adjustments|.  |offset| is an in/out parameter; see
  // UnadjustOffsets() above.
  static void UnadjustOffset(const Adjustments& adjustments,
                             size_t* offset);

  // Combines two sequential sets of adjustments, storing the combined revised
  // adjustments in |adjustments_on_adjusted_string|.  That is, suppose a
  // string was altered in some way, with the alterations recorded as
  // adjustments in |first_adjustments|.  Then suppose the resulting string is
  // further altered, with the alterations recorded as adjustments stored in
  // |adjustments_on_adjusted_string|, with the offsets recorded in these
  // adjustments being with respect to the intermediate string.  This function
  // combines the two sets of adjustments into one, storing the result in
  // |adjustments_on_adjusted_string|, whose offsets are correct with respect
  // to the original string.
  //
  // Assumes both parameters are sorted by increasing offset.
  //
  // WARNING: Only supports |first_adjustments| that involve collapsing ranges
  // of text, not expanding ranges.
  static void MergeSequentialAdjustments(
      const Adjustments& first_adjustments,
      Adjustments* adjustments_on_adjusted_string);
};

// Like the conversions in utf_string_conversions.h, but also fills in an
// |adjustments| parameter that reflects the alterations done to the string.
// |adjustments| may be NULL.
//
// NOTE(review): the meaning of the bool return value of the first overload is
// not stated here; presumably it matches the corresponding conversion in
// utf_string_conversions.h -- confirm.
BASE_EXPORT bool UTF8ToUTF16WithAdjustments(
    const char* src,
    size_t src_len,
    string16* output,
    base::OffsetAdjuster::Adjustments* adjustments);
BASE_EXPORT string16 UTF8ToUTF16WithAdjustments(
    const base::StringPiece& utf8,
    base::OffsetAdjuster::Adjustments* adjustments);
// As above, but instead internally examines the adjustments and applies them
// to |offsets_for_adjustment|.  Input offsets greater than the length of the
// input string will be set to string16::npos.  See comments by AdjustOffsets().
BASE_EXPORT string16 UTF8ToUTF16AndAdjustOffsets(
    const base::StringPiece& utf8,
    std::vector<size_t>* offsets_for_adjustment);
// Likewise for the UTF-16 to UTF-8 direction.
BASE_EXPORT std::string UTF16ToUTF8AndAdjustOffsets(
    const base::StringPiece16& utf16,
    std::vector<size_t>* offsets_for_adjustment);

}  // namespace base

#endif  // BASE_STRINGS_UTF_OFFSET_STRING_CONVERSIONS_H_