// xref: /aosp_15_r20/external/cronet/base/strings/utf_offset_string_conversions.h (revision 6777b5387eb2ff775bb5750e3f5d96f37fb7352b)
// Copyright 2011 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
4 
5 #ifndef BASE_STRINGS_UTF_OFFSET_STRING_CONVERSIONS_H_
6 #define BASE_STRINGS_UTF_OFFSET_STRING_CONVERSIONS_H_
7 
8 #include <stddef.h>
9 
10 #include <string>
11 #include <vector>
12 
13 #include "base/base_export.h"
14 #include "base/strings/string_piece.h"
15 
16 namespace base {
17 
18 // A helper class and associated data structures to adjust offsets into a
19 // string in response to various adjustments one might do to that string
20 // (e.g., eliminating a range).  For details on offsets, see the comments by
21 // the AdjustOffsets() function below.
22 class BASE_EXPORT OffsetAdjuster {
23  public:
24   struct BASE_EXPORT Adjustment {
25     Adjustment(size_t original_offset,
26                size_t original_length,
27                size_t output_length);
28 
29     size_t original_offset;
30     size_t original_length;
31     size_t output_length;
32   };
33   typedef std::vector<Adjustment> Adjustments;
34 
35   // Adjusts all offsets in |offsets_for_adjustment| to reflect the adjustments
36   // recorded in |adjustments|.  Adjusted offsets greater than |limit| will be
37   // set to std::u16string::npos.
38   //
39   // Offsets represents insertion/selection points between characters: if |src|
40   // is "abcd", then 0 is before 'a', 2 is between 'b' and 'c', and 4 is at the
41   // end of the string.  Valid input offsets range from 0 to |src_len|.  On
42   // exit, each offset will have been modified to point at the same logical
43   // position in the output string.  If an offset cannot be successfully
44   // adjusted (e.g., because it points into the middle of a multibyte sequence),
45   // it will be set to std::u16string::npos.
46   static void AdjustOffsets(const Adjustments& adjustments,
47                             std::vector<size_t>* offsets_for_adjustment,
48                             size_t limit = std::u16string::npos);
49 
50   // Adjusts the single |offset| to reflect the adjustments recorded in
51   // |adjustments|.
52   static void AdjustOffset(const Adjustments& adjustments,
53                            size_t* offset,
54                            size_t limit = std::u16string::npos);
55 
56   // Adjusts all offsets in |offsets_for_unadjustment| to reflect the reverse
57   // of the adjustments recorded in |adjustments|.  In other words, the offsets
58   // provided represent offsets into an adjusted string and the caller wants
59   // to know the offsets they correspond to in the original string.  If an
60   // offset cannot be successfully unadjusted (e.g., because it points into
61   // the middle of a multibyte sequence), it will be set to
62   // std::u16string::npos.
63   static void UnadjustOffsets(const Adjustments& adjustments,
64                               std::vector<size_t>* offsets_for_unadjustment);
65 
66   // Adjusts the single |offset| to reflect the reverse of the adjustments
67   // recorded in |adjustments|.
68   static void UnadjustOffset(const Adjustments& adjustments,
69                              size_t* offset);
70 
71   // Combines two sequential sets of adjustments, storing the combined revised
72   // adjustments in |adjustments_on_adjusted_string|.  That is, suppose a
73   // string was altered in some way, with the alterations recorded as
74   // adjustments in |first_adjustments|.  Then suppose the resulting string is
75   // further altered, with the alterations recorded as adjustments scored in
76   // |adjustments_on_adjusted_string|, with the offsets recorded in these
77   // adjustments being with respect to the intermediate string.  This function
78   // combines the two sets of adjustments into one, storing the result in
79   // |adjustments_on_adjusted_string|, whose offsets are correct with respect
80   // to the original string.
81   //
82   // Assumes both parameters are sorted by increasing offset.
83   //
84   // WARNING: Only supports |first_adjustments| that involve collapsing ranges
85   // of text, not expanding ranges.
86   static void MergeSequentialAdjustments(
87       const Adjustments& first_adjustments,
88       Adjustments* adjustments_on_adjusted_string);
89 };
90 
91 // Like the conversions in utf_string_conversions.h, but also fills in an
92 // |adjustments| parameter that reflects the alterations done to the string.
93 // It may be NULL.
94 BASE_EXPORT bool UTF8ToUTF16WithAdjustments(
95     const char* src,
96     size_t src_len,
97     std::u16string* output,
98     base::OffsetAdjuster::Adjustments* adjustments);
99 [[nodiscard]] BASE_EXPORT std::u16string UTF8ToUTF16WithAdjustments(
100     const base::StringPiece& utf8,
101     base::OffsetAdjuster::Adjustments* adjustments);
102 // As above, but instead internally examines the adjustments and applies them
103 // to |offsets_for_adjustment|.  Input offsets greater than the length of the
104 // input string will be set to std::u16string::npos.  See comments by
105 // AdjustOffsets().
106 BASE_EXPORT std::u16string UTF8ToUTF16AndAdjustOffsets(
107     const base::StringPiece& utf8,
108     std::vector<size_t>* offsets_for_adjustment);
109 BASE_EXPORT std::string UTF16ToUTF8AndAdjustOffsets(
110     const base::StringPiece16& utf16,
111     std::vector<size_t>* offsets_for_adjustment);
112 
113 }  // namespace base
114 
115 #endif  // BASE_STRINGS_UTF_OFFSET_STRING_CONVERSIONS_H_
116