1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /*
4 *******************************************************************************
5 *
6 *   Copyright (C) 1999-2012, International Business Machines
7 *   Corporation and others.  All Rights Reserved.
8 *
9 *******************************************************************************
10 *   file name:  utf16.h
11 *   encoding:   UTF-8
12 *   tab size:   8 (not used)
13 *   indentation:4
14 *
15 *   created on: 1999sep09
16 *   created by: Markus W. Scherer
17 */
18 
19 /**
20  * @addtogroup icu4c ICU4C
21  * @{
22  * \file
23  * \brief C API: 16-bit Unicode handling macros
24  *
25  * This file defines macros to deal with 16-bit Unicode (UTF-16) code units and strings.
26  *
27  * For more information see utf.h and the ICU User Guide Strings chapter
28  * (https://unicode-org.github.io/icu/userguide/strings).
29  *
30  * <em>Usage:</em>
 * ICU coding guidelines for if() statements should be followed when using these macros.
 * Compound statements (curly braces {}) must be used for if-else-while...
 * bodies, and every macro statement should be terminated with a semicolon.
34  */
35 
36 #ifndef __UTF16_H__
37 #define __UTF16_H__
38 
39 #include <stdbool.h>
40 #include "unicode/umachine.h"
41 #ifndef __UTF_H__
42 #   include "unicode/utf.h"
43 #endif
44 
45 /* single-code point definitions -------------------------------------------- */
46 
/**
 * Does this code unit alone encode a code point (BMP, not a surrogate)?
 * The whole expansion is parenthesized so that the leading negation cannot
 * combine with tokens that surround the macro call.
 * @param c 16-bit code unit
 * @return true or false
 * \xrefitem stable "Stable" "Stable List" ICU 2.4
 */
#define U16_IS_SINGLE(c) (!U_IS_SURROGATE(c))
54 
/**
 * Tests whether a code unit is a lead (first) surrogate, U+d800..U+dbff.
 * All bits above the low 10 are compared against 0xd800, so values outside
 * the 16-bit range never match.
 * @param c 16-bit code unit
 * @return true or false
 * \xrefitem stable "Stable" "Stable List" ICU 2.4
 */
#define U16_IS_LEAD(c) (0xd800==((c)&0xfffffc00))
62 
/**
 * Tests whether a code unit is a trail (second) surrogate, U+dc00..U+dfff.
 * All bits above the low 10 are compared against 0xdc00, so values outside
 * the 16-bit range never match.
 * @param c 16-bit code unit
 * @return true or false
 * \xrefitem stable "Stable" "Stable List" ICU 2.4
 */
#define U16_IS_TRAIL(c) (0xdc00==((c)&0xfffffc00))
70 
/**
 * Tests whether a code unit is any surrogate, lead or trail (U+d800..U+dfff).
 * This is the UTF-16 spelling of U_IS_SURROGATE and simply forwards to it.
 * @param c 16-bit code unit
 * @return true or false
 * \xrefitem stable "Stable" "Stable List" ICU 2.4
 */
#define U16_IS_SURROGATE(c) (U_IS_SURROGATE(c))
78 
/**
 * Given that c is already known to be a surrogate (U16_IS_SURROGATE(c)),
 * tells whether it is the lead half.
 * Only bit 0x400 is inspected: it is clear for lead surrogates.
 * @param c 16-bit code unit
 * @return true or false
 * \xrefitem stable "Stable" "Stable List" ICU 2.4
 */
#define U16_IS_SURROGATE_LEAD(c) (!((c)&0x400))
87 
/**
 * Given that c is already known to be a surrogate (U16_IS_SURROGATE(c)),
 * tells whether it is the trail half.
 * Only bit 0x400 is inspected: it is set for trail surrogates.
 * @param c 16-bit code unit
 * @return true or false
 * \xrefitem stable "Stable" "Stable List" ICU 4.2
 */
#define U16_IS_SURROGATE_TRAIL(c) (0!=((c)&0x400))
96 
/**
 * Helper constant for U16_GET_SUPPLEMENTARY:
 * the combined bias (0xd800<<10) + 0xdc00 - 0x10000 that is subtracted
 * once from (lead<<10) + trail.
 * \xrefitem internal "Internal"  "Internal List"  Do not use. This API is for internal use only.
 */
#define U16_SURROGATE_OFFSET (0xdc00-0x10000+(0xd800<<10UL))
102 
/**
 * Combine a lead and a trail surrogate into the supplementary code point
 * (U+10000..U+10ffff) that they encode.
 * The result is undefined if the input values are not
 * lead and trail surrogates.
 *
 * @param lead lead surrogate (U+d800..U+dbff)
 * @param trail trail surrogate (U+dc00..U+dfff)
 * @return supplementary code point (U+10000..U+10ffff)
 * \xrefitem stable "Stable" "Stable List" ICU 2.4
 */
#define U16_GET_SUPPLEMENTARY(lead, trail) \
    ((((UChar32)(lead))<<10UL)-U16_SURROGATE_OFFSET+((UChar32)(trail)))
116 
117 
/**
 * Get the lead surrogate (0xd800..0xdbff) for a
 * supplementary code point (0x10000..0x10ffff).
 * The cast expression is wrapped in an outer pair of parentheses so that the
 * macro expands to a primary expression and can be used as an operand of
 * any operator (for example sizeof).
 * @param supplementary 32-bit code point (U+10000..U+10ffff)
 * @return lead surrogate (U+d800..U+dbff) for supplementary
 * \xrefitem stable "Stable" "Stable List" ICU 2.4
 */
#define U16_LEAD(supplementary) ((UChar)(((supplementary)>>10)+0xd7c0))
126 
/**
 * Get the trail surrogate (0xdc00..0xdfff) for a
 * supplementary code point (0x10000..0x10ffff).
 * The cast expression is wrapped in an outer pair of parentheses so that the
 * macro expands to a primary expression and can be used as an operand of
 * any operator (for example sizeof).
 * @param supplementary 32-bit code point (U+10000..U+10ffff)
 * @return trail surrogate (U+dc00..U+dfff) for supplementary
 * \xrefitem stable "Stable" "Stable List" ICU 2.4
 */
#define U16_TRAIL(supplementary) ((UChar)(((supplementary)&0x3ff)|0xdc00))
135 
/**
 * Number of 16-bit code units (1 or 2) needed to encode a Unicode code point:
 * 1 for values up to U+ffff, 2 for anything above.
 * The result is not defined if c is not a Unicode code point (U+0000..U+10ffff).
 * @param c 32-bit code point
 * @return 1 or 2
 * \xrefitem stable "Stable" "Stable List" ICU 2.4
 */
#define U16_LENGTH(c) ((uint32_t)(c)>0xffff ? 2 : 1)
144 
/**
 * The maximum number of 16-bit code units per Unicode code point (U+0000..U+10ffff).
 * This is the larger of the two values that U16_LENGTH can return
 * and is suitable for sizing a buffer that must hold any one code point.
 * @return 2
 * \xrefitem stable "Stable" "Stable List" ICU 2.4
 */
#define U16_MAX_LENGTH 2
151 
/**
 * Random-access read: fetch the code point that contains the code unit at
 * offset i, leaving the offset unchanged.
 * "Unsafe" macro, assumes well-formed UTF-16.
 *
 * When the offset addresses either half of a surrogate pair, the adjacent
 * half is read as well (at i+1 for a lead unit, at i-1 for a trail unit)
 * and the supplementary code point is assembled.
 * The result is undefined if the offset points to a single, unpaired surrogate.
 * Iteration through a string is more efficient with U16_NEXT_UNSAFE or U16_NEXT.
 *
 * @param s const UChar * string
 * @param i string offset
 * @param c output UChar32 variable
 * @see U16_GET
 * \xrefitem stable "Stable" "Stable List" ICU 2.4
 */
#define U16_GET_UNSAFE(s, i, c) UPRV_BLOCK_MACRO_BEGIN { \
    (c)=(s)[i]; \
    if(U16_IS_SURROGATE(c)) { \
        /* trail unit: partner precedes it; lead unit: partner follows it */ \
        (c)=U16_IS_SURROGATE_TRAIL(c) ? \
            U16_GET_SUPPLEMENTARY((s)[(i)-1], (c)) : \
            U16_GET_SUPPLEMENTARY((c), (s)[(i)+1]); \
    } \
} UPRV_BLOCK_MACRO_END
179 
/**
 * Random-access read: fetch the code point that contains the code unit at
 * offset i, leaving the offset unchanged.
 * "Safe" macro, handles unpaired surrogates and checks for string boundaries.
 *
 * When the offset addresses either half of a surrogate pair, the adjacent
 * matching half is read as well: a trail unit looks back only while i>start,
 * a lead unit looks ahead only while i+1!=length.
 *
 * The length can be negative for a NUL-terminated string.
 *
 * If the offset points to a single, unpaired surrogate, then
 * c is set to that unpaired surrogate.
 * Iteration through a string is more efficient with U16_NEXT_UNSAFE or U16_NEXT.
 *
 * @param s const UChar * string
 * @param start starting string offset (usually 0)
 * @param i string offset, must be start<=i<length
 * @param length string length
 * @param c output UChar32 variable
 * @see U16_GET_UNSAFE
 * \xrefitem stable "Stable" "Stable List" ICU 2.4
 */
#define U16_GET(s, start, i, length, c) UPRV_BLOCK_MACRO_BEGIN { \
    (c)=(s)[i]; \
    if(U16_IS_SURROGATE(c)) { \
        uint16_t __c2; \
        if(U16_IS_SURROGATE_TRAIL(c)) { \
            /* trail unit: pair it with a lead unit just before it, if any */ \
            if((start)<(i) && U16_IS_LEAD(__c2=(s)[(i)-1])) { \
                (c)=U16_GET_SUPPLEMENTARY(__c2, (c)); \
            } \
        } else if((length)!=(i)+1 && U16_IS_TRAIL(__c2=(s)[(i)+1])) { \
            /* lead unit followed by a trail unit inside the string */ \
            (c)=U16_GET_SUPPLEMENTARY((c), __c2); \
        } \
    } \
} UPRV_BLOCK_MACRO_END
218 
/**
 * Random-access read: fetch the code point that contains the code unit at
 * offset i, leaving the offset unchanged.
 * "Safe" macro, handles unpaired surrogates and checks for string boundaries.
 *
 * When the offset addresses either half of a surrogate pair, the adjacent
 * matching half is read as well: a trail unit looks back only while i>start,
 * a lead unit looks ahead only while i+1!=length.
 *
 * The length can be negative for a NUL-terminated string.
 *
 * If the offset points to a single, unpaired surrogate, then
 * c is set to U+FFFD.
 * Iteration through a string is more efficient with U16_NEXT_UNSAFE or U16_NEXT_OR_FFFD.
 *
 * @param s const UChar * string
 * @param start starting string offset (usually 0)
 * @param i string offset, must be start<=i<length
 * @param length string length
 * @param c output UChar32 variable
 * @see U16_GET_UNSAFE
 * \xrefitem stable "Stable" "Stable List" ICU 60
 */
#define U16_GET_OR_FFFD(s, start, i, length, c) UPRV_BLOCK_MACRO_BEGIN { \
    (c)=(s)[i]; \
    if(U16_IS_SURROGATE(c)) { \
        uint16_t __c2; \
        if(U16_IS_SURROGATE_TRAIL(c)) { \
            /* trail unit: needs a lead unit just before it */ \
            if((start)<(i) && U16_IS_LEAD(__c2=(s)[(i)-1])) { \
                (c)=U16_GET_SUPPLEMENTARY(__c2, (c)); \
            } else { \
                (c)=0xfffd; \
            } \
        } else if((length)!=(i)+1 && U16_IS_TRAIL(__c2=(s)[(i)+1])) { \
            /* lead unit followed by a trail unit inside the string */ \
            (c)=U16_GET_SUPPLEMENTARY((c), __c2); \
        } else { \
            (c)=0xfffd; \
        } \
    } \
} UPRV_BLOCK_MACRO_END
261 
262 /* definitions with forward iteration --------------------------------------- */
263 
/**
 * Read the code point that starts at a code point boundary offset and
 * move the offset forward to the next code point boundary.
 * (Post-incrementing forward iteration.)
 * "Unsafe" macro, assumes well-formed UTF-16.
 *
 * If the unit at the offset is a lead surrogate, the following unit is
 * consumed as its trail surrogate without further checks.
 * If the offset points to a trail surrogate, then that itself
 * will be returned as the code point.
 * The result is undefined if the offset points to a single, unpaired lead surrogate.
 *
 * @param s const UChar * string
 * @param i string offset
 * @param c output UChar32 variable
 * @see U16_NEXT
 * \xrefitem stable "Stable" "Stable List" ICU 2.4
 */
#define U16_NEXT_UNSAFE(s, i, c) UPRV_BLOCK_MACRO_BEGIN { \
    (c)=(s)[i]; \
    ++(i); \
    if(U16_IS_LEAD(c)) { \
        /* well-formed input guarantees that a trail unit follows */ \
        (c)=U16_GET_SUPPLEMENTARY((c), (s)[i]); \
        ++(i); \
    } \
} UPRV_BLOCK_MACRO_END
289 
/**
 * Read the code point that starts at a code point boundary offset and
 * move the offset forward to the next code point boundary.
 * (Post-incrementing forward iteration.)
 * "Safe" macro, handles unpaired surrogates and checks for string boundaries.
 *
 * The length can be negative for a NUL-terminated string.
 *
 * If the unit at the offset is a lead surrogate and the next unit lies
 * inside the string and is a trail surrogate, both are consumed and
 * combined into a supplementary code point.
 * If the offset points to a trail surrogate or
 * to a single, unpaired lead surrogate, then c is set to that unpaired surrogate.
 *
 * @param s const UChar * string
 * @param i string offset, must be i<length
 * @param length string length
 * @param c output UChar32 variable
 * @see U16_NEXT_UNSAFE
 * \xrefitem stable "Stable" "Stable List" ICU 2.4
 */
#define U16_NEXT(s, i, length, c) UPRV_BLOCK_MACRO_BEGIN { \
    (c)=(s)[(i)++]; \
    if(U16_IS_LEAD(c) && (i)!=(length)) { \
        /* peek at the following unit; consume it only if it completes the pair */ \
        uint16_t __c2=(s)[(i)]; \
        if(U16_IS_TRAIL(__c2)) { \
            ++(i); \
            (c)=U16_GET_SUPPLEMENTARY((c), __c2); \
        } \
    } \
} UPRV_BLOCK_MACRO_END
321 
/**
 * Read the code point that starts at a code point boundary offset and
 * move the offset forward to the next code point boundary.
 * (Post-incrementing forward iteration.)
 * "Safe" macro, handles unpaired surrogates and checks for string boundaries.
 *
 * The length can be negative for a NUL-terminated string.
 *
 * If the unit at the offset is a lead surrogate and the next unit lies
 * inside the string and is a trail surrogate, both are consumed and
 * combined into a supplementary code point.
 * If the offset points to a trail surrogate or
 * to a single, unpaired lead surrogate, then c is set to U+FFFD.
 *
 * @param s const UChar * string
 * @param i string offset, must be i<length
 * @param length string length
 * @param c output UChar32 variable
 * @see U16_NEXT_UNSAFE
 * \xrefitem stable "Stable" "Stable List" ICU 60
 */
#define U16_NEXT_OR_FFFD(s, i, length, c) UPRV_BLOCK_MACRO_BEGIN { \
    (c)=(s)[(i)++]; \
    if(U16_IS_SURROGATE(c)) { \
        uint16_t __c2; \
        /* unpaired: a trail unit, a lead unit at the end, or a lead unit without a trail unit */ \
        if(U16_IS_SURROGATE_TRAIL(c) || (i)==(length) || !U16_IS_TRAIL(__c2=(s)[(i)])) { \
            (c)=0xfffd; \
        } else { \
            ++(i); \
            (c)=U16_GET_SUPPLEMENTARY((c), __c2); \
        } \
    } \
} UPRV_BLOCK_MACRO_END
355 
/**
 * Write one code point into a string buffer, overwriting 1 or 2 code units.
 * The offset points to the current end of the string contents
 * and is advanced (post-increment) past the units that were written.
 * "Unsafe" macro, assumes a valid code point and sufficient space in the string.
 * Otherwise, the result is undefined.
 *
 * @param s UChar * string buffer (the macro stores into it)
 * @param i string offset
 * @param c code point to append
 * @see U16_APPEND
 * \xrefitem stable "Stable" "Stable List" ICU 2.4
 */
#define U16_APPEND_UNSAFE(s, i, c) UPRV_BLOCK_MACRO_BEGIN { \
    if((uint32_t)(c)>0xffff) { \
        /* supplementary code point: lead surrogate first, then trail surrogate */ \
        (s)[(i)++]=(uint16_t)(((c)>>10)+0xd7c0); \
        (s)[(i)++]=(uint16_t)(((c)&0x3ff)|0xdc00); \
    } else { \
        (s)[(i)++]=(uint16_t)(c); \
    } \
} UPRV_BLOCK_MACRO_END
377 
/**
 * Write one code point into a string buffer, overwriting 1 or 2 code units.
 * The offset points to the current end of the string contents
 * and is advanced (post-increment) past the units that were written.
 * "Safe" macro, checks for a valid code point.
 * If a surrogate pair is written, checks for sufficient space in the string.
 * If the code point is not valid or a trail surrogate does not fit,
 * then isError is set to true and neither the buffer nor the offset is changed.
 *
 * @param s UChar * string buffer (the macro stores into it)
 * @param i string offset, must be i<capacity
 * @param capacity size of the string buffer
 * @param c code point to append
 * @param isError output UBool set to true if an error occurs, otherwise not modified
 * @see U16_APPEND_UNSAFE
 * \xrefitem stable "Stable" "Stable List" ICU 2.4
 */
#define U16_APPEND(s, i, capacity, c, isError) UPRV_BLOCK_MACRO_BEGIN { \
    if((uint32_t)(c)>0xffff) { \
        if((uint32_t)(c)<=0x10ffff && (i)+1<(capacity)) { \
            (s)[(i)++]=(uint16_t)(((c)>>10)+0xd7c0); \
            (s)[(i)++]=(uint16_t)(((c)&0x3ff)|0xdc00); \
        } else /* c>0x10ffff or not enough space */ { \
            (isError)=true; \
        } \
    } else { \
        /* BMP code point: one unit; the caller guarantees i<capacity */ \
        (s)[(i)++]=(uint16_t)(c); \
    } \
} UPRV_BLOCK_MACRO_END
405 
/**
 * Step the string offset forward over one code point, from one code point
 * boundary to the next: by two units if the unit at the offset is a lead
 * surrogate, by one unit otherwise.
 * (Post-incrementing iteration.)
 * "Unsafe" macro, assumes well-formed UTF-16.
 *
 * @param s const UChar * string
 * @param i string offset
 * @see U16_FWD_1
 * \xrefitem stable "Stable" "Stable List" ICU 2.4
 */
#define U16_FWD_1_UNSAFE(s, i) UPRV_BLOCK_MACRO_BEGIN { \
    (i)+=U16_IS_LEAD((s)[i]) ? 2 : 1; \
} UPRV_BLOCK_MACRO_END
421 
/**
 * Step the string offset forward over one code point, from one code point
 * boundary to the next.
 * (Post-incrementing iteration.)
 * "Safe" macro, handles unpaired surrogates and checks for string boundaries:
 * a second unit is skipped only if the first one is a lead surrogate, the
 * string has not ended, and the next unit is a trail surrogate.
 *
 * The length can be negative for a NUL-terminated string.
 *
 * @param s const UChar * string
 * @param i string offset, must be i<length
 * @param length string length
 * @see U16_FWD_1_UNSAFE
 * \xrefitem stable "Stable" "Stable List" ICU 2.4
 */
#define U16_FWD_1(s, i, length) UPRV_BLOCK_MACRO_BEGIN { \
    if(U16_IS_LEAD((s)[(i)++])) { \
        /* the offset is already past the lead unit here */ \
        if((i)!=(length) && U16_IS_TRAIL((s)[i])) { \
            ++(i); \
        } \
    } \
} UPRV_BLOCK_MACRO_END
440 
/**
 * Step the string offset forward over n code points, from one code point
 * boundary to the n-th next one, by applying U16_FWD_1_UNSAFE n times.
 * Nothing happens if n is not positive.
 * (Post-incrementing iteration.)
 * "Unsafe" macro, assumes well-formed UTF-16.
 *
 * @param s const UChar * string
 * @param i string offset
 * @param n number of code points to skip
 * @see U16_FWD_N
 * \xrefitem stable "Stable" "Stable List" ICU 2.4
 */
#define U16_FWD_N_UNSAFE(s, i, n) UPRV_BLOCK_MACRO_BEGIN { \
    int32_t __N; \
    for(__N=(n); __N>0; --__N) { \
        U16_FWD_1_UNSAFE(s, i); \
    } \
} UPRV_BLOCK_MACRO_END
460 
/**
 * Step the string offset forward over up to n code points, from one code
 * point boundary to the n-th next one, by applying U16_FWD_1 repeatedly.
 * Stepping stops early when the end of the string is reached: at i>=length
 * for a non-negative length, or at a NUL unit for a negative length.
 * (Post-incrementing iteration.)
 * "Safe" macro, handles unpaired surrogates and checks for string boundaries.
 *
 * The length can be negative for a NUL-terminated string.
 *
 * @param s const UChar * string
 * @param i int32_t string offset, must be i<length
 * @param length int32_t string length
 * @param n number of code points to skip
 * @see U16_FWD_N_UNSAFE
 * \xrefitem stable "Stable" "Stable List" ICU 2.4
 */
#define U16_FWD_N(s, i, length, n) UPRV_BLOCK_MACRO_BEGIN { \
    int32_t __N; \
    for(__N=(n); __N>0 && ((i)<(length) || ((length)<0 && (s)[i]!=0)); --__N) { \
        U16_FWD_1(s, i, length); \
    } \
} UPRV_BLOCK_MACRO_END
483 
/**
 * Snap a random-access offset back to the start of the code point it falls in.
 * If the offset points to the trail surrogate of a surrogate pair,
 * then the offset is decremented.
 * Otherwise, it keeps its value.
 * "Unsafe" macro, assumes well-formed UTF-16.
 *
 * @param s const UChar * string
 * @param i string offset
 * @see U16_SET_CP_START
 * \xrefitem stable "Stable" "Stable List" ICU 2.4
 */
#define U16_SET_CP_START_UNSAFE(s, i) UPRV_BLOCK_MACRO_BEGIN { \
    (i)-=U16_IS_TRAIL((s)[i]) ? 1 : 0; \
} UPRV_BLOCK_MACRO_END
502 
/**
 * Snap a random-access offset back to the start of the code point it falls in.
 * If the offset points to the trail surrogate of a surrogate pair,
 * then the offset is decremented.
 * Otherwise, it is not modified.
 * "Safe" macro, handles unpaired surrogates and checks for string boundaries:
 * the unit before the offset is examined only while i>start, and the offset
 * moves only if that unit is a lead surrogate.
 *
 * @param s const UChar * string
 * @param start starting string offset (usually 0)
 * @param i string offset, must be start<=i
 * @see U16_SET_CP_START_UNSAFE
 * \xrefitem stable "Stable" "Stable List" ICU 2.4
 */
#define U16_SET_CP_START(s, start, i) UPRV_BLOCK_MACRO_BEGIN { \
    if(U16_IS_TRAIL((s)[i])) { \
        if((start)<(i) && U16_IS_LEAD((s)[(i)-1])) { \
            --(i); \
        } \
    } \
} UPRV_BLOCK_MACRO_END
522 
523 /* definitions with backward iteration -------------------------------------- */
524 
/**
 * Step the string offset back from one code point boundary to the previous
 * one and read the code point that lies between them.
 * (Pre-decrementing backward iteration.)
 * "Unsafe" macro, assumes well-formed UTF-16.
 *
 * The input offset may be the same as the string length.
 * If the unit before the offset is a trail surrogate, the unit before that
 * is consumed as its lead surrogate without further checks.
 * If the offset is behind a lead surrogate, then that itself
 * will be returned as the code point.
 * The result is undefined if the offset is behind a single, unpaired trail surrogate.
 *
 * @param s const UChar * string
 * @param i string offset
 * @param c output UChar32 variable
 * @see U16_PREV
 * \xrefitem stable "Stable" "Stable List" ICU 2.4
 */
#define U16_PREV_UNSAFE(s, i, c) UPRV_BLOCK_MACRO_BEGIN { \
    --(i); \
    (c)=(s)[i]; \
    if(U16_IS_TRAIL(c)) { \
        /* well-formed input guarantees that a lead unit precedes */ \
        --(i); \
        (c)=U16_GET_SUPPLEMENTARY((s)[i], (c)); \
    } \
} UPRV_BLOCK_MACRO_END
551 
/**
 * Step the string offset back from one code point boundary to the previous
 * one and read the code point that lies between them.
 * (Pre-decrementing backward iteration.)
 * "Safe" macro, handles unpaired surrogates and checks for string boundaries.
 *
 * The input offset may be the same as the string length.
 * If the unit before the offset is a trail surrogate and the unit before
 * that lies at or after start and is a lead surrogate, both are consumed
 * and combined into a supplementary code point.
 * If the offset is behind a lead surrogate or behind a single, unpaired
 * trail surrogate, then c is set to that unpaired surrogate.
 *
 * @param s const UChar * string
 * @param start starting string offset (usually 0)
 * @param i string offset, must be start<i
 * @param c output UChar32 variable
 * @see U16_PREV_UNSAFE
 * \xrefitem stable "Stable" "Stable List" ICU 2.4
 */
#define U16_PREV(s, start, i, c) UPRV_BLOCK_MACRO_BEGIN { \
    (c)=(s)[--(i)]; \
    if(U16_IS_TRAIL(c) && (start)<(i)) { \
        /* peek at the preceding unit; consume it only if it completes the pair */ \
        uint16_t __c2=(s)[(i)-1]; \
        if(U16_IS_LEAD(__c2)) { \
            --(i); \
            (c)=U16_GET_SUPPLEMENTARY(__c2, (c)); \
        } \
    } \
} UPRV_BLOCK_MACRO_END
582 
/**
 * Step the string offset back from one code point boundary to the previous
 * one and read the code point that lies between them.
 * (Pre-decrementing backward iteration.)
 * "Safe" macro, handles unpaired surrogates and checks for string boundaries.
 *
 * The input offset may be the same as the string length.
 * If the unit before the offset is a trail surrogate and the unit before
 * that lies at or after start and is a lead surrogate, both are consumed
 * and combined into a supplementary code point.
 * If the offset is behind a lead surrogate or behind a single, unpaired
 * trail surrogate, then c is set to U+FFFD.
 *
 * @param s const UChar * string
 * @param start starting string offset (usually 0)
 * @param i string offset, must be start<i
 * @param c output UChar32 variable
 * @see U16_PREV_UNSAFE
 * \xrefitem stable "Stable" "Stable List" ICU 60
 */
#define U16_PREV_OR_FFFD(s, start, i, c) UPRV_BLOCK_MACRO_BEGIN { \
    (c)=(s)[--(i)]; \
    if(U16_IS_SURROGATE(c)) { \
        uint16_t __c2; \
        /* unpaired: a lead unit, a trail unit at start, or a trail unit without a lead unit */ \
        if(U16_IS_SURROGATE_LEAD(c) || (i)<=(start) || !U16_IS_LEAD(__c2=(s)[(i)-1])) { \
            (c)=0xfffd; \
        } else { \
            --(i); \
            (c)=U16_GET_SUPPLEMENTARY(__c2, (c)); \
        } \
    } \
} UPRV_BLOCK_MACRO_END
615 
/**
 * Step the string offset back over one code point, from one code point
 * boundary to the previous one: by two units if the unit before the offset
 * is a trail surrogate, by one unit otherwise.
 * (Pre-decrementing backward iteration.)
 * The input offset may be the same as the string length.
 * "Unsafe" macro, assumes well-formed UTF-16.
 *
 * @param s const UChar * string
 * @param i string offset
 * @see U16_BACK_1
 * \xrefitem stable "Stable" "Stable List" ICU 2.4
 */
#define U16_BACK_1_UNSAFE(s, i) UPRV_BLOCK_MACRO_BEGIN { \
    (i)-=U16_IS_TRAIL((s)[(i)-1]) ? 2 : 1; \
} UPRV_BLOCK_MACRO_END
632 
/**
 * Step the string offset back over one code point, from one code point
 * boundary to the previous one.
 * (Pre-decrementing backward iteration.)
 * The input offset may be the same as the string length.
 * "Safe" macro, handles unpaired surrogates and checks for string boundaries:
 * a second unit is skipped only if the first one is a trail surrogate, the
 * offset is still beyond start, and the preceding unit is a lead surrogate.
 *
 * @param s const UChar * string
 * @param start starting string offset (usually 0)
 * @param i string offset, must be start<i
 * @see U16_BACK_1_UNSAFE
 * \xrefitem stable "Stable" "Stable List" ICU 2.4
 */
#define U16_BACK_1(s, start, i) UPRV_BLOCK_MACRO_BEGIN { \
    if(U16_IS_TRAIL((s)[--(i)])) { \
        /* the offset already points at the trail unit here */ \
        if((start)<(i) && U16_IS_LEAD((s)[(i)-1])) { \
            --(i); \
        } \
    } \
} UPRV_BLOCK_MACRO_END
650 
/**
 * Step the string offset back over n code points, from one code point
 * boundary to the n-th one before it, by applying U16_BACK_1_UNSAFE n times.
 * Nothing happens if n is not positive.
 * (Pre-decrementing backward iteration.)
 * The input offset may be the same as the string length.
 * "Unsafe" macro, assumes well-formed UTF-16.
 *
 * @param s const UChar * string
 * @param i string offset
 * @param n number of code points to skip
 * @see U16_BACK_N
 * \xrefitem stable "Stable" "Stable List" ICU 2.4
 */
#define U16_BACK_N_UNSAFE(s, i, n) UPRV_BLOCK_MACRO_BEGIN { \
    int32_t __N; \
    for(__N=(n); __N>0; --__N) { \
        U16_BACK_1_UNSAFE(s, i); \
    } \
} UPRV_BLOCK_MACRO_END
671 
/**
 * Step the string offset back over up to n code points, from one code point
 * boundary to the n-th one before it, by applying U16_BACK_1 repeatedly.
 * Stepping stops early once the offset is no longer greater than start.
 * (Pre-decrementing backward iteration.)
 * The input offset may be the same as the string length.
 * "Safe" macro, handles unpaired surrogates and checks for string boundaries.
 *
 * @param s const UChar * string
 * @param start start of string
 * @param i string offset, must be start<i
 * @param n number of code points to skip
 * @see U16_BACK_N_UNSAFE
 * \xrefitem stable "Stable" "Stable List" ICU 2.4
 */
#define U16_BACK_N(s, start, i, n) UPRV_BLOCK_MACRO_BEGIN { \
    int32_t __N; \
    for(__N=(n); __N>0 && (i)>(start); --__N) { \
        U16_BACK_1(s, start, i); \
    } \
} UPRV_BLOCK_MACRO_END
693 
/**
 * Snap a random-access offset forward to the boundary after the code point
 * that ends at or spans the offset.
 * If the offset is behind the lead surrogate of a surrogate pair,
 * then the offset is incremented.
 * Otherwise, it keeps its value.
 * The input offset may be the same as the string length.
 * "Unsafe" macro, assumes well-formed UTF-16.
 *
 * @param s const UChar * string
 * @param i string offset
 * @see U16_SET_CP_LIMIT
 * \xrefitem stable "Stable" "Stable List" ICU 2.4
 */
#define U16_SET_CP_LIMIT_UNSAFE(s, i) UPRV_BLOCK_MACRO_BEGIN { \
    (i)+=U16_IS_LEAD((s)[(i)-1]) ? 1 : 0; \
} UPRV_BLOCK_MACRO_END
712 
/**
 * Snap a random-access offset forward to the boundary after the code point
 * that ends at or spans the offset.
 * If the offset is behind the lead surrogate of a surrogate pair,
 * then the offset is incremented.
 * Otherwise, it is not modified.
 * The input offset may be the same as the string length.
 * "Safe" macro, handles unpaired surrogates and checks for string boundaries:
 * the units on both sides of the offset are examined only if start<i and
 * the offset is before the end of the string (or the length is negative).
 *
 * The length can be negative for a NUL-terminated string.
 *
 * @param s const UChar * string
 * @param start int32_t starting string offset (usually 0)
 * @param i int32_t string offset, start<=i<=length
 * @param length int32_t string length
 * @see U16_SET_CP_LIMIT_UNSAFE
 * \xrefitem stable "Stable" "Stable List" ICU 2.4
 */
#define U16_SET_CP_LIMIT(s, start, i, length) UPRV_BLOCK_MACRO_BEGIN { \
    if((i)>(start) && ((i)<(length) || (length)<0)) { \
        /* both neighbors exist: move only if the offset splits a surrogate pair */ \
        if(U16_IS_LEAD((s)[(i)-1]) && U16_IS_TRAIL((s)[i])) { \
            ++(i); \
        } \
    } \
} UPRV_BLOCK_MACRO_END
735 
736 #endif
737 
738 /** @} */ // addtogroup
739