1 // © 2016 and later: Unicode, Inc. and others. 2 // License & terms of use: http://www.unicode.org/copyright.html 3 /* 4 ******************************************************************************* 5 * 6 * Copyright (C) 1999-2012, International Business Machines 7 * Corporation and others. All Rights Reserved. 8 * 9 ******************************************************************************* 10 * file name: utf16.h 11 * encoding: UTF-8 12 * tab size: 8 (not used) 13 * indentation:4 14 * 15 * created on: 1999sep09 16 * created by: Markus W. Scherer 17 */ 18 19 /** 20 * @addtogroup icu4c ICU4C 21 * @{ 22 * \file 23 * \brief C API: 16-bit Unicode handling macros 24 * 25 * This file defines macros to deal with 16-bit Unicode (UTF-16) code units and strings. 26 * 27 * For more information see utf.h and the ICU User Guide Strings chapter 28 * (https://unicode-org.github.io/icu/userguide/strings). 29 * 30 * <em>Usage:</em> 31 * ICU coding guidelines for if() statements should be followed when using these macros. 32 * Compound statements (curly braces {}) must be used for if-else-while... 33 * bodies and all macro statements should be terminated with semicolon. 34 */ 35 36 #ifndef __UTF16_H__ 37 #define __UTF16_H__ 38 39 #include <stdbool.h> 40 #include "unicode/umachine.h" 41 #ifndef __UTF_H__ 42 # include "unicode/utf.h" 43 #endif 44 45 /* single-code point definitions -------------------------------------------- */ 46 47 /** 48 * Does this code unit alone encode a code point (BMP, not a surrogate)? 49 * @param c 16-bit code unit 50 * @return true or false 51 * \xrefitem stable "Stable" "Stable List" ICU 2.4 52 */ 53 #define U16_IS_SINGLE(c) !U_IS_SURROGATE(c) 54 55 /** 56 * Is this code unit a lead surrogate (U+d800..U+dbff)? 57 * @param c 16-bit code unit 58 * @return true or false 59 * \xrefitem stable "Stable" "Stable List" ICU 2.4 60 */ 61 #define U16_IS_LEAD(c) (((c)&0xfffffc00)==0xd800) 62 63 /** 64 * Is this code unit a trail surrogate (U+dc00..U+dfff)? 
/**
 * Is this code unit a trail surrogate (U+dc00..U+dfff)?
 * @param c 16-bit code unit
 * @return true or false
 * \xrefitem stable "Stable" "Stable List" ICU 2.4
 */
#define U16_IS_TRAIL(c) (((c)&0xfffffc00)==0xdc00)

/**
 * Is this code unit a surrogate (U+d800..U+dfff)?
 * @param c 16-bit code unit
 * @return true or false
 * \xrefitem stable "Stable" "Stable List" ICU 2.4
 */
#define U16_IS_SURROGATE(c) U_IS_SURROGATE(c)

/**
 * Assuming c is a surrogate code point (U16_IS_SURROGATE(c)),
 * is it a lead surrogate?
 * @param c 16-bit code unit
 * @return true or false
 * \xrefitem stable "Stable" "Stable List" ICU 2.4
 */
#define U16_IS_SURROGATE_LEAD(c) (((c)&0x400)==0)

/**
 * Assuming c is a surrogate code point (U16_IS_SURROGATE(c)),
 * is it a trail surrogate?
 * @param c 16-bit code unit
 * @return true or false
 * \xrefitem stable "Stable" "Stable List" ICU 4.2
 */
#define U16_IS_SURROGATE_TRAIL(c) (((c)&0x400)!=0)

/**
 * Helper constant for U16_GET_SUPPLEMENTARY.
 * \xrefitem internal "Internal" "Internal List"  Do not use. This API is for internal use only.
 */
#define U16_SURROGATE_OFFSET ((0xd800<<10UL)+0xdc00-0x10000)

/**
 * Get a supplementary code point value (U+10000..U+10ffff)
 * from its lead and trail surrogates.
 * The result is undefined if the input values are not
 * lead and trail surrogates.
 *
 * @param lead lead surrogate (U+d800..U+dbff)
 * @param trail trail surrogate (U+dc00..U+dfff)
 * @return supplementary code point (U+10000..U+10ffff)
 * \xrefitem stable "Stable" "Stable List" ICU 2.4
 */
#define U16_GET_SUPPLEMENTARY(lead, trail) \
    (((UChar32)(lead)<<10UL)+(UChar32)(trail)-U16_SURROGATE_OFFSET)
/**
 * Get the lead surrogate (0xd800..0xdbff) for a
 * supplementary code point (0x10000..0x10ffff).
 * The expansion is fully parenthesized so that the cast cannot bind
 * unexpectedly to a neighboring operator.
 * @param supplementary 32-bit code point (U+10000..U+10ffff)
 * @return lead surrogate (U+d800..U+dbff) for supplementary
 * \xrefitem stable "Stable" "Stable List" ICU 2.4
 */
#define U16_LEAD(supplementary) ((UChar)(((supplementary)>>10)+0xd7c0))

/**
 * Get the trail surrogate (0xdc00..0xdfff) for a
 * supplementary code point (0x10000..0x10ffff).
 * The expansion is fully parenthesized so that the cast cannot bind
 * unexpectedly to a neighboring operator.
 * @param supplementary 32-bit code point (U+10000..U+10ffff)
 * @return trail surrogate (U+dc00..U+dfff) for supplementary
 * \xrefitem stable "Stable" "Stable List" ICU 2.4
 */
#define U16_TRAIL(supplementary) ((UChar)(((supplementary)&0x3ff)|0xdc00))

/**
 * How many 16-bit code units are used to encode this Unicode code point? (1 or 2)
 * The result is not defined if c is not a Unicode code point (U+0000..U+10ffff).
 * @param c 32-bit code point
 * @return 1 or 2
 * \xrefitem stable "Stable" "Stable List" ICU 2.4
 */
#define U16_LENGTH(c) ((uint32_t)(c)<=0xffff ? 1 : 2)

/**
 * The maximum number of 16-bit code units per Unicode code point (U+0000..U+10ffff).
 * @return 2
 * \xrefitem stable "Stable" "Stable List" ICU 2.4
 */
#define U16_MAX_LENGTH 2
/**
 * Get a code point from a string at a random-access offset,
 * without changing the offset.
 * "Unsafe" macro, assumes well-formed UTF-16.
 *
 * The offset may point to either the lead or trail surrogate unit
 * for a supplementary code point, in which case the macro will read
 * the adjacent matching surrogate as well.
 * The result is undefined if the offset points to a single, unpaired surrogate.
 * Iteration through a string is more efficient with U16_NEXT_UNSAFE or U16_NEXT.
 *
 * The arguments are evaluated more than once; avoid side effects in them.
 *
 * @param s const UChar * string
 * @param i string offset
 * @param c output UChar32 variable
 * @see U16_GET
 * \xrefitem stable "Stable" "Stable List" ICU 2.4
 */
#define U16_GET_UNSAFE(s, i, c) UPRV_BLOCK_MACRO_BEGIN { \
    (c)=(s)[i]; \
    if(U16_IS_SURROGATE(c)) { \
        if(U16_IS_SURROGATE_LEAD(c)) { \
            (c)=U16_GET_SUPPLEMENTARY((c), (s)[(i)+1]); \
        } else { \
            (c)=U16_GET_SUPPLEMENTARY((s)[(i)-1], (c)); \
        } \
    } \
} UPRV_BLOCK_MACRO_END
/**
 * Get a code point from a string at a random-access offset,
 * without changing the offset.
 * "Safe" macro, handles unpaired surrogates and checks for string boundaries.
 *
 * The offset may point to either the lead or trail surrogate unit
 * for a supplementary code point, in which case the macro will read
 * the adjacent matching surrogate as well.
 *
 * The length can be negative for a NUL-terminated string.
 *
 * If the offset points to a single, unpaired surrogate, then
 * c is set to that unpaired surrogate.
 * Iteration through a string is more efficient with U16_NEXT_UNSAFE or U16_NEXT.
 *
 * The arguments are evaluated more than once; avoid side effects in them.
 *
 * @param s const UChar * string
 * @param start starting string offset (usually 0)
 * @param i string offset, must be start<=i<length
 * @param length string length
 * @param c output UChar32 variable
 * @see U16_GET_UNSAFE
 * \xrefitem stable "Stable" "Stable List" ICU 2.4
 */
#define U16_GET(s, start, i, length, c) UPRV_BLOCK_MACRO_BEGIN { \
    (c)=(s)[i]; \
    if(U16_IS_SURROGATE(c)) { \
        uint16_t __c2; \
        if(U16_IS_SURROGATE_LEAD(c)) { \
            if((i)+1!=(length) && U16_IS_TRAIL(__c2=(s)[(i)+1])) { \
                (c)=U16_GET_SUPPLEMENTARY((c), __c2); \
            } \
        } else { \
            if((i)>(start) && U16_IS_LEAD(__c2=(s)[(i)-1])) { \
                (c)=U16_GET_SUPPLEMENTARY(__c2, (c)); \
            } \
        } \
    } \
} UPRV_BLOCK_MACRO_END
/**
 * Get a code point from a string at a random-access offset,
 * without changing the offset.
 * "Safe" macro, handles unpaired surrogates and checks for string boundaries.
 *
 * The offset may point to either the lead or trail surrogate unit
 * for a supplementary code point, in which case the macro will read
 * the adjacent matching surrogate as well.
 *
 * The length can be negative for a NUL-terminated string.
 *
 * If the offset points to a single, unpaired surrogate, then
 * c is set to U+FFFD.
 * Iteration through a string is more efficient with U16_NEXT_UNSAFE or U16_NEXT_OR_FFFD.
 *
 * The arguments are evaluated more than once; avoid side effects in them.
 *
 * @param s const UChar * string
 * @param start starting string offset (usually 0)
 * @param i string offset, must be start<=i<length
 * @param length string length
 * @param c output UChar32 variable
 * @see U16_GET_UNSAFE
 * \xrefitem stable "Stable" "Stable List" ICU 60
 */
#define U16_GET_OR_FFFD(s, start, i, length, c) UPRV_BLOCK_MACRO_BEGIN { \
    (c)=(s)[i]; \
    if(U16_IS_SURROGATE(c)) { \
        uint16_t __c2; \
        if(U16_IS_SURROGATE_LEAD(c)) { \
            if((i)+1!=(length) && U16_IS_TRAIL(__c2=(s)[(i)+1])) { \
                (c)=U16_GET_SUPPLEMENTARY((c), __c2); \
            } else { \
                (c)=0xfffd; \
            } \
        } else { \
            if((i)>(start) && U16_IS_LEAD(__c2=(s)[(i)-1])) { \
                (c)=U16_GET_SUPPLEMENTARY(__c2, (c)); \
            } else { \
                (c)=0xfffd; \
            } \
        } \
    } \
} UPRV_BLOCK_MACRO_END

/* definitions with forward iteration --------------------------------------- */
/**
 * Get a code point from a string at a code point boundary offset,
 * and advance the offset to the next code point boundary.
 * (Post-incrementing forward iteration.)
 * "Unsafe" macro, assumes well-formed UTF-16.
 *
 * The offset may point to the lead surrogate unit
 * for a supplementary code point, in which case the macro will read
 * the following trail surrogate as well.
 * If the offset points to a trail surrogate, then that itself
 * will be returned as the code point.
 * The result is undefined if the offset points to a single, unpaired lead surrogate.
 *
 * @param s const UChar * string
 * @param i string offset
 * @param c output UChar32 variable
 * @see U16_NEXT
 * \xrefitem stable "Stable" "Stable List" ICU 2.4
 */
#define U16_NEXT_UNSAFE(s, i, c) UPRV_BLOCK_MACRO_BEGIN { \
    (c)=(s)[(i)++]; \
    if(U16_IS_LEAD(c)) { \
        (c)=U16_GET_SUPPLEMENTARY((c), (s)[(i)++]); \
    } \
} UPRV_BLOCK_MACRO_END

/**
 * Get a code point from a string at a code point boundary offset,
 * and advance the offset to the next code point boundary.
 * (Post-incrementing forward iteration.)
 * "Safe" macro, handles unpaired surrogates and checks for string boundaries.
 *
 * The length can be negative for a NUL-terminated string.
 *
 * The offset may point to the lead surrogate unit
 * for a supplementary code point, in which case the macro will read
 * the following trail surrogate as well.
 * If the offset points to a trail surrogate or
 * to a single, unpaired lead surrogate, then c is set to that unpaired surrogate.
 *
 * @param s const UChar * string
 * @param i string offset, must be i<length
 * @param length string length
 * @param c output UChar32 variable
 * @see U16_NEXT_UNSAFE
 * \xrefitem stable "Stable" "Stable List" ICU 2.4
 */
#define U16_NEXT(s, i, length, c) UPRV_BLOCK_MACRO_BEGIN { \
    (c)=(s)[(i)++]; \
    if(U16_IS_LEAD(c)) { \
        uint16_t __c2; \
        if((i)!=(length) && U16_IS_TRAIL(__c2=(s)[(i)])) { \
            ++(i); \
            (c)=U16_GET_SUPPLEMENTARY((c), __c2); \
        } \
    } \
} UPRV_BLOCK_MACRO_END
/**
 * Get a code point from a string at a code point boundary offset,
 * and advance the offset to the next code point boundary.
 * (Post-incrementing forward iteration.)
 * "Safe" macro, handles unpaired surrogates and checks for string boundaries.
 *
 * The length can be negative for a NUL-terminated string.
 *
 * The offset may point to the lead surrogate unit
 * for a supplementary code point, in which case the macro will read
 * the following trail surrogate as well.
 * If the offset points to a trail surrogate or
 * to a single, unpaired lead surrogate, then c is set to U+FFFD.
 *
 * @param s const UChar * string
 * @param i string offset, must be i<length
 * @param length string length
 * @param c output UChar32 variable
 * @see U16_NEXT_UNSAFE
 * \xrefitem stable "Stable" "Stable List" ICU 60
 */
#define U16_NEXT_OR_FFFD(s, i, length, c) UPRV_BLOCK_MACRO_BEGIN { \
    (c)=(s)[(i)++]; \
    if(U16_IS_SURROGATE(c)) { \
        uint16_t __c2; \
        if(U16_IS_SURROGATE_LEAD(c) && (i)!=(length) && U16_IS_TRAIL(__c2=(s)[(i)])) { \
            ++(i); \
            (c)=U16_GET_SUPPLEMENTARY((c), __c2); \
        } else { \
            (c)=0xfffd; \
        } \
    } \
} UPRV_BLOCK_MACRO_END

/**
 * Append a code point to a string, overwriting 1 or 2 code units.
 * The offset points to the current end of the string contents
 * and is advanced (post-increment).
 * "Unsafe" macro, assumes a valid code point and sufficient space in the string.
 * Otherwise, the result is undefined.
 *
 * @param s UChar * string buffer (written to, so it must not be const)
 * @param i string offset
 * @param c code point to append
 * @see U16_APPEND
 * \xrefitem stable "Stable" "Stable List" ICU 2.4
 */
#define U16_APPEND_UNSAFE(s, i, c) UPRV_BLOCK_MACRO_BEGIN { \
    if((uint32_t)(c)<=0xffff) { \
        (s)[(i)++]=(uint16_t)(c); \
    } else { \
        (s)[(i)++]=(uint16_t)(((c)>>10)+0xd7c0); \
        (s)[(i)++]=(uint16_t)(((c)&0x3ff)|0xdc00); \
    } \
} UPRV_BLOCK_MACRO_END
/**
 * Append a code point to a string, overwriting 1 or 2 code units.
 * The offset points to the current end of the string contents
 * and is advanced (post-increment).
 * "Safe" macro, checks for a valid code point.
 * If a surrogate pair is written, checks for sufficient space in the string.
 * If the code point is not valid or a trail surrogate does not fit,
 * then isError is set to true.
 *
 * @param s UChar * string buffer (written to, so it must not be const)
 * @param i string offset, must be i<capacity
 * @param capacity size of the string buffer
 * @param c code point to append
 * @param isError output UBool set to true if an error occurs, otherwise not modified
 * @see U16_APPEND_UNSAFE
 * \xrefitem stable "Stable" "Stable List" ICU 2.4
 */
#define U16_APPEND(s, i, capacity, c, isError) UPRV_BLOCK_MACRO_BEGIN { \
    if((uint32_t)(c)<=0xffff) { \
        (s)[(i)++]=(uint16_t)(c); \
    } else if((uint32_t)(c)<=0x10ffff && (i)+1<(capacity)) { \
        (s)[(i)++]=(uint16_t)(((c)>>10)+0xd7c0); \
        (s)[(i)++]=(uint16_t)(((c)&0x3ff)|0xdc00); \
    } else /* c>0x10ffff or not enough space */ { \
        (isError)=true; \
    } \
} UPRV_BLOCK_MACRO_END

/**
 * Advance the string offset from one code point boundary to the next.
 * (Post-incrementing iteration.)
 * "Unsafe" macro, assumes well-formed UTF-16.
 *
 * @param s const UChar * string
 * @param i string offset
 * @see U16_FWD_1
 * \xrefitem stable "Stable" "Stable List" ICU 2.4
 */
#define U16_FWD_1_UNSAFE(s, i) UPRV_BLOCK_MACRO_BEGIN { \
    if(U16_IS_LEAD((s)[(i)++])) { \
        ++(i); \
    } \
} UPRV_BLOCK_MACRO_END
/**
 * Advance the string offset from one code point boundary to the next.
 * (Post-incrementing iteration.)
 * "Safe" macro, handles unpaired surrogates and checks for string boundaries.
 *
 * The length can be negative for a NUL-terminated string.
 *
 * @param s const UChar * string
 * @param i string offset, must be i<length
 * @param length string length
 * @see U16_FWD_1_UNSAFE
 * \xrefitem stable "Stable" "Stable List" ICU 2.4
 */
#define U16_FWD_1(s, i, length) UPRV_BLOCK_MACRO_BEGIN { \
    if(U16_IS_LEAD((s)[(i)++]) && (i)!=(length) && U16_IS_TRAIL((s)[i])) { \
        ++(i); \
    } \
} UPRV_BLOCK_MACRO_END

/**
 * Advance the string offset from one code point boundary to the n-th next one,
 * i.e., move forward by n code points.
 * (Post-incrementing iteration.)
 * "Unsafe" macro, assumes well-formed UTF-16.
 *
 * @param s const UChar * string
 * @param i string offset
 * @param n number of code points to skip
 * @see U16_FWD_N
 * \xrefitem stable "Stable" "Stable List" ICU 2.4
 */
#define U16_FWD_N_UNSAFE(s, i, n) UPRV_BLOCK_MACRO_BEGIN { \
    int32_t __N=(n); \
    while(__N>0) { \
        U16_FWD_1_UNSAFE(s, i); \
        --__N; \
    } \
} UPRV_BLOCK_MACRO_END

/**
 * Advance the string offset from one code point boundary to the n-th next one,
 * i.e., move forward by n code points.
 * (Post-incrementing iteration.)
 * "Safe" macro, handles unpaired surrogates and checks for string boundaries.
 *
 * The length can be negative for a NUL-terminated string.
 *
 * @param s const UChar * string
 * @param i int32_t string offset, must be i<length
 * @param length int32_t string length
 * @param n number of code points to skip
 * @see U16_FWD_N_UNSAFE
 * \xrefitem stable "Stable" "Stable List" ICU 2.4
 */
#define U16_FWD_N(s, i, length, n) UPRV_BLOCK_MACRO_BEGIN { \
    int32_t __N=(n); \
    while(__N>0 && ((i)<(length) || ((length)<0 && (s)[i]!=0))) { \
        U16_FWD_1(s, i, length); \
        --__N; \
    } \
} UPRV_BLOCK_MACRO_END
/**
 * Adjust a random-access offset to a code point boundary
 * at the start of a code point.
 * If the offset points to the trail surrogate of a surrogate pair,
 * then the offset is decremented.
 * Otherwise, it is not modified.
 * "Unsafe" macro, assumes well-formed UTF-16.
 *
 * @param s const UChar * string
 * @param i string offset
 * @see U16_SET_CP_START
 * \xrefitem stable "Stable" "Stable List" ICU 2.4
 */
#define U16_SET_CP_START_UNSAFE(s, i) UPRV_BLOCK_MACRO_BEGIN { \
    if(U16_IS_TRAIL((s)[i])) { \
        --(i); \
    } \
} UPRV_BLOCK_MACRO_END

/**
 * Adjust a random-access offset to a code point boundary
 * at the start of a code point.
 * If the offset points to the trail surrogate of a surrogate pair,
 * then the offset is decremented.
 * Otherwise, it is not modified.
 * "Safe" macro, handles unpaired surrogates and checks for string boundaries.
 *
 * @param s const UChar * string
 * @param start starting string offset (usually 0)
 * @param i string offset, must be start<=i
 * @see U16_SET_CP_START_UNSAFE
 * \xrefitem stable "Stable" "Stable List" ICU 2.4
 */
#define U16_SET_CP_START(s, start, i) UPRV_BLOCK_MACRO_BEGIN { \
    if(U16_IS_TRAIL((s)[i]) && (i)>(start) && U16_IS_LEAD((s)[(i)-1])) { \
        --(i); \
    } \
} UPRV_BLOCK_MACRO_END

/* definitions with backward iteration -------------------------------------- */
/**
 * Move the string offset from one code point boundary to the previous one
 * and get the code point between them.
 * (Pre-decrementing backward iteration.)
 * "Unsafe" macro, assumes well-formed UTF-16.
 *
 * The input offset may be the same as the string length.
 * If the offset is behind a trail surrogate unit
 * for a supplementary code point, then the macro will read
 * the preceding lead surrogate as well.
 * If the offset is behind a lead surrogate, then that itself
 * will be returned as the code point.
 * The result is undefined if the offset is behind a single, unpaired trail surrogate.
 *
 * @param s const UChar * string
 * @param i string offset
 * @param c output UChar32 variable
 * @see U16_PREV
 * \xrefitem stable "Stable" "Stable List" ICU 2.4
 */
#define U16_PREV_UNSAFE(s, i, c) UPRV_BLOCK_MACRO_BEGIN { \
    (c)=(s)[--(i)]; \
    if(U16_IS_TRAIL(c)) { \
        (c)=U16_GET_SUPPLEMENTARY((s)[--(i)], (c)); \
    } \
} UPRV_BLOCK_MACRO_END

/**
 * Move the string offset from one code point boundary to the previous one
 * and get the code point between them.
 * (Pre-decrementing backward iteration.)
 * "Safe" macro, handles unpaired surrogates and checks for string boundaries.
 *
 * The input offset may be the same as the string length.
 * If the offset is behind a trail surrogate unit
 * for a supplementary code point, then the macro will read
 * the preceding lead surrogate as well.
 * If the offset is behind a lead surrogate or behind a single, unpaired
 * trail surrogate, then c is set to that unpaired surrogate.
 *
 * @param s const UChar * string
 * @param start starting string offset (usually 0)
 * @param i string offset, must be start<i
 * @param c output UChar32 variable
 * @see U16_PREV_UNSAFE
 * \xrefitem stable "Stable" "Stable List" ICU 2.4
 */
#define U16_PREV(s, start, i, c) UPRV_BLOCK_MACRO_BEGIN { \
    (c)=(s)[--(i)]; \
    if(U16_IS_TRAIL(c)) { \
        uint16_t __c2; \
        if((i)>(start) && U16_IS_LEAD(__c2=(s)[(i)-1])) { \
            --(i); \
            (c)=U16_GET_SUPPLEMENTARY(__c2, (c)); \
        } \
    } \
} UPRV_BLOCK_MACRO_END
/**
 * Move the string offset from one code point boundary to the previous one
 * and get the code point between them.
 * (Pre-decrementing backward iteration.)
 * "Safe" macro, handles unpaired surrogates and checks for string boundaries.
 *
 * The input offset may be the same as the string length.
 * If the offset is behind a trail surrogate unit
 * for a supplementary code point, then the macro will read
 * the preceding lead surrogate as well.
 * If the offset is behind a lead surrogate or behind a single, unpaired
 * trail surrogate, then c is set to U+FFFD.
 *
 * @param s const UChar * string
 * @param start starting string offset (usually 0)
 * @param i string offset, must be start<i
 * @param c output UChar32 variable
 * @see U16_PREV_UNSAFE
 * \xrefitem stable "Stable" "Stable List" ICU 60
 */
#define U16_PREV_OR_FFFD(s, start, i, c) UPRV_BLOCK_MACRO_BEGIN { \
    (c)=(s)[--(i)]; \
    if(U16_IS_SURROGATE(c)) { \
        uint16_t __c2; \
        if(U16_IS_SURROGATE_TRAIL(c) && (i)>(start) && U16_IS_LEAD(__c2=(s)[(i)-1])) { \
            --(i); \
            (c)=U16_GET_SUPPLEMENTARY(__c2, (c)); \
        } else { \
            (c)=0xfffd; \
        } \
    } \
} UPRV_BLOCK_MACRO_END

/**
 * Move the string offset from one code point boundary to the previous one.
 * (Pre-decrementing backward iteration.)
 * The input offset may be the same as the string length.
 * "Unsafe" macro, assumes well-formed UTF-16.
 *
 * @param s const UChar * string
 * @param i string offset
 * @see U16_BACK_1
 * \xrefitem stable "Stable" "Stable List" ICU 2.4
 */
#define U16_BACK_1_UNSAFE(s, i) UPRV_BLOCK_MACRO_BEGIN { \
    if(U16_IS_TRAIL((s)[--(i)])) { \
        --(i); \
    } \
} UPRV_BLOCK_MACRO_END
/**
 * Move the string offset from one code point boundary to the previous one.
 * (Pre-decrementing backward iteration.)
 * The input offset may be the same as the string length.
 * "Safe" macro, handles unpaired surrogates and checks for string boundaries.
 *
 * @param s const UChar * string
 * @param start starting string offset (usually 0)
 * @param i string offset, must be start<i
 * @see U16_BACK_1_UNSAFE
 * \xrefitem stable "Stable" "Stable List" ICU 2.4
 */
#define U16_BACK_1(s, start, i) UPRV_BLOCK_MACRO_BEGIN { \
    if(U16_IS_TRAIL((s)[--(i)]) && (i)>(start) && U16_IS_LEAD((s)[(i)-1])) { \
        --(i); \
    } \
} UPRV_BLOCK_MACRO_END

/**
 * Move the string offset from one code point boundary to the n-th one before it,
 * i.e., move backward by n code points.
 * (Pre-decrementing backward iteration.)
 * The input offset may be the same as the string length.
 * "Unsafe" macro, assumes well-formed UTF-16.
 *
 * @param s const UChar * string
 * @param i string offset
 * @param n number of code points to skip
 * @see U16_BACK_N
 * \xrefitem stable "Stable" "Stable List" ICU 2.4
 */
#define U16_BACK_N_UNSAFE(s, i, n) UPRV_BLOCK_MACRO_BEGIN { \
    int32_t __N=(n); \
    while(__N>0) { \
        U16_BACK_1_UNSAFE(s, i); \
        --__N; \
    } \
} UPRV_BLOCK_MACRO_END

/**
 * Move the string offset from one code point boundary to the n-th one before it,
 * i.e., move backward by n code points.
 * (Pre-decrementing backward iteration.)
 * The input offset may be the same as the string length.
 * "Safe" macro, handles unpaired surrogates and checks for string boundaries.
 *
 * @param s const UChar * string
 * @param start start of string
 * @param i string offset, must be start<i
 * @param n number of code points to skip
 * @see U16_BACK_N_UNSAFE
 * \xrefitem stable "Stable" "Stable List" ICU 2.4
 */
#define U16_BACK_N(s, start, i, n) UPRV_BLOCK_MACRO_BEGIN { \
    int32_t __N=(n); \
    while(__N>0 && (i)>(start)) { \
        U16_BACK_1(s, start, i); \
        --__N; \
    } \
} UPRV_BLOCK_MACRO_END
696 * If the offset is behind the lead surrogate of a surrogate pair, 697 * then the offset is incremented. 698 * Otherwise, it is not modified. 699 * The input offset may be the same as the string length. 700 * "Unsafe" macro, assumes well-formed UTF-16. 701 * 702 * @param s const UChar * string 703 * @param i string offset 704 * @see U16_SET_CP_LIMIT 705 * \xrefitem stable "Stable" "Stable List" ICU 2.4 706 */ 707 #define U16_SET_CP_LIMIT_UNSAFE(s, i) UPRV_BLOCK_MACRO_BEGIN { \ 708 if(U16_IS_LEAD((s)[(i)-1])) { \ 709 ++(i); \ 710 } \ 711 } UPRV_BLOCK_MACRO_END 712 713 /** 714 * Adjust a random-access offset to a code point boundary after a code point. 715 * If the offset is behind the lead surrogate of a surrogate pair, 716 * then the offset is incremented. 717 * Otherwise, it is not modified. 718 * The input offset may be the same as the string length. 719 * "Safe" macro, handles unpaired surrogates and checks for string boundaries. 720 * 721 * The length can be negative for a NUL-terminated string. 722 * 723 * @param s const UChar * string 724 * @param start int32_t starting string offset (usually 0) 725 * @param i int32_t string offset, start<=i<=length 726 * @param length int32_t string length 727 * @see U16_SET_CP_LIMIT_UNSAFE 728 * \xrefitem stable "Stable" "Stable List" ICU 2.4 729 */ 730 #define U16_SET_CP_LIMIT(s, start, i, length) UPRV_BLOCK_MACRO_BEGIN { \ 731 if((start)<(i) && ((i)<(length) || (length)<0) && U16_IS_LEAD((s)[(i)-1]) && U16_IS_TRAIL((s)[i])) { \ 732 ++(i); \ 733 } \ 734 } UPRV_BLOCK_MACRO_END 735 736 #endif 737 738 /** @} */ // addtogroup 739