1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /*
4 ******************************************************************************
5 *
6 *   Copyright (C) 1999-2015, International Business Machines
7 *   Corporation and others.  All Rights Reserved.
8 *
9 ******************************************************************************
10 *   file name:  umachine.h
11 *   encoding:   UTF-8
12 *   tab size:   8 (not used)
13 *   indentation:4
14 *
15 *   created on: 1999sep13
16 *   created by: Markus W. Scherer
17 *
18 *   This file defines basic types and constants for ICU to be
19 *   platform-independent. umachine.h and utf.h are included into
20 *   utypes.h to provide all the general definitions for ICU.
21 *   All of these definitions used to be in utypes.h before
22 *   the UTF-handling macros made this unmaintainable.
23 */
24 
25 #ifndef __UMACHINE_H__
26 #define __UMACHINE_H__
27 
28 
29 /**
30  * @addtogroup icu4c ICU4C
31  * @{
32  * \file
33  * \brief Basic types and constants for UTF
34  *
35  * <h2> Basic types and constants for UTF </h2>
36  *   This file defines basic types and constants for utf.h to be
37  *   platform-independent. umachine.h and utf.h are included into
38  *   utypes.h to provide all the general definitions for ICU.
39  *   All of these definitions used to be in utypes.h before
40  *   the UTF-handling macros made this unmaintainable.
41  *
42  */
43 /*==========================================================================*/
44 /* Include platform-dependent definitions                                   */
45 /* which are contained in the platform-specific file platform.h             */
46 /*==========================================================================*/
47 
48 #include "unicode/ptypes.h" /* platform.h is included in ptypes.h */
49 
/*
 * ANSI C headers:
 * stdbool.h defines bool, true and false
 * stddef.h defines wchar_t
 */
54 #include <stdbool.h>
55 #include <stddef.h>
56 
57 /*==========================================================================*/
58 /* For C wrappers, we use the symbol U_CAPI.                                */
59 /* This works properly if the includer is C or C++.                         */
60 /* Functions are declared   U_CAPI return-type U_EXPORT2 function-name()... */
61 /*==========================================================================*/
62 
63 /**
64  * \def U_CFUNC
65  * This is used in a declaration of a library private ICU C function.
66  * \xrefitem stable "Stable" "Stable List" ICU 2.4
67  */
68 
69 /**
70  * \def U_CDECL_BEGIN
71  * This is used to begin a declaration of a library private ICU C API.
72  * \xrefitem stable "Stable" "Stable List" ICU 2.4
73  */
74 
75 /**
76  * \def U_CDECL_END
 * This is used to end a declaration of a library private ICU C API.
78  * \xrefitem stable "Stable" "Stable List" ICU 2.4
79  */
80 
/*
 * Linkage helpers. A C++ translation unit wraps the declarations in
 * extern "C"; a plain C translation unit only needs "extern" for single
 * functions and nothing at all around declaration groups.
 */
#if !defined(__cplusplus)
#   define U_CFUNC extern
#   define U_CDECL_BEGIN
#   define U_CDECL_END
#else
#   define U_CFUNC extern "C"
#   define U_CDECL_BEGIN extern "C" {
#   define U_CDECL_END   }
#endif
90 
#if !defined(U_ATTRIBUTE_DEPRECATED)
/**
 * \def U_ATTRIBUTE_DEPRECATED
 * Compiler-specific "deprecated" marker.
 * Expands to the GCC attribute syntax when U_GCC_MAJOR_MINOR is at least 302,
 * to the Visual C++ __declspec syntax when _MSC_VER is at least 1400,
 * and to nothing otherwise.
 * A definition that already exists when this header is read is kept as is.
 * \xrefitem internal "Internal"  "Internal List"  Do not use. This API is for internal use only.
 */
#   if U_GCC_MAJOR_MINOR >= 302
#       define U_ATTRIBUTE_DEPRECATED __attribute__ ((deprecated))
#   elif defined(_MSC_VER) && (_MSC_VER >= 1400)
#       define U_ATTRIBUTE_DEPRECATED __declspec(deprecated)
#   else
#       define U_ATTRIBUTE_DEPRECATED
#   endif
#endif
110 
/** Marks a function declaration as a public ICU C API (U_CFUNC linkage plus U_EXPORT). \xrefitem stable "Stable" "Stable List" ICU 2.0 */
#define U_CAPI       U_CFUNC U_EXPORT
/** Obsolete alias of U_CAPI; formerly marked a function as a stable public ICU C API. */
#define U_STABLE     U_CAPI
/** Obsolete alias of U_CAPI; formerly marked a function as a draft public ICU C API. */
#define U_DRAFT      U_CAPI
/** Marks a function declaration as a deprecated public ICU C API (U_CAPI plus U_ATTRIBUTE_DEPRECATED). */
#define U_DEPRECATED U_CAPI U_ATTRIBUTE_DEPRECATED
/** Obsolete alias of U_CAPI; formerly marked a function as an obsolete public ICU C API. */
#define U_OBSOLETE   U_CAPI
/** Obsolete alias of U_CAPI; formerly marked a function as an internal ICU C API. */
#define U_INTERNAL   U_CAPI
123 
124 // Before ICU 65, function-like, multi-statement ICU macros were just defined as
125 // series of statements wrapped in { } blocks and the caller could choose to
126 // either treat them as if they were actual functions and end the invocation
127 // with a trailing ; creating an empty statement after the block or else omit
128 // this trailing ; using the knowledge that the macro would expand to { }.
129 //
130 // But doing so doesn't work well with macros that look like functions and
131 // compiler warnings about empty statements (ICU-20601) and ICU 65 therefore
132 // switches to the standard solution of wrapping such macros in do { } while.
133 //
134 // This will however break existing code that depends on being able to invoke
135 // these macros without a trailing ; so to be able to remain compatible with
136 // such code the wrapper is itself defined as macros so that it's possible to
137 // build ICU 65 and later with the old macro behaviour, like this:
138 //
139 // export CPPFLAGS='-DUPRV_BLOCK_MACRO_BEGIN="" -DUPRV_BLOCK_MACRO_END=""'
140 // runConfigureICU ...
141 //
142 
/**
 * \def UPRV_BLOCK_MACRO_BEGIN
 * Opening half of the wrapper placed around multi-statement macros.
 * Expands to the "do" keyword unless the build has already defined it.
 * \xrefitem internal "Internal"  "Internal List"  Do not use. This API is for internal use only.
 */
#if !defined(UPRV_BLOCK_MACRO_BEGIN)
#   define UPRV_BLOCK_MACRO_BEGIN do
#endif

/**
 * \def UPRV_BLOCK_MACRO_END
 * Closing half of the wrapper placed around multi-statement macros.
 * Expands to "while (false)" unless the build has already defined it.
 * \xrefitem internal "Internal"  "Internal List"  Do not use. This API is for internal use only.
 */
#if !defined(UPRV_BLOCK_MACRO_END)
#   define UPRV_BLOCK_MACRO_END while (false)
#endif
160 
161 /*==========================================================================*/
162 /* limits for int32_t etc., like in POSIX inttypes.h                        */
163 /*==========================================================================*/
164 
/* Each limit below is only a fallback: a definition that already exists wins. */

/* --- signed minima --- */
#if !defined(INT8_MIN)
/** Minimum value of an 8 bit signed integer \xrefitem stable "Stable" "Stable List" ICU 2.0 */
#   define INT8_MIN        ((int8_t)(-128))
#endif
#if !defined(INT16_MIN)
/** Minimum value of a 16 bit signed integer \xrefitem stable "Stable" "Stable List" ICU 2.0 */
#   define INT16_MIN       ((int16_t)(-32767-1))
#endif
#if !defined(INT32_MIN)
/** Minimum value of a 32 bit signed integer \xrefitem stable "Stable" "Stable List" ICU 2.0 */
#   define INT32_MIN       ((int32_t)(-2147483647-1))
#endif

/* --- signed maxima --- */
#if !defined(INT8_MAX)
/** Maximum value of an 8 bit signed integer \xrefitem stable "Stable" "Stable List" ICU 2.0 */
#   define INT8_MAX        ((int8_t)(127))
#endif
#if !defined(INT16_MAX)
/** Maximum value of a 16 bit signed integer \xrefitem stable "Stable" "Stable List" ICU 2.0 */
#   define INT16_MAX       ((int16_t)(32767))
#endif
#if !defined(INT32_MAX)
/** Maximum value of a 32 bit signed integer \xrefitem stable "Stable" "Stable List" ICU 2.0 */
#   define INT32_MAX       ((int32_t)(2147483647))
#endif

/* --- unsigned maxima --- */
#if !defined(UINT8_MAX)
/** Maximum value of an 8 bit unsigned integer \xrefitem stable "Stable" "Stable List" ICU 2.0 */
#   define UINT8_MAX       ((uint8_t)(255U))
#endif
#if !defined(UINT16_MAX)
/** Maximum value of a 16 bit unsigned integer \xrefitem stable "Stable" "Stable List" ICU 2.0 */
#   define UINT16_MAX      ((uint16_t)(65535U))
#endif
#if !defined(UINT32_MAX)
/** Maximum value of a 32 bit unsigned integer \xrefitem stable "Stable" "Stable List" ICU 2.0 */
#   define UINT32_MAX      ((uint32_t)(4294967295U))
#endif
203 
#ifdef U_INT64_T_UNAVAILABLE
# error int64_t is required for decimal format and rule-based number format.
#else
# if !defined(INT64_C)
/**
 * Platform independent spelling of a signed 64-bit integer constant
 * (fallback: appends the LL suffix to the literal).
 * note: may be wrong for some 64 bit platforms - ensure your compiler provides INT64_C
 * \xrefitem stable "Stable" "Stable List" ICU 2.8
 */
#   define INT64_C(c) c ## LL
# endif
# if !defined(UINT64_C)
/**
 * Platform independent spelling of an unsigned 64-bit integer constant
 * (fallback: appends the ULL suffix to the literal).
 * note: may be wrong for some 64 bit platforms - ensure your compiler provides UINT64_C
 * \xrefitem stable "Stable" "Stable List" ICU 2.8
 */
#   define UINT64_C(c) c ## ULL
# endif
# if !defined(U_INT64_MIN)
/** Minimum value of a 64 bit signed integer \xrefitem stable "Stable" "Stable List" ICU 2.8 */
#     define U_INT64_MIN       ((int64_t)(INT64_C(-9223372036854775807)-1))
# endif
# if !defined(U_INT64_MAX)
/** Maximum value of a 64 bit signed integer \xrefitem stable "Stable" "Stable List" ICU 2.8 */
#     define U_INT64_MAX       ((int64_t)(INT64_C(9223372036854775807)))
# endif
# if !defined(U_UINT64_MAX)
/** Maximum value of a 64 bit unsigned integer \xrefitem stable "Stable" "Stable List" ICU 2.8 */
#     define U_UINT64_MAX      ((uint64_t)(UINT64_C(18446744073709551615)))
# endif
#endif
236 
237 /*==========================================================================*/
238 /* Boolean data type                                                        */
239 /*==========================================================================*/
240 
/**
 * The ICU boolean type: a typedef of int8_t, i.e. a signed-byte integer.
 * It is ICU-specific for historical reasons: the C and C++ standards used to
 * not define type bool.
 * It also gives a fixed type definition, whereas details of type bool
 * (e.g., sizeof) may vary by compiler and between C and C++.
 *
 * \xrefitem stable "Stable" "Stable List" ICU 2.0
 */
typedef int8_t UBool;

/**
 * \def U_DEFINE_FALSE_AND_TRUE
 * Controls whether public ICU headers define the macros FALSE=0 & TRUE=1;
 * normally they do not.
 * These obsolete macros sometimes break compilation of other code that
 * defines enum constants or similar with these names.
 * C++ has long defined bool/false/true.
 * C99 also added definitions for these, although as macros; see stdbool.h.
 *
 * You may transitionally define U_DEFINE_FALSE_AND_TRUE=1 if you need time to migrate code.
 *
 * \xrefitem internal "Internal"  "Internal List"  Do not use. This API is for internal use only. ICU 68
 */
#if !defined(U_DEFINE_FALSE_AND_TRUE)
    // Nothing predefined: default to avoiding collision with
    // non-macro definitions of FALSE & TRUE.
#   define U_DEFINE_FALSE_AND_TRUE 0
#endif

#if U_DEFINE_FALSE_AND_TRUE || defined(U_IN_DOXYGEN)
#   if !defined(TRUE)
/**
 * The TRUE value of a UBool.
 *
 * \xrefitem deprecated "Deprecated" "Deprecated List" ICU 68 Use standard "true" instead.
 */
#       define TRUE  1
#   endif
#   if !defined(FALSE)
/**
 * The FALSE value of a UBool.
 *
 * \xrefitem deprecated "Deprecated" "Deprecated List" ICU 68 Use standard "false" instead.
 */
#       define FALSE 0
#   endif
#endif  // U_DEFINE_FALSE_AND_TRUE
288 
289 /*==========================================================================*/
290 /* Unicode data types                                                       */
291 /*==========================================================================*/
292 
293 /* wchar_t-related definitions -------------------------------------------- */
294 
295 /*
296  * \def U_WCHAR_IS_UTF16
297  * Defined if wchar_t uses UTF-16.
298  *
299  * \xrefitem stable "Stable" "Stable List" ICU 2.0
300  */
301 /*
302  * \def U_WCHAR_IS_UTF32
303  * Defined if wchar_t uses UTF-32.
304  *
305  * \xrefitem stable "Stable" "Stable List" ICU 2.0
306  */
/* Choose a wchar_t encoding marker only when neither one is defined yet. */
#if !(defined(U_WCHAR_IS_UTF16) || defined(U_WCHAR_IS_UTF32))
#   if defined(__STDC_ISO_10646__)
#       if U_SIZEOF_WCHAR_T == 2
#           define U_WCHAR_IS_UTF16
#       elif U_SIZEOF_WCHAR_T == 4
#           define U_WCHAR_IS_UTF32
#       endif
#   elif defined(__UCS2__)
        /* Only for platform values in the range U_PF_OS390..U_PF_OS400. */
#       if (U_SIZEOF_WCHAR_T == 2) && (U_PF_OS390 <= U_PLATFORM && U_PLATFORM <= U_PF_OS400)
#           define U_WCHAR_IS_UTF16
#       endif
#   elif defined(__UCS4__) || (U_PLATFORM == U_PF_OS400 && defined(__UTF32__))
#       if U_SIZEOF_WCHAR_T == 4
#           define U_WCHAR_IS_UTF32
#       endif
#   elif U_PLATFORM_IS_DARWIN_BASED || (U_PLATFORM_IS_LINUX_BASED && U_SIZEOF_WCHAR_T == 4)
#       define U_WCHAR_IS_UTF32
#   elif U_PLATFORM_HAS_WIN32_API
#       define U_WCHAR_IS_UTF16
#   endif
#endif
328 
329 /* UChar and UChar32 definitions -------------------------------------------- */
330 
/** Size of a UChar in bytes (always 2). \xrefitem stable "Stable" "Stable List" ICU 2.0 */
#define U_SIZEOF_UCHAR 2

/**
 * \def U_CHAR16_IS_TYPEDEF
 * 1 if char16_t is a typedef and not a real type (yet), otherwise 0.
 * \xrefitem internal "Internal"  "Internal List"  Do not use. This API is for internal use only.
 */
#if !defined(_MSC_VER) || (_MSC_VER >= 1900)
# define U_CHAR16_IS_TYPEDEF 0
#else
// Visual Studio/MSVC versions below 2015 (_MSC_VER < 1900) do not support char16_t
// as a real type, and instead use a typedef.  https://msdn.microsoft.com/library/bb531344.aspx
# define U_CHAR16_IS_TYPEDEF 1
#endif
346 
347 
348 /**
349  * \var UChar
350  *
351  * The base type for UTF-16 code units and pointers.
352  * Unsigned 16-bit integer.
353  * Starting with ICU 59, C++ API uses char16_t directly, while C API continues to use UChar.
354  *
355  * UChar is configurable by defining the macro UCHAR_TYPE
356  * on the preprocessor or compiler command line:
357  * -DUCHAR_TYPE=uint16_t or -DUCHAR_TYPE=wchar_t (if U_SIZEOF_WCHAR_T==2) etc.
358  * (The UCHAR_TYPE can also be \#defined earlier in this file, for outside the ICU library code.)
359  * This is for transitional use from application code that uses uint16_t or wchar_t for UTF-16.
360  *
361  * The default is UChar=char16_t.
362  *
363  * C++11 defines char16_t as bit-compatible with uint16_t, but as a distinct type.
364  *
365  * In C, char16_t is a simple typedef of uint_least16_t.
366  * ICU requires uint_least16_t=uint16_t for data memory mapping.
367  * On macOS, char16_t is not available because the uchar.h standard header is missing.
368  *
369  * \xrefitem stable "Stable" "Stable List" ICU 4.4
370  */
371 
// Configuration-testing switch: #if 0 is normal, so that UChar defaults to char16_t.
// To test a build with UChar=uint16_t, temporarily change this to #if 1.
#if 0
#   define UCHAR_TYPE uint16_t
#endif

#if !defined(U_ALL_IMPLEMENTATION) && defined(UCHAR_TYPE)
    // A UCHAR_TYPE override is honored only when U_ALL_IMPLEMENTATION is not defined.
    typedef UCHAR_TYPE UChar;
#else
    typedef char16_t UChar;
#endif
384 
/**
 * \var OldUChar
 * Default ICU 58 definition of UChar.
 * A base type for UTF-16 code units and pointers.
 * Unsigned 16-bit integer.
 *
 * OldUChar is wchar_t if U_SIZEOF_WCHAR_T is 2.
 * Otherwise OldUChar is the compiler's __CHAR16_TYPE__ when that macro is
 * defined, and uint16_t when it is not.
 *
 * This makes the definition of OldUChar platform-dependent
 * but allows direct string type compatibility with platforms with
 * 16-bit wchar_t types.
 *
 * This is how UChar was defined in ICU 58, for transition convenience.
 * Exception: ICU 58 UChar was defined to UCHAR_TYPE if that macro was defined.
 * The current UChar responds to UCHAR_TYPE but OldUChar does not.
 *
 * \xrefitem stable "Stable" "Stable List" ICU 59
 */
#if U_SIZEOF_WCHAR_T != 2
#   if defined(__CHAR16_TYPE__)
        typedef __CHAR16_TYPE__ OldUChar;
#   else
        typedef uint16_t OldUChar;
#   endif
#else
    typedef wchar_t OldUChar;
#endif
411 
/**
 * UChar32 is the type for single Unicode code points.
 * It is a signed 32-bit integer (same as int32_t).
 *
 * Valid Unicode code points lie in the range 0..0x10ffff.
 * Every other value (negative or >=0x110000) is illegal as a Unicode code point
 * and may therefore serve as a sentinel value meaning "done", "error"
 * or a similar non-code point condition.
 *
 * Before ICU 2.4 (Jitterbug 2146), the definition of UChar32 was
 * platform-dependent: it was wchar_t if that is 32 bits wide
 * (wchar_t may be signed or unsigned), or else uint32_t.
 *
 * @see U_SENTINEL
 * \xrefitem stable "Stable" "Stable List" ICU 2.4
 */
typedef int32_t UChar32;

/**
 * Sentinel value for APIs that (take or) return single code points (UChar32).
 * It lies outside of the Unicode code point range 0..0x10ffff.
 *
 * A new API could, for example, use U_SENTINEL to indicate
 * a "done" or "error" value.
 *
 * ICU APIs designed before ICU 2.4 usually define service-specific "done"
 * values, mostly 0xffff.
 * Those may need to be distinguished from
 * actual U+ffff text contents by calling functions like
 * CharacterIterator::hasNext() or UnicodeString::length().
 *
 * @return -1
 * @see UChar32
 * \xrefitem stable "Stable" "Stable List" ICU 2.4
 */
#define U_SENTINEL (-1)
450 
451 #include "unicode/urename.h"
452 
453 #endif
454 
455 /** @} */ // addtogroup
456