1 use crate::{ucs2_from_utf8_at_offset, Error};
2 
3 /// Count the number of UCS-2 characters in a string. Return an error if
4 /// the string cannot be encoded in UCS-2.
str_num_ucs2_chars(s: &str) -> Result<usize, Error>5 pub const fn str_num_ucs2_chars(s: &str) -> Result<usize, Error> {
6     let bytes = s.as_bytes();
7     let len = bytes.len();
8 
9     let mut offset = 0;
10     let mut num_ucs2_chars = 0;
11 
12     while offset < len {
13         // SAFETY: `bytes` is valid UTF-8.
14         match unsafe { ucs2_from_utf8_at_offset(bytes, offset) } {
15             Ok(ch) => {
16                 offset += ch.num_bytes as usize;
17                 num_ucs2_chars += 1;
18             }
19             Err(err) => {
20                 return Err(err);
21             }
22         }
23     }
24 
25     Ok(num_ucs2_chars)
26 }
27 
28 /// Convert a `str` into a null-terminated UCS-2 character array.
str_to_ucs2<const N: usize>(s: &str) -> Result<[u16; N], Error>29 pub const fn str_to_ucs2<const N: usize>(s: &str) -> Result<[u16; N], Error> {
30     let bytes = s.as_bytes();
31     let len = bytes.len();
32 
33     let mut output = [0; N];
34 
35     let mut output_offset = 0;
36     let mut input_offset = 0;
37     while input_offset < len {
38         // SAFETY: `bytes` is valid UTF-8.
39         match unsafe { ucs2_from_utf8_at_offset(bytes, input_offset) } {
40             Ok(ch) => {
41                 if ch.val == 0 {
42                     panic!("interior null character");
43                 } else {
44                     output[output_offset] = ch.val;
45                     output_offset += 1;
46                     input_offset += ch.num_bytes as usize;
47                 }
48             }
49             Err(err) => {
50                 return Err(err);
51             }
52         }
53     }
54 
55     // The output array must be one bigger than the converted string,
56     // to leave room for the trailing null character.
57     if output_offset + 1 != N {
58         panic!("incorrect array length");
59     }
60 
61     Ok(output)
62 }
63 
/// Encode a string literal as a null-terminated UCS-2 array.
///
/// All of the work happens at compile time, so the macro can be used to
/// initialize a `const` item. The macro evaluates to a `[u16; N]`
/// array, where `N` is the number of UCS-2 characters plus one for the
/// trailing null. To avoid spelling out `N` in a `const` item, borrow
/// the result and store it as `&[u16]`.
///
/// Compilation fails if the input contains a character which cannot be
/// represented in UCS-2.
///
/// # Example
///
/// ```
/// use ucs2::ucs2_cstr;
///
/// const S: &[u16] = &ucs2_cstr!("abc");
/// assert_eq!(S, [97, 98, 99, 0]);
/// ```
#[macro_export]
macro_rules! ucs2_cstr {
    ($s:literal) => {{
        // Both intermediate values are `const` items so that any
        // failure is reported at compile time rather than at runtime.

        // Array length: the character count plus one for the null char.
        const LEN: usize = match $crate::str_num_ucs2_chars($s) {
            Err(_) => panic!("input contains a character which cannot be represented in UCS-2"),
            Ok(count) => count + 1,
        };

        const ENCODED: [u16; LEN] = match $crate::str_to_ucs2($s) {
            // `str_num_ucs2_chars` has already accepted the string, so
            // an encoding error cannot occur here.
            Err(_) => unreachable!(),
            Ok(encoded) => encoded,
        };
        ENCODED
    }};
}
102 
#[cfg(test)]
mod tests {
    use super::*;

    /// Check the character count for inputs whose characters take one,
    /// two, three and four bytes in UTF-8.
    #[test]
    fn test_str_num_chars() {
        // Some of the strings here are from https://www.kermitproject.org/utf8.html.

        // One-byte chars.
        assert_eq!(str_num_ucs2_chars("abc"), Ok(3));
        // Two-byte chars.
        assert_eq!(str_num_ucs2_chars("Τη γλώσσα μου έδωσαν ελληνική"), Ok(29));
        // Three-byte chars.
        assert_eq!(str_num_ucs2_chars("ვეპხის ტყაოსანი შოთა რუსთაველი"), Ok(30));
        // Four-byte chars. These are written as escapes so the literal
        // cannot be silently turned into U+FFFD replacement characters
        // (which are three-byte chars) by encoding-lossy tooling.
        assert_eq!(
            str_num_ucs2_chars("\u{1F60E}\u{1F525}"),
            Err(Error::MultiByte)
        );
    }

    /// Check that the macro output is the encoded string followed by a
    /// null character.
    #[test]
    fn test_ucs2_cstr() {
        let s = ucs2_cstr!("abc");
        assert_eq!(s, [97, 98, 99, 0]);
    }
}
127