1 use crate::{ucs2_from_utf8_at_offset, Error};
2
3 /// Count the number of UCS-2 characters in a string. Return an error if
4 /// the string cannot be encoded in UCS-2.
str_num_ucs2_chars(s: &str) -> Result<usize, Error>5 pub const fn str_num_ucs2_chars(s: &str) -> Result<usize, Error> {
6 let bytes = s.as_bytes();
7 let len = bytes.len();
8
9 let mut offset = 0;
10 let mut num_ucs2_chars = 0;
11
12 while offset < len {
13 // SAFETY: `bytes` is valid UTF-8.
14 match unsafe { ucs2_from_utf8_at_offset(bytes, offset) } {
15 Ok(ch) => {
16 offset += ch.num_bytes as usize;
17 num_ucs2_chars += 1;
18 }
19 Err(err) => {
20 return Err(err);
21 }
22 }
23 }
24
25 Ok(num_ucs2_chars)
26 }
27
28 /// Convert a `str` into a null-terminated UCS-2 character array.
str_to_ucs2<const N: usize>(s: &str) -> Result<[u16; N], Error>29 pub const fn str_to_ucs2<const N: usize>(s: &str) -> Result<[u16; N], Error> {
30 let bytes = s.as_bytes();
31 let len = bytes.len();
32
33 let mut output = [0; N];
34
35 let mut output_offset = 0;
36 let mut input_offset = 0;
37 while input_offset < len {
38 // SAFETY: `bytes` is valid UTF-8.
39 match unsafe { ucs2_from_utf8_at_offset(bytes, input_offset) } {
40 Ok(ch) => {
41 if ch.val == 0 {
42 panic!("interior null character");
43 } else {
44 output[output_offset] = ch.val;
45 output_offset += 1;
46 input_offset += ch.num_bytes as usize;
47 }
48 }
49 Err(err) => {
50 return Err(err);
51 }
52 }
53 }
54
55 // The output array must be one bigger than the converted string,
56 // to leave room for the trailing null character.
57 if output_offset + 1 != N {
58 panic!("incorrect array length");
59 }
60
61 Ok(output)
62 }
63
/// Encode a string as UCS-2 with a trailing null character.
///
/// The encoding is done at compile time, so the result can be used in a
/// `const` item. The type returned by the macro is a `[u16; N]` array;
/// to avoid having to specify what `N` is in a `const` item, take a
/// reference and store it as `&[u16]`.
///
/// Compilation fails if the literal contains a character which cannot
/// be represented in UCS-2, or if it contains an interior null
/// character.
///
/// # Example
///
/// ```
/// use ucs2::ucs2_cstr;
///
/// const S: &[u16] = &ucs2_cstr!("abc");
/// assert_eq!(S, [97, 98, 99, 0]);
/// ```
#[macro_export]
macro_rules! ucs2_cstr {
    ($s:literal) => {{
        // Use `const` values here to force errors to happen at compile
        // time.

        // Length of the output array: one slot per UCS-2 character plus
        // one for the trailing null.
        const NUM_CHARS: usize = match $crate::str_num_ucs2_chars($s) {
            // Add one for the null char.
            Ok(num) => num + 1,
            Err(_) => panic!("input contains a character which cannot be represented in UCS-2"),
        };

        // The array length `N` of `str_to_ucs2` is inferred from the
        // type annotation on this constant.
        const VAL: [u16; NUM_CHARS] = match $crate::str_to_ucs2($s) {
            Ok(val) => val,
            // The string was already checked by `str_num_ucs2_chars`,
            // so this error is unreachable.
            Err(_) => {
                unreachable!();
            }
        };
        VAL
    }};
}
102
#[cfg(test)]
mod tests {
    use super::*;

    /// Checks the character count for inputs of each UTF-8 width.
    #[test]
    fn test_str_num_chars() {
        // Some of the strings here are from https://www.kermitproject.org/utf8.html.

        // Empty input: the loop never runs, so the count is zero.
        assert_eq!(str_num_ucs2_chars(""), Ok(0));
        // One-byte chars.
        assert_eq!(str_num_ucs2_chars("abc"), Ok(3));
        // Two-byte chars.
        assert_eq!(str_num_ucs2_chars("Τη γλώσσα μου έδωσαν ελληνική"), Ok(29));
        // Three-byte chars.
        assert_eq!(str_num_ucs2_chars("ვეპხის ტყაოსანი შოთა რუსთაველი"), Ok(30));
        // Four-byte chars. Written as escapes so the characters cannot
        // be silently dropped by tooling that mishandles code points
        // above U+FFFF.
        assert_eq!(
            str_num_ucs2_chars("\u{1F60E}\u{1F525}"),
            Err(Error::MultiByte)
        );
    }

    /// Checks that the macro appends the trailing null character.
    #[test]
    fn test_ucs2_cstr() {
        let s = ucs2_cstr!("abc");
        assert_eq!(s, [97, 98, 99, 0]);
    }
}
127