ucs2/
macros.rs

1use crate::{ucs2_from_utf8_at_offset, Error};
2
3/// Count the number of UCS-2 characters in a string. Return an error if
4/// the string cannot be encoded in UCS-2.
5pub const fn str_num_ucs2_chars(s: &str) -> Result<usize, Error> {
6    let bytes = s.as_bytes();
7    let len = bytes.len();
8
9    let mut offset = 0;
10    let mut num_ucs2_chars = 0;
11
12    while offset < len {
13        // SAFETY: `bytes` is valid UTF-8.
14        match unsafe { ucs2_from_utf8_at_offset(bytes, offset) } {
15            Ok(ch) => {
16                offset += ch.num_bytes as usize;
17                num_ucs2_chars += 1;
18            }
19            Err(err) => {
20                return Err(err);
21            }
22        }
23    }
24
25    Ok(num_ucs2_chars)
26}
27
28/// Convert a `str` into a null-terminated UCS-2 character array.
29pub const fn str_to_ucs2<const N: usize>(s: &str) -> Result<[u16; N], Error> {
30    let bytes = s.as_bytes();
31    let len = bytes.len();
32
33    let mut output = [0; N];
34
35    let mut output_offset = 0;
36    let mut input_offset = 0;
37    while input_offset < len {
38        // SAFETY: `bytes` is valid UTF-8.
39        match unsafe { ucs2_from_utf8_at_offset(bytes, input_offset) } {
40            Ok(ch) => {
41                if ch.val == 0 {
42                    panic!("interior null character");
43                } else {
44                    output[output_offset] = ch.val;
45                    output_offset += 1;
46                    input_offset += ch.num_bytes as usize;
47                }
48            }
49            Err(err) => {
50                return Err(err);
51            }
52        }
53    }
54
55    // The output array must be one bigger than the converted string,
56    // to leave room for the trailing null character.
57    if output_offset + 1 != N {
58        panic!("incorrect array length");
59    }
60
61    Ok(output)
62}
63
/// Build a null-terminated UCS-2 array from a string literal.
///
/// All of the work happens at compile time, which makes the result
/// usable in a `const` item. The macro evaluates to a `[u16; N]` array,
/// where `N` is the number of UCS-2 characters in the literal plus one
/// for the trailing null. In a `const` item, borrow the result and
/// store it as `&[u16]` so that `N` does not need to be written out.
///
/// # Example
///
/// ```
/// use ucs2::ucs2_cstr;
///
/// const S: &[u16] = &ucs2_cstr!("abc");
/// assert_eq!(S, [97, 98, 99, 0]);
/// ```
#[macro_export]
macro_rules! ucs2_cstr {
    ($s:literal) => {{
        // Both steps are `const` items so that any failure is reported
        // while compiling instead of at run time.

        // Array length: one slot per character plus the null char.
        const LEN: usize = match $crate::str_num_ucs2_chars($s) {
            Err(_) => panic!("input contains a character which cannot be represented in UCS-2"),
            Ok(num_chars) => num_chars + 1,
        };

        const ENCODED: [u16; LEN] = match $crate::str_to_ucs2($s) {
            // `str_num_ucs2_chars` has already accepted the literal, so
            // an encoding error cannot occur here.
            Err(_) => unreachable!(),
            Ok(array) => array,
        };

        ENCODED
    }};
}
102
#[cfg(test)]
mod tests {
    use super::*;

    /// Checks the character count for inputs whose characters occupy
    /// one, two, three and four bytes in UTF-8.
    #[test]
    fn test_str_num_chars() {
        // Some of the strings here are from
        // https://www.kermitproject.org/utf8.html.
        let cases: [(&str, Result<usize, Error>); 4] = [
            // One-byte chars.
            ("abc", Ok(3)),
            // Two-byte chars.
            ("Τη γλώσσα μου έδωσαν ελληνική", Ok(29)),
            // Three-byte chars.
            ("ვეპხის ტყაოსანი შოთა რუსთაველი", Ok(30)),
            // Four-byte chars.
            ("😎🔥", Err(Error::MultiByte)),
        ];

        for (input, expected) in cases.iter() {
            assert_eq!(&str_num_ucs2_chars(input), expected, "input: {:?}", input);
        }
    }

    /// Checks that the macro yields the encoded characters followed by
    /// a trailing null.
    #[test]
    fn test_ucs2_cstr() {
        let encoded: [u16; 4] = ucs2_cstr!("abc");
        let expected: [u16; 4] = [97, 98, 99, 0];
        assert_eq!(encoded, expected);
    }
}