1 use super::*;
2 
3 /// A push-based, lossy decoder for UTF-8.
4 /// Errors are replaced with the U+FFFD replacement character.
5 ///
6 /// Users “push” bytes into the decoder, which in turn “pushes” `&str` slices into a callback.
7 ///
8 /// For example, `String::from_utf8_lossy` (but returning `String` instead of `Cow`)
9 /// can be rewritten as:
10 ///
11 /// ```rust
12 /// fn string_from_utf8_lossy(input: &[u8]) -> String {
13 ///     let mut string = String::new();
14 ///     utf8::LossyDecoder::new(|s| string.push_str(s)).feed(input);
15 ///     string
16 /// }
17 /// ```
18 ///
19 /// **Note:** Dropping the decoder signals the end of the input:
20 /// If the last input chunk ended with an incomplete byte sequence for a code point,
21 /// this is an error and a replacement character is emitted.
22 /// Use `std::mem::forget` to inhibit this behavior.
23 pub struct LossyDecoder<F: FnMut(&str)> {
24     push_str: F,
25     incomplete: Incomplete,
26 }
27 
28 impl<F: FnMut(&str)> LossyDecoder<F> {
29     /// Create a new decoder from a callback.
30     #[inline]
new(push_str: F) -> Self31     pub fn new(push_str: F) -> Self {
32         LossyDecoder {
33             push_str: push_str,
34             incomplete: Incomplete {
35                 buffer: [0, 0, 0, 0],
36                 buffer_len: 0,
37             },
38         }
39     }
40 
41     /// Feed one chunk of input into the decoder.
42     ///
43     /// The input is decoded lossily
44     /// and the callback called once or more with `&str` string slices.
45     ///
46     /// If the UTF-8 byte sequence for one code point was split into this bytes chunk
47     /// and previous bytes chunks, it will be correctly pieced back together.
feed(&mut self, mut input: &[u8])48     pub fn feed(&mut self, mut input: &[u8]) {
49         if self.incomplete.buffer_len > 0 {
50             match self.incomplete.try_complete(input) {
51                 Some((Ok(s), remaining)) => {
52                     (self.push_str)(s);
53                     input = remaining
54                 }
55                 Some((Err(_), remaining)) => {
56                     (self.push_str)(REPLACEMENT_CHARACTER);
57                     input = remaining
58                 }
59                 None => {
60                     return
61                 }
62             }
63         }
64         loop {
65             match decode(input) {
66                 Ok(s) => {
67                     (self.push_str)(s);
68                     return
69                 }
70                 Err(DecodeError::Incomplete { valid_prefix, incomplete_suffix }) => {
71                     (self.push_str)(valid_prefix);
72                     self.incomplete = incomplete_suffix;
73                     return
74                 }
75                 Err(DecodeError::Invalid { valid_prefix, remaining_input, .. }) => {
76                     (self.push_str)(valid_prefix);
77                     (self.push_str)(REPLACEMENT_CHARACTER);
78                     input = remaining_input
79                 }
80             }
81         }
82     }
83 }
84 
85 impl<F: FnMut(&str)> Drop for LossyDecoder<F> {
86     #[inline]
drop(&mut self)87     fn drop(&mut self) {
88         if self.incomplete.buffer_len > 0 {
89             (self.push_str)(REPLACEMENT_CHARACTER)
90         }
91     }
92 }
93