1 use super::*; 2 3 /// A push-based, lossy decoder for UTF-8. 4 /// Errors are replaced with the U+FFFD replacement character. 5 /// 6 /// Users “push” bytes into the decoder, which in turn “pushes” `&str` slices into a callback. 7 /// 8 /// For example, `String::from_utf8_lossy` (but returning `String` instead of `Cow`) 9 /// can be rewritten as: 10 /// 11 /// ```rust 12 /// fn string_from_utf8_lossy(input: &[u8]) -> String { 13 /// let mut string = String::new(); 14 /// utf8::LossyDecoder::new(|s| string.push_str(s)).feed(input); 15 /// string 16 /// } 17 /// ``` 18 /// 19 /// **Note:** Dropping the decoder signals the end of the input: 20 /// If the last input chunk ended with an incomplete byte sequence for a code point, 21 /// this is an error and a replacement character is emitted. 22 /// Use `std::mem::forget` to inhibit this behavior. 23 pub struct LossyDecoder<F: FnMut(&str)> { 24 push_str: F, 25 incomplete: Incomplete, 26 } 27 28 impl<F: FnMut(&str)> LossyDecoder<F> { 29 /// Create a new decoder from a callback. 30 #[inline] new(push_str: F) -> Self31 pub fn new(push_str: F) -> Self { 32 LossyDecoder { 33 push_str: push_str, 34 incomplete: Incomplete { 35 buffer: [0, 0, 0, 0], 36 buffer_len: 0, 37 }, 38 } 39 } 40 41 /// Feed one chunk of input into the decoder. 42 /// 43 /// The input is decoded lossily 44 /// and the callback called once or more with `&str` string slices. 45 /// 46 /// If the UTF-8 byte sequence for one code point was split into this bytes chunk 47 /// and previous bytes chunks, it will be correctly pieced back together. feed(&mut self, mut input: &[u8])48 pub fn feed(&mut self, mut input: &[u8]) { 49 if self.incomplete.buffer_len > 0 { 50 match self.incomplete.try_complete(input) { 51 Some((Ok(s), remaining)) => { 52 (self.push_str)(s); 53 input = remaining 54 } 55 Some((Err(_), remaining)) => { 56 (self.push_str)(REPLACEMENT_CHARACTER); 57 input = remaining 58 } 59 None => { 60 return 61 } 62 } 63 } 64 loop { 65 match decode(input) { 66 Ok(s) => { 67 (self.push_str)(s); 68 return 69 } 70 Err(DecodeError::Incomplete { valid_prefix, incomplete_suffix }) => { 71 (self.push_str)(valid_prefix); 72 self.incomplete = incomplete_suffix; 73 return 74 } 75 Err(DecodeError::Invalid { valid_prefix, remaining_input, .. }) => { 76 (self.push_str)(valid_prefix); 77 (self.push_str)(REPLACEMENT_CHARACTER); 78 input = remaining_input 79 } 80 } 81 } 82 } 83 } 84 85 impl<F: FnMut(&str)> Drop for LossyDecoder<F> { 86 #[inline] drop(&mut self)87 fn drop(&mut self) { 88 if self.incomplete.buffer_len > 0 { 89 (self.push_str)(REPLACEMENT_CHARACTER) 90 } 91 } 92 } 93