1 use std::{
2     fs::File,
3     io::{self, BufRead, Seek},
4     marker::PhantomData,
5     path::Path,
6     result,
7 };
8 
9 use {
10     csv_core::{Reader as CoreReader, ReaderBuilder as CoreReaderBuilder},
11     serde::de::DeserializeOwned,
12 };
13 
14 use crate::{
15     byte_record::{ByteRecord, Position},
16     error::{Error, ErrorKind, Result, Utf8Error},
17     string_record::StringRecord,
18     {Terminator, Trim},
19 };
20 
21 /// Builds a CSV reader with various configuration knobs.
22 ///
23 /// This builder can be used to tweak the field delimiter, record terminator
24 /// and more. Once a CSV `Reader` is built, its configuration cannot be
25 /// changed.
#[derive(Debug)]
pub struct ReaderBuilder {
    /// Capacity, in bytes, of the `io::BufReader` that a built `Reader`
    /// wraps around its underlying reader.
    capacity: usize,
    /// Whether records may have differing numbers of fields. Copied into
    /// the state of every reader built from this configuration.
    flexible: bool,
    /// Whether the first row is treated as a special header row. Copied into
    /// the state of every reader built from this configuration.
    has_headers: bool,
    /// Which values, if any, are trimmed of surrounding whitespace. Copied
    /// into the state of every reader built from this configuration.
    trim: Trim,
    /// The underlying CSV parser builder.
    ///
    /// We explicitly put this on the heap because CoreReaderBuilder embeds an
    /// entire DFA transition table, which along with other things, tallies up
    /// to almost 500 bytes on the stack.
    builder: Box<CoreReaderBuilder>,
}
39 
40 impl Default for ReaderBuilder {
default() -> ReaderBuilder41     fn default() -> ReaderBuilder {
42         ReaderBuilder {
43             capacity: 8 * (1 << 10),
44             flexible: false,
45             has_headers: true,
46             trim: Trim::default(),
47             builder: Box::new(CoreReaderBuilder::default()),
48         }
49     }
50 }
51 
52 impl ReaderBuilder {
53     /// Create a new builder for configuring CSV parsing.
54     ///
55     /// To convert a builder into a reader, call one of the methods starting
56     /// with `from_`.
57     ///
58     /// # Example
59     ///
60     /// ```
61     /// use std::error::Error;
62     /// use csv::{ReaderBuilder, StringRecord};
63     ///
64     /// # fn main() { example().unwrap(); }
65     /// fn example() -> Result<(), Box<dyn Error>> {
66     ///     let data = "\
67     /// city,country,pop
68     /// Boston,United States,4628910
69     /// Concord,United States,42695
70     /// ";
71     ///     let mut rdr = ReaderBuilder::new().from_reader(data.as_bytes());
72     ///
73     ///     let records = rdr
74     ///         .records()
75     ///         .collect::<Result<Vec<StringRecord>, csv::Error>>()?;
76     ///     assert_eq!(records, vec![
77     ///         vec!["Boston", "United States", "4628910"],
78     ///         vec!["Concord", "United States", "42695"],
79     ///     ]);
80     ///     Ok(())
81     /// }
82     /// ```
new() -> ReaderBuilder83     pub fn new() -> ReaderBuilder {
84         ReaderBuilder::default()
85     }
86 
87     /// Build a CSV parser from this configuration that reads data from the
88     /// given file path.
89     ///
90     /// If there was a problem opening the file at the given path, then this
91     /// returns the corresponding error.
92     ///
93     /// # Example
94     ///
95     /// ```no_run
96     /// use std::error::Error;
97     /// use csv::ReaderBuilder;
98     ///
99     /// # fn main() { example().unwrap(); }
100     /// fn example() -> Result<(), Box<dyn Error>> {
101     ///     let mut rdr = ReaderBuilder::new().from_path("foo.csv")?;
102     ///     for result in rdr.records() {
103     ///         let record = result?;
104     ///         println!("{:?}", record);
105     ///     }
106     ///     Ok(())
107     /// }
108     /// ```
from_path<P: AsRef<Path>>(&self, path: P) -> Result<Reader<File>>109     pub fn from_path<P: AsRef<Path>>(&self, path: P) -> Result<Reader<File>> {
110         Ok(Reader::new(self, File::open(path)?))
111     }
112 
113     /// Build a CSV parser from this configuration that reads data from `rdr`.
114     ///
115     /// Note that the CSV reader is buffered automatically, so you should not
116     /// wrap `rdr` in a buffered reader like `io::BufReader`.
117     ///
118     /// # Example
119     ///
120     /// ```
121     /// use std::error::Error;
122     /// use csv::ReaderBuilder;
123     ///
124     /// # fn main() { example().unwrap(); }
125     /// fn example() -> Result<(), Box<dyn Error>> {
126     ///     let data = "\
127     /// city,country,pop
128     /// Boston,United States,4628910
129     /// Concord,United States,42695
130     /// ";
131     ///     let mut rdr = ReaderBuilder::new().from_reader(data.as_bytes());
132     ///     for result in rdr.records() {
133     ///         let record = result?;
134     ///         println!("{:?}", record);
135     ///     }
136     ///     Ok(())
137     /// }
138     /// ```
from_reader<R: io::Read>(&self, rdr: R) -> Reader<R>139     pub fn from_reader<R: io::Read>(&self, rdr: R) -> Reader<R> {
140         Reader::new(self, rdr)
141     }
142 
143     /// The field delimiter to use when parsing CSV.
144     ///
145     /// The default is `b','`.
146     ///
147     /// # Example
148     ///
149     /// ```
150     /// use std::error::Error;
151     /// use csv::ReaderBuilder;
152     ///
153     /// # fn main() { example().unwrap(); }
154     /// fn example() -> Result<(), Box<dyn Error>> {
155     ///     let data = "\
156     /// city;country;pop
157     /// Boston;United States;4628910
158     /// ";
159     ///     let mut rdr = ReaderBuilder::new()
160     ///         .delimiter(b';')
161     ///         .from_reader(data.as_bytes());
162     ///
163     ///     if let Some(result) = rdr.records().next() {
164     ///         let record = result?;
165     ///         assert_eq!(record, vec!["Boston", "United States", "4628910"]);
166     ///         Ok(())
167     ///     } else {
168     ///         Err(From::from("expected at least one record but got none"))
169     ///     }
170     /// }
171     /// ```
delimiter(&mut self, delimiter: u8) -> &mut ReaderBuilder172     pub fn delimiter(&mut self, delimiter: u8) -> &mut ReaderBuilder {
173         self.builder.delimiter(delimiter);
174         self
175     }
176 
177     /// Whether to treat the first row as a special header row.
178     ///
179     /// By default, the first row is treated as a special header row, which
180     /// means the header is never returned by any of the record reading methods
181     /// or iterators. When this is disabled (`yes` set to `false`), the first
182     /// row is not treated specially.
183     ///
184     /// Note that the `headers` and `byte_headers` methods are unaffected by
185     /// whether this is set. Those methods always return the first record.
186     ///
187     /// # Example
188     ///
189     /// This example shows what happens when `has_headers` is disabled.
190     /// Namely, the first row is treated just like any other row.
191     ///
192     /// ```
193     /// use std::error::Error;
194     /// use csv::ReaderBuilder;
195     ///
196     /// # fn main() { example().unwrap(); }
197     /// fn example() -> Result<(), Box<dyn Error>> {
198     ///     let data = "\
199     /// city,country,pop
200     /// Boston,United States,4628910
201     /// ";
202     ///     let mut rdr = ReaderBuilder::new()
203     ///         .has_headers(false)
204     ///         .from_reader(data.as_bytes());
205     ///     let mut iter = rdr.records();
206     ///
207     ///     // Read the first record.
208     ///     if let Some(result) = iter.next() {
209     ///         let record = result?;
210     ///         assert_eq!(record, vec!["city", "country", "pop"]);
211     ///     } else {
212     ///         return Err(From::from(
213     ///             "expected at least two records but got none"));
214     ///     }
215     ///
216     ///     // Read the second record.
217     ///     if let Some(result) = iter.next() {
218     ///         let record = result?;
219     ///         assert_eq!(record, vec!["Boston", "United States", "4628910"]);
220     ///     } else {
221     ///         return Err(From::from(
222     ///             "expected at least two records but got one"))
223     ///     }
224     ///     Ok(())
225     /// }
226     /// ```
    pub fn has_headers(&mut self, yes: bool) -> &mut ReaderBuilder {
        // Only recorded on the builder; each `Reader` copies the flag into
        // its own state when it is constructed.
        self.has_headers = yes;
        self
    }
231 
232     /// Whether the number of fields in records is allowed to change or not.
233     ///
234     /// When disabled (which is the default), parsing CSV data will return an
235     /// error if a record is found with a number of fields different from the
236     /// number of fields in a previous record.
237     ///
238     /// When enabled, this error checking is turned off.
239     ///
240     /// # Example: flexible records enabled
241     ///
242     /// ```
243     /// use std::error::Error;
244     /// use csv::ReaderBuilder;
245     ///
246     /// # fn main() { example().unwrap(); }
247     /// fn example() -> Result<(), Box<dyn Error>> {
248     ///     // Notice that the first row is missing the population count.
249     ///     let data = "\
250     /// city,country,pop
251     /// Boston,United States
252     /// ";
253     ///     let mut rdr = ReaderBuilder::new()
254     ///         .flexible(true)
255     ///         .from_reader(data.as_bytes());
256     ///
257     ///     if let Some(result) = rdr.records().next() {
258     ///         let record = result?;
259     ///         assert_eq!(record, vec!["Boston", "United States"]);
260     ///         Ok(())
261     ///     } else {
262     ///         Err(From::from("expected at least one record but got none"))
263     ///     }
264     /// }
265     /// ```
266     ///
267     /// # Example: flexible records disabled
268     ///
269     /// This shows the error that appears when records of unequal length
270     /// are found and flexible records have been disabled (which is the
271     /// default).
272     ///
273     /// ```
274     /// use std::error::Error;
275     /// use csv::{ErrorKind, ReaderBuilder};
276     ///
277     /// # fn main() { example().unwrap(); }
278     /// fn example() -> Result<(), Box<dyn Error>> {
279     ///     // Notice that the first row is missing the population count.
280     ///     let data = "\
281     /// city,country,pop
282     /// Boston,United States
283     /// ";
284     ///     let mut rdr = ReaderBuilder::new()
285     ///         .flexible(false)
286     ///         .from_reader(data.as_bytes());
287     ///
288     ///     if let Some(Err(err)) = rdr.records().next() {
289     ///         match *err.kind() {
290     ///             ErrorKind::UnequalLengths { expected_len, len, .. } => {
291     ///                 // The header row has 3 fields...
292     ///                 assert_eq!(expected_len, 3);
293     ///                 // ... but the first row has only 2 fields.
294     ///                 assert_eq!(len, 2);
295     ///                 Ok(())
296     ///             }
297     ///             ref wrong => {
298     ///                 Err(From::from(format!(
299     ///                     "expected UnequalLengths error but got {:?}",
300     ///                     wrong)))
301     ///             }
302     ///         }
303     ///     } else {
304     ///         Err(From::from(
305     ///             "expected at least one errored record but got none"))
306     ///     }
307     /// }
308     /// ```
    pub fn flexible(&mut self, yes: bool) -> &mut ReaderBuilder {
        // Only recorded on the builder; each `Reader` copies the flag into
        // its own state when it is constructed.
        self.flexible = yes;
        self
    }
313 
314     /// Whether fields are trimmed of leading and trailing whitespace or not.
315     ///
316     /// By default, no trimming is performed. This method permits one to
317     /// override that behavior and choose one of the following options:
318     ///
319     /// 1. `Trim::Headers` trims only header values.
320     /// 2. `Trim::Fields` trims only non-header or "field" values.
321     /// 3. `Trim::All` trims both header and non-header values.
322     ///
323     /// A value is only interpreted as a header value if this CSV reader is
324     /// configured to read a header record (which is the default).
325     ///
326     /// When reading string records, characters meeting the definition of
327     /// Unicode whitespace are trimmed. When reading byte records, characters
328     /// meeting the definition of ASCII whitespace are trimmed. ASCII
329     /// whitespace characters correspond to the set `[\t\n\v\f\r ]`.
330     ///
331     /// # Example
332     ///
333     /// This example shows what happens when all values are trimmed.
334     ///
335     /// ```
336     /// use std::error::Error;
337     /// use csv::{ReaderBuilder, StringRecord, Trim};
338     ///
339     /// # fn main() { example().unwrap(); }
340     /// fn example() -> Result<(), Box<dyn Error>> {
341     ///     let data = "\
342     /// city ,   country ,  pop
343     /// Boston,\"
344     ///    United States\",4628910
345     /// Concord,   United States   ,42695
346     /// ";
347     ///     let mut rdr = ReaderBuilder::new()
348     ///         .trim(Trim::All)
349     ///         .from_reader(data.as_bytes());
350     ///     let records = rdr
351     ///         .records()
352     ///         .collect::<Result<Vec<StringRecord>, csv::Error>>()?;
353     ///     assert_eq!(records, vec![
354     ///         vec!["Boston", "United States", "4628910"],
355     ///         vec!["Concord", "United States", "42695"],
356     ///     ]);
357     ///     Ok(())
358     /// }
359     /// ```
    pub fn trim(&mut self, trim: Trim) -> &mut ReaderBuilder {
        // Only recorded on the builder; each `Reader` copies the setting into
        // its own state when it is constructed.
        self.trim = trim;
        self
    }
364 
365     /// The record terminator to use when parsing CSV.
366     ///
367     /// A record terminator can be any single byte. The default is a special
368     /// value, `Terminator::CRLF`, which treats any occurrence of `\r`, `\n`
369     /// or `\r\n` as a single record terminator.
370     ///
371     /// # Example: `$` as a record terminator
372     ///
373     /// ```
374     /// use std::error::Error;
375     /// use csv::{ReaderBuilder, Terminator};
376     ///
377     /// # fn main() { example().unwrap(); }
378     /// fn example() -> Result<(), Box<dyn Error>> {
379     ///     let data = "city,country,pop$Boston,United States,4628910";
380     ///     let mut rdr = ReaderBuilder::new()
381     ///         .terminator(Terminator::Any(b'$'))
382     ///         .from_reader(data.as_bytes());
383     ///
384     ///     if let Some(result) = rdr.records().next() {
385     ///         let record = result?;
386     ///         assert_eq!(record, vec!["Boston", "United States", "4628910"]);
387     ///         Ok(())
388     ///     } else {
389     ///         Err(From::from("expected at least one record but got none"))
390     ///     }
391     /// }
392     /// ```
terminator(&mut self, term: Terminator) -> &mut ReaderBuilder393     pub fn terminator(&mut self, term: Terminator) -> &mut ReaderBuilder {
394         self.builder.terminator(term.to_core());
395         self
396     }
397 
398     /// The quote character to use when parsing CSV.
399     ///
400     /// The default is `b'"'`.
401     ///
402     /// # Example: single quotes instead of double quotes
403     ///
404     /// ```
405     /// use std::error::Error;
406     /// use csv::ReaderBuilder;
407     ///
408     /// # fn main() { example().unwrap(); }
409     /// fn example() -> Result<(), Box<dyn Error>> {
410     ///     let data = "\
411     /// city,country,pop
412     /// Boston,'United States',4628910
413     /// ";
414     ///     let mut rdr = ReaderBuilder::new()
415     ///         .quote(b'\'')
416     ///         .from_reader(data.as_bytes());
417     ///
418     ///     if let Some(result) = rdr.records().next() {
419     ///         let record = result?;
420     ///         assert_eq!(record, vec!["Boston", "United States", "4628910"]);
421     ///         Ok(())
422     ///     } else {
423     ///         Err(From::from("expected at least one record but got none"))
424     ///     }
425     /// }
426     /// ```
quote(&mut self, quote: u8) -> &mut ReaderBuilder427     pub fn quote(&mut self, quote: u8) -> &mut ReaderBuilder {
428         self.builder.quote(quote);
429         self
430     }
431 
432     /// The escape character to use when parsing CSV.
433     ///
434     /// In some variants of CSV, quotes are escaped using a special escape
435     /// character like `\` (instead of escaping quotes by doubling them).
436     ///
437     /// By default, recognizing these idiosyncratic escapes is disabled.
438     ///
439     /// # Example
440     ///
441     /// ```
442     /// use std::error::Error;
443     /// use csv::ReaderBuilder;
444     ///
445     /// # fn main() { example().unwrap(); }
446     /// fn example() -> Result<(), Box<dyn Error>> {
447     ///     let data = "\
448     /// city,country,pop
449     /// Boston,\"The \\\"United\\\" States\",4628910
450     /// ";
451     ///     let mut rdr = ReaderBuilder::new()
452     ///         .escape(Some(b'\\'))
453     ///         .from_reader(data.as_bytes());
454     ///
455     ///     if let Some(result) = rdr.records().next() {
456     ///         let record = result?;
457     ///         assert_eq!(record, vec![
458     ///             "Boston", "The \"United\" States", "4628910",
459     ///         ]);
460     ///         Ok(())
461     ///     } else {
462     ///         Err(From::from("expected at least one record but got none"))
463     ///     }
464     /// }
465     /// ```
escape(&mut self, escape: Option<u8>) -> &mut ReaderBuilder466     pub fn escape(&mut self, escape: Option<u8>) -> &mut ReaderBuilder {
467         self.builder.escape(escape);
468         self
469     }
470 
471     /// Enable double quote escapes.
472     ///
473     /// This is enabled by default, but it may be disabled. When disabled,
474     /// doubled quotes are not interpreted as escapes.
475     ///
476     /// # Example
477     ///
478     /// ```
479     /// use std::error::Error;
480     /// use csv::ReaderBuilder;
481     ///
482     /// # fn main() { example().unwrap(); }
483     /// fn example() -> Result<(), Box<dyn Error>> {
484     ///     let data = "\
485     /// city,country,pop
486     /// Boston,\"The \"\"United\"\" States\",4628910
487     /// ";
488     ///     let mut rdr = ReaderBuilder::new()
489     ///         .double_quote(false)
490     ///         .from_reader(data.as_bytes());
491     ///
492     ///     if let Some(result) = rdr.records().next() {
493     ///         let record = result?;
494     ///         assert_eq!(record, vec![
495     ///             "Boston", "The \"United\"\" States\"", "4628910",
496     ///         ]);
497     ///         Ok(())
498     ///     } else {
499     ///         Err(From::from("expected at least one record but got none"))
500     ///     }
501     /// }
502     /// ```
double_quote(&mut self, yes: bool) -> &mut ReaderBuilder503     pub fn double_quote(&mut self, yes: bool) -> &mut ReaderBuilder {
504         self.builder.double_quote(yes);
505         self
506     }
507 
508     /// Enable or disable quoting.
509     ///
510     /// This is enabled by default, but it may be disabled. When disabled,
511     /// quotes are not treated specially.
512     ///
513     /// # Example
514     ///
515     /// ```
516     /// use std::error::Error;
517     /// use csv::ReaderBuilder;
518     ///
519     /// # fn main() { example().unwrap(); }
520     /// fn example() -> Result<(), Box<dyn Error>> {
521     ///     let data = "\
522     /// city,country,pop
523     /// Boston,\"The United States,4628910
524     /// ";
525     ///     let mut rdr = ReaderBuilder::new()
526     ///         .quoting(false)
527     ///         .from_reader(data.as_bytes());
528     ///
529     ///     if let Some(result) = rdr.records().next() {
530     ///         let record = result?;
531     ///         assert_eq!(record, vec![
532     ///             "Boston", "\"The United States", "4628910",
533     ///         ]);
534     ///         Ok(())
535     ///     } else {
536     ///         Err(From::from("expected at least one record but got none"))
537     ///     }
538     /// }
539     /// ```
quoting(&mut self, yes: bool) -> &mut ReaderBuilder540     pub fn quoting(&mut self, yes: bool) -> &mut ReaderBuilder {
541         self.builder.quoting(yes);
542         self
543     }
544 
545     /// The comment character to use when parsing CSV.
546     ///
547     /// If the start of a record begins with the byte given here, then that
548     /// line is ignored by the CSV parser.
549     ///
550     /// This is disabled by default.
551     ///
552     /// # Example
553     ///
554     /// ```
555     /// use std::error::Error;
556     /// use csv::ReaderBuilder;
557     ///
558     /// # fn main() { example().unwrap(); }
559     /// fn example() -> Result<(), Box<dyn Error>> {
560     ///     let data = "\
561     /// city,country,pop
562     /// #Concord,United States,42695
563     /// Boston,United States,4628910
564     /// ";
565     ///     let mut rdr = ReaderBuilder::new()
566     ///         .comment(Some(b'#'))
567     ///         .from_reader(data.as_bytes());
568     ///
569     ///     if let Some(result) = rdr.records().next() {
570     ///         let record = result?;
571     ///         assert_eq!(record, vec!["Boston", "United States", "4628910"]);
572     ///         Ok(())
573     ///     } else {
574     ///         Err(From::from("expected at least one record but got none"))
575     ///     }
576     /// }
577     /// ```
comment(&mut self, comment: Option<u8>) -> &mut ReaderBuilder578     pub fn comment(&mut self, comment: Option<u8>) -> &mut ReaderBuilder {
579         self.builder.comment(comment);
580         self
581     }
582 
583     /// A convenience method for specifying a configuration to read ASCII
584     /// delimited text.
585     ///
586     /// This sets the delimiter and record terminator to the ASCII unit
587     /// separator (`\x1F`) and record separator (`\x1E`), respectively.
588     ///
589     /// # Example
590     ///
591     /// ```
592     /// use std::error::Error;
593     /// use csv::ReaderBuilder;
594     ///
595     /// # fn main() { example().unwrap(); }
596     /// fn example() -> Result<(), Box<dyn Error>> {
597     ///     let data = "\
598     /// city\x1Fcountry\x1Fpop\x1EBoston\x1FUnited States\x1F4628910";
599     ///     let mut rdr = ReaderBuilder::new()
600     ///         .ascii()
601     ///         .from_reader(data.as_bytes());
602     ///
603     ///     if let Some(result) = rdr.records().next() {
604     ///         let record = result?;
605     ///         assert_eq!(record, vec!["Boston", "United States", "4628910"]);
606     ///         Ok(())
607     ///     } else {
608     ///         Err(From::from("expected at least one record but got none"))
609     ///     }
610     /// }
611     /// ```
ascii(&mut self) -> &mut ReaderBuilder612     pub fn ascii(&mut self) -> &mut ReaderBuilder {
613         self.builder.ascii();
614         self
615     }
616 
617     /// Set the capacity (in bytes) of the buffer used in the CSV reader.
618     /// This defaults to a reasonable setting.
    pub fn buffer_capacity(&mut self, capacity: usize) -> &mut ReaderBuilder {
        // Only recorded on the builder; the value is passed to
        // `io::BufReader::with_capacity` when a `Reader` is constructed.
        self.capacity = capacity;
        self
    }
623 
624     /// Enable or disable the NFA for parsing CSV.
625     ///
626     /// This is intended to be a debug option. The NFA is always slower than
627     /// the DFA.
628     #[doc(hidden)]
nfa(&mut self, yes: bool) -> &mut ReaderBuilder629     pub fn nfa(&mut self, yes: bool) -> &mut ReaderBuilder {
630         self.builder.nfa(yes);
631         self
632     }
633 }
634 
/// An already configured CSV reader.
636 ///
637 /// A CSV reader takes as input CSV data and transforms that into standard Rust
638 /// values. The most flexible way to read CSV data is as a sequence of records,
639 /// where a record is a sequence of fields and each field is a string. However,
640 /// a reader can also deserialize CSV data into Rust types like `i64` or
641 /// `(String, f64, f64, f64)` or even a custom struct automatically using
642 /// Serde.
643 ///
644 /// # Configuration
645 ///
646 /// A CSV reader has a couple convenient constructor methods like `from_path`
647 /// and `from_reader`. However, if you want to configure the CSV reader to use
648 /// a different delimiter or quote character (among many other things), then
649 /// you should use a [`ReaderBuilder`](struct.ReaderBuilder.html) to construct
650 /// a `Reader`. For example, to change the field delimiter:
651 ///
652 /// ```
653 /// use std::error::Error;
654 /// use csv::ReaderBuilder;
655 ///
656 /// # fn main() { example().unwrap(); }
657 /// fn example() -> Result<(), Box<dyn Error>> {
658 ///     let data = "\
659 /// city;country;pop
660 /// Boston;United States;4628910
661 /// ";
662 ///     let mut rdr = ReaderBuilder::new()
663 ///         .delimiter(b';')
664 ///         .from_reader(data.as_bytes());
665 ///
666 ///     if let Some(result) = rdr.records().next() {
667 ///         let record = result?;
668 ///         assert_eq!(record, vec!["Boston", "United States", "4628910"]);
669 ///         Ok(())
670 ///     } else {
671 ///         Err(From::from("expected at least one record but got none"))
672 ///     }
673 /// }
674 /// ```
675 ///
676 /// # Error handling
677 ///
678 /// In general, CSV *parsing* does not ever return an error. That is, there is
679 /// no such thing as malformed CSV data. Instead, this reader will prioritize
680 /// finding a parse over rejecting CSV data that it does not understand. This
681 /// choice was inspired by other popular CSV parsers, but also because it is
682 /// pragmatic. CSV data varies wildly, so even if the CSV data is malformed,
683 /// it might still be possible to work with the data. In the land of CSV, there
684 /// is no "right" or "wrong," only "right" and "less right."
685 ///
686 /// With that said, a number of errors can occur while reading CSV data:
687 ///
688 /// * By default, all records in CSV data must have the same number of fields.
689 ///   If a record is found with a different number of fields than a prior
690 ///   record, then an error is returned. This behavior can be disabled by
691 ///   enabling flexible parsing via the `flexible` method on
692 ///   [`ReaderBuilder`](struct.ReaderBuilder.html).
693 /// * When reading CSV data from a resource (like a file), it is possible for
694 ///   reading from the underlying resource to fail. This will return an error.
///   For subsequent calls to the `Reader` after encountering such an error
696 ///   (unless `seek` is used), it will behave as if end of file had been
697 ///   reached, in order to avoid running into infinite loops when still
698 ///   attempting to read the next record when one has errored.
699 /// * When reading CSV data into `String` or `&str` fields (e.g., via a
700 ///   [`StringRecord`](struct.StringRecord.html)), UTF-8 is strictly
701 ///   enforced. If CSV data is invalid UTF-8, then an error is returned. If
702 ///   you want to read invalid UTF-8, then you should use the byte oriented
703 ///   APIs such as [`ByteRecord`](struct.ByteRecord.html). If you need explicit
704 ///   support for another encoding entirely, then you'll need to use another
705 ///   crate to transcode your CSV data to UTF-8 before parsing it.
706 /// * When using Serde to deserialize CSV data into Rust types, it is possible
707 ///   for a number of additional errors to occur. For example, deserializing
708 ///   a field `xyz` into an `i32` field will result in an error.
709 ///
710 /// For more details on the precise semantics of errors, see the
711 /// [`Error`](enum.Error.html) type.
#[derive(Debug)]
pub struct Reader<R> {
    /// The underlying CSV parser.
    ///
    /// We explicitly put this on the heap because CoreReader embeds an entire
    /// DFA transition table, which along with other things, tallies up to
    /// almost 500 bytes on the stack.
    core: Box<CoreReader>,
    /// The underlying reader.
    ///
    /// It is wrapped in an `io::BufReader` whose capacity is taken from the
    /// builder, which is why callers need not add buffering of their own.
    rdr: io::BufReader<R>,
    /// Various state tracking.
    ///
    /// There is more state embedded in the `CoreReader`.
    state: ReaderState,
}
727 
#[derive(Debug)]
struct ReaderState {
    /// When set, this contains the first row of any parsed CSV data.
    ///
    /// This is always populated, regardless of whether `has_headers` is set.
    headers: Option<Headers>,
    /// When set, the first row of parsed CSV data is excluded from things
    /// that read records, like iterators and `read_record`.
    has_headers: bool,
    /// When set, there is no restriction on the length of records. When not
    /// set, every record must have the same number of fields, or else an error
    /// is reported.
    flexible: bool,
    /// Which values, if any, have surrounding whitespace trimmed. This is
    /// copied from the builder's `trim` setting when the reader is created.
    trim: Trim,
    /// The number of fields in the first record parsed.
    first_field_count: Option<u64>,
    /// The current position of the parser.
    ///
    /// Note that this position is only observable by callers at the start
    /// of a record. More granular positions are not supported.
    cur_pos: Position,
    /// Whether the first record has been read or not.
    first: bool,
    /// Whether the reader has been seeked or not.
    seeked: bool,
    /// Whether EOF of the underlying reader has been reached or not.
    ///
    /// IO errors on the underlying reader will be considered as an EOF for
    /// subsequent read attempts, as it would be incorrect to keep on trying
    /// to read when the underlying reader has broken.
    ///
    /// For clarity, having the best `Debug` impl and in case they need to be
    /// treated differently at some point, we store whether the `EOF` is
    /// considered because an actual EOF happened, or because we encountered
    /// an IO error.
    /// This has no additional runtime cost.
    eof: ReaderEofState,
}
766 
767 /// Whether EOF of the underlying reader has been reached or not.
768 ///
769 /// IO errors on the underlying reader will be considered as an EOF for
770 /// subsequent read attempts, as it would be incorrect to keep on trying
771 /// to read when the underlying reader has broken.
772 ///
773 /// For clarity, having the best `Debug` impl and in case they need to be
774 /// treated differently at some point, we store whether the `EOF` is
/// considered because an actual EOF happened, or because we encountered
/// an IO error.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum ReaderEofState {
    /// Neither EOF nor an IO error has been recorded. This is the state a
    /// freshly constructed reader starts in.
    NotEof,
    /// An actual EOF happened on the underlying reader.
    Eof,
    /// An IO error was encountered on the underlying reader; subsequent read
    /// attempts treat this like EOF.
    IOError,
}
783 
784 /// Headers encapsulates any data associated with the headers of CSV data.
785 ///
786 /// The headers always correspond to the first row.
#[derive(Debug)]
struct Headers {
    /// The header, as raw bytes.
    byte_record: ByteRecord,
    /// The header, as valid UTF-8 (or a UTF-8 error).
    ///
    /// Stored as a `Result` so that a UTF-8 validation failure is kept
    /// alongside the raw bytes instead of being discarded.
    string_record: result::Result<StringRecord, Utf8Error>,
}
794 
795 impl Reader<Reader<File>> {
796     /// Create a new CSV parser with a default configuration for the given
797     /// file path.
798     ///
799     /// To customize CSV parsing, use a `ReaderBuilder`.
800     ///
801     /// # Example
802     ///
803     /// ```no_run
804     /// use std::error::Error;
805     /// use csv::Reader;
806     ///
807     /// # fn main() { example().unwrap(); }
808     /// fn example() -> Result<(), Box<dyn Error>> {
809     ///     let mut rdr = Reader::from_path("foo.csv")?;
810     ///     for result in rdr.records() {
811     ///         let record = result?;
812     ///         println!("{:?}", record);
813     ///     }
814     ///     Ok(())
815     /// }
816     /// ```
from_path<P: AsRef<Path>>(path: P) -> Result<Reader<File>>817     pub fn from_path<P: AsRef<Path>>(path: P) -> Result<Reader<File>> {
818         ReaderBuilder::new().from_path(path)
819     }
820 }
821 
822 impl<R: io::Read> Reader<R> {
823     /// Create a new CSV reader given a builder and a source of underlying
824     /// bytes.
new(builder: &ReaderBuilder, rdr: R) -> Reader<R>825     fn new(builder: &ReaderBuilder, rdr: R) -> Reader<R> {
826         Reader {
827             core: Box::new(builder.builder.build()),
828             rdr: io::BufReader::with_capacity(builder.capacity, rdr),
829             state: ReaderState {
830                 headers: None,
831                 has_headers: builder.has_headers,
832                 flexible: builder.flexible,
833                 trim: builder.trim,
834                 first_field_count: None,
835                 cur_pos: Position::new(),
836                 first: false,
837                 seeked: false,
838                 eof: ReaderEofState::NotEof,
839             },
840         }
841     }
842 
843     /// Create a new CSV parser with a default configuration for the given
844     /// reader.
845     ///
846     /// To customize CSV parsing, use a `ReaderBuilder`.
847     ///
848     /// # Example
849     ///
850     /// ```
851     /// use std::error::Error;
852     /// use csv::Reader;
853     ///
854     /// # fn main() { example().unwrap(); }
855     /// fn example() -> Result<(), Box<dyn Error>> {
856     ///     let data = "\
857     /// city,country,pop
858     /// Boston,United States,4628910
859     /// Concord,United States,42695
860     /// ";
861     ///     let mut rdr = Reader::from_reader(data.as_bytes());
862     ///     for result in rdr.records() {
863     ///         let record = result?;
864     ///         println!("{:?}", record);
865     ///     }
866     ///     Ok(())
867     /// }
868     /// ```
    pub fn from_reader(rdr: R) -> Reader<R> {
        // Delegate to a freshly created builder, which supplies the default
        // configuration for the new reader.
        ReaderBuilder::new().from_reader(rdr)
    }
872 
873     /// Returns a borrowed iterator over deserialized records.
874     ///
875     /// Each item yielded by this iterator is a `Result<D, Error>`.
876     /// Therefore, in order to access the record, callers must handle the
877     /// possibility of error (typically with `try!` or `?`).
878     ///
879     /// If `has_headers` was enabled via a `ReaderBuilder` (which is the
880     /// default), then this does not include the first record. Additionally,
881     /// if `has_headers` is enabled, then deserializing into a struct will
882     /// automatically align the values in each row to the fields of a struct
883     /// based on the header row.
884     ///
885     /// # Example
886     ///
887     /// This shows how to deserialize CSV data into normal Rust structs. The
888     /// fields of the header row are used to match up the values in each row
889     /// to the fields of the struct.
890     ///
891     /// ```
892     /// use std::error::Error;
893     ///
894     /// #[derive(Debug, serde::Deserialize, Eq, PartialEq)]
895     /// struct Row {
896     ///     city: String,
897     ///     country: String,
898     ///     #[serde(rename = "popcount")]
899     ///     population: u64,
900     /// }
901     ///
902     /// # fn main() { example().unwrap(); }
903     /// fn example() -> Result<(), Box<dyn Error>> {
904     ///     let data = "\
905     /// city,country,popcount
906     /// Boston,United States,4628910
907     /// ";
908     ///     let mut rdr = csv::Reader::from_reader(data.as_bytes());
909     ///     let mut iter = rdr.deserialize();
910     ///
911     ///     if let Some(result) = iter.next() {
912     ///         let record: Row = result?;
913     ///         assert_eq!(record, Row {
914     ///             city: "Boston".to_string(),
915     ///             country: "United States".to_string(),
916     ///             population: 4628910,
917     ///         });
918     ///         Ok(())
919     ///     } else {
920     ///         Err(From::from("expected at least one record but got none"))
921     ///     }
922     /// }
923     /// ```
924     ///
925     /// # Rules
926     ///
927     /// For the most part, any Rust type that maps straight-forwardly to a CSV
928     /// record is supported. This includes maps, structs, tuples and tuple
929     /// structs. Other Rust types, such as `Vec`s, arrays, and enums have
930     /// a more complicated story. In general, when working with CSV data, one
931     /// should avoid *nested sequences* as much as possible.
932     ///
933     /// Maps, structs, tuples and tuple structs map to CSV records in a simple
934     /// way. Tuples and tuple structs decode their fields in the order that
935     /// they are defined. Structs will do the same only if `has_headers` has
936     /// been disabled using [`ReaderBuilder`](struct.ReaderBuilder.html),
937     /// otherwise, structs and maps are deserialized based on the fields
938     /// defined in the header row. (If there is no header row, then
939     /// deserializing into a map will result in an error.)
940     ///
941     /// Nested sequences are supported in a limited capacity. Namely, they
942     /// are flattened. As a result, it's often useful to use a `Vec` to capture
943     /// a "tail" of fields in a record:
944     ///
945     /// ```
946     /// use std::error::Error;
947     ///
948     /// #[derive(Debug, serde::Deserialize, Eq, PartialEq)]
949     /// struct Row {
950     ///     label: String,
951     ///     values: Vec<i32>,
952     /// }
953     ///
954     /// # fn main() { example().unwrap(); }
955     /// fn example() -> Result<(), Box<dyn Error>> {
956     ///     let data = "foo,1,2,3";
957     ///     let mut rdr = csv::ReaderBuilder::new()
958     ///         .has_headers(false)
959     ///         .from_reader(data.as_bytes());
960     ///     let mut iter = rdr.deserialize();
961     ///
962     ///     if let Some(result) = iter.next() {
963     ///         let record: Row = result?;
964     ///         assert_eq!(record, Row {
965     ///             label: "foo".to_string(),
966     ///             values: vec![1, 2, 3],
967     ///         });
968     ///         Ok(())
969     ///     } else {
970     ///         Err(From::from("expected at least one record but got none"))
971     ///     }
972     /// }
973     /// ```
974     ///
975     /// In the above example, adding another field to the `Row` struct after
976     /// the `values` field will result in a deserialization error. This is
977     /// because the deserializer doesn't know when to stop reading fields
978     /// into the `values` vector, so it will consume the rest of the fields in
979     /// the record leaving none left over for the additional field.
980     ///
981     /// Finally, simple enums in Rust can be deserialized as well. Namely,
982     /// enums must either be variants with no arguments or variants with a
983     /// single argument. Variants with no arguments are deserialized based on
984     /// which variant name the field matches. Variants with a single argument
985     /// are deserialized based on which variant can store the data. The latter
986     /// is only supported when using "untagged" enum deserialization. The
987     /// following example shows both forms in action:
988     ///
989     /// ```
990     /// use std::error::Error;
991     ///
992     /// #[derive(Debug, serde::Deserialize, PartialEq)]
993     /// struct Row {
994     ///     label: Label,
995     ///     value: Number,
996     /// }
997     ///
998     /// #[derive(Debug, serde::Deserialize, PartialEq)]
999     /// #[serde(rename_all = "lowercase")]
1000     /// enum Label {
1001     ///     Celsius,
1002     ///     Fahrenheit,
1003     /// }
1004     ///
1005     /// #[derive(Debug, serde::Deserialize, PartialEq)]
1006     /// #[serde(untagged)]
1007     /// enum Number {
1008     ///     Integer(i64),
1009     ///     Float(f64),
1010     /// }
1011     ///
1012     /// # fn main() { example().unwrap(); }
1013     /// fn example() -> Result<(), Box<dyn Error>> {
1014     ///     let data = "\
1015     /// label,value
1016     /// celsius,22.2222
1017     /// fahrenheit,72
1018     /// ";
1019     ///     let mut rdr = csv::Reader::from_reader(data.as_bytes());
1020     ///     let mut iter = rdr.deserialize();
1021     ///
1022     ///     // Read the first record.
1023     ///     if let Some(result) = iter.next() {
1024     ///         let record: Row = result?;
1025     ///         assert_eq!(record, Row {
1026     ///             label: Label::Celsius,
1027     ///             value: Number::Float(22.2222),
1028     ///         });
1029     ///     } else {
1030     ///         return Err(From::from(
1031     ///             "expected at least two records but got none"));
1032     ///     }
1033     ///
1034     ///     // Read the second record.
1035     ///     if let Some(result) = iter.next() {
1036     ///         let record: Row = result?;
1037     ///         assert_eq!(record, Row {
1038     ///             label: Label::Fahrenheit,
1039     ///             value: Number::Integer(72),
1040     ///         });
1041     ///         Ok(())
1042     ///     } else {
1043     ///         Err(From::from(
1044     ///             "expected at least two records but got only one"))
1045     ///     }
1046     /// }
1047     /// ```
    pub fn deserialize<D>(&mut self) -> DeserializeRecordsIter<R, D>
    where
        D: DeserializeOwned,
    {
        // The iterator is built from the mutable borrow of this reader; `D`
        // is the caller-chosen type each record is deserialized into.
        DeserializeRecordsIter::new(self)
    }
1054 
1055     /// Returns an owned iterator over deserialized records.
1056     ///
1057     /// Each item yielded by this iterator is a `Result<D, Error>`.
1058     /// Therefore, in order to access the record, callers must handle the
1059     /// possibility of error (typically with `try!` or `?`).
1060     ///
1061     /// This is mostly useful when you want to return a CSV iterator or store
1062     /// it somewhere.
1063     ///
1064     /// If `has_headers` was enabled via a `ReaderBuilder` (which is the
1065     /// default), then this does not include the first record. Additionally,
1066     /// if `has_headers` is enabled, then deserializing into a struct will
1067     /// automatically align the values in each row to the fields of a struct
1068     /// based on the header row.
1069     ///
1070     /// For more detailed deserialization rules, see the documentation on the
1071     /// `deserialize` method.
1072     ///
1073     /// # Example
1074     ///
1075     /// ```
1076     /// use std::error::Error;
1077     ///
1078     /// #[derive(Debug, serde::Deserialize, Eq, PartialEq)]
1079     /// struct Row {
1080     ///     city: String,
1081     ///     country: String,
1082     ///     #[serde(rename = "popcount")]
1083     ///     population: u64,
1084     /// }
1085     ///
1086     /// # fn main() { example().unwrap(); }
1087     /// fn example() -> Result<(), Box<dyn Error>> {
1088     ///     let data = "\
1089     /// city,country,popcount
1090     /// Boston,United States,4628910
1091     /// ";
1092     ///     let rdr = csv::Reader::from_reader(data.as_bytes());
1093     ///     let mut iter = rdr.into_deserialize();
1094     ///
1095     ///     if let Some(result) = iter.next() {
1096     ///         let record: Row = result?;
1097     ///         assert_eq!(record, Row {
1098     ///             city: "Boston".to_string(),
1099     ///             country: "United States".to_string(),
1100     ///             population: 4628910,
1101     ///         });
1102     ///         Ok(())
1103     ///     } else {
1104     ///         Err(From::from("expected at least one record but got none"))
1105     ///     }
1106     /// }
1107     /// ```
    pub fn into_deserialize<D>(self) -> DeserializeRecordsIntoIter<R, D>
    where
        D: DeserializeOwned,
    {
        // The reader is moved into the iterator, so the iterator owns it;
        // `D` is the caller-chosen type each record is deserialized into.
        DeserializeRecordsIntoIter::new(self)
    }
1114 
1115     /// Returns a borrowed iterator over all records as strings.
1116     ///
1117     /// Each item yielded by this iterator is a `Result<StringRecord, Error>`.
1118     /// Therefore, in order to access the record, callers must handle the
1119     /// possibility of error (typically with `try!` or `?`).
1120     ///
1121     /// If `has_headers` was enabled via a `ReaderBuilder` (which is the
1122     /// default), then this does not include the first record.
1123     ///
1124     /// # Example
1125     ///
1126     /// ```
1127     /// use std::error::Error;
1128     /// use csv::Reader;
1129     ///
1130     /// # fn main() { example().unwrap(); }
1131     /// fn example() -> Result<(), Box<dyn Error>> {
1132     ///     let data = "\
1133     /// city,country,pop
1134     /// Boston,United States,4628910
1135     /// ";
1136     ///     let mut rdr = Reader::from_reader(data.as_bytes());
1137     ///     let mut iter = rdr.records();
1138     ///
1139     ///     if let Some(result) = iter.next() {
1140     ///         let record = result?;
1141     ///         assert_eq!(record, vec!["Boston", "United States", "4628910"]);
1142     ///         Ok(())
1143     ///     } else {
1144     ///         Err(From::from("expected at least one record but got none"))
1145     ///     }
1146     /// }
1147     /// ```
    pub fn records(&mut self) -> StringRecordsIter<R> {
        // The iterator is built from the mutable borrow of this reader.
        StringRecordsIter::new(self)
    }
1151 
1152     /// Returns an owned iterator over all records as strings.
1153     ///
1154     /// Each item yielded by this iterator is a `Result<StringRecord, Error>`.
1155     /// Therefore, in order to access the record, callers must handle the
1156     /// possibility of error (typically with `try!` or `?`).
1157     ///
1158     /// This is mostly useful when you want to return a CSV iterator or store
1159     /// it somewhere.
1160     ///
1161     /// If `has_headers` was enabled via a `ReaderBuilder` (which is the
1162     /// default), then this does not include the first record.
1163     ///
1164     /// # Example
1165     ///
1166     /// ```
1167     /// use std::error::Error;
1168     /// use csv::Reader;
1169     ///
1170     /// # fn main() { example().unwrap(); }
1171     /// fn example() -> Result<(), Box<dyn Error>> {
1172     ///     let data = "\
1173     /// city,country,pop
1174     /// Boston,United States,4628910
1175     /// ";
1176     ///     let rdr = Reader::from_reader(data.as_bytes());
1177     ///     let mut iter = rdr.into_records();
1178     ///
1179     ///     if let Some(result) = iter.next() {
1180     ///         let record = result?;
1181     ///         assert_eq!(record, vec!["Boston", "United States", "4628910"]);
1182     ///         Ok(())
1183     ///     } else {
1184     ///         Err(From::from("expected at least one record but got none"))
1185     ///     }
1186     /// }
1187     /// ```
    pub fn into_records(self) -> StringRecordsIntoIter<R> {
        // The reader is moved into the iterator, so the iterator owns it.
        StringRecordsIntoIter::new(self)
    }
1191 
1192     /// Returns a borrowed iterator over all records as raw bytes.
1193     ///
1194     /// Each item yielded by this iterator is a `Result<ByteRecord, Error>`.
1195     /// Therefore, in order to access the record, callers must handle the
1196     /// possibility of error (typically with `try!` or `?`).
1197     ///
1198     /// If `has_headers` was enabled via a `ReaderBuilder` (which is the
1199     /// default), then this does not include the first record.
1200     ///
1201     /// # Example
1202     ///
1203     /// ```
1204     /// use std::error::Error;
1205     /// use csv::Reader;
1206     ///
1207     /// # fn main() { example().unwrap(); }
1208     /// fn example() -> Result<(), Box<dyn Error>> {
1209     ///     let data = "\
1210     /// city,country,pop
1211     /// Boston,United States,4628910
1212     /// ";
1213     ///     let mut rdr = Reader::from_reader(data.as_bytes());
1214     ///     let mut iter = rdr.byte_records();
1215     ///
1216     ///     if let Some(result) = iter.next() {
1217     ///         let record = result?;
1218     ///         assert_eq!(record, vec!["Boston", "United States", "4628910"]);
1219     ///         Ok(())
1220     ///     } else {
1221     ///         Err(From::from("expected at least one record but got none"))
1222     ///     }
1223     /// }
1224     /// ```
    pub fn byte_records(&mut self) -> ByteRecordsIter<R> {
        // The iterator is built from the mutable borrow of this reader.
        ByteRecordsIter::new(self)
    }
1228 
1229     /// Returns an owned iterator over all records as raw bytes.
1230     ///
1231     /// Each item yielded by this iterator is a `Result<ByteRecord, Error>`.
1232     /// Therefore, in order to access the record, callers must handle the
1233     /// possibility of error (typically with `try!` or `?`).
1234     ///
1235     /// This is mostly useful when you want to return a CSV iterator or store
1236     /// it somewhere.
1237     ///
1238     /// If `has_headers` was enabled via a `ReaderBuilder` (which is the
1239     /// default), then this does not include the first record.
1240     ///
1241     /// # Example
1242     ///
1243     /// ```
1244     /// use std::error::Error;
1245     /// use csv::Reader;
1246     ///
1247     /// # fn main() { example().unwrap(); }
1248     /// fn example() -> Result<(), Box<dyn Error>> {
1249     ///     let data = "\
1250     /// city,country,pop
1251     /// Boston,United States,4628910
1252     /// ";
1253     ///     let rdr = Reader::from_reader(data.as_bytes());
1254     ///     let mut iter = rdr.into_byte_records();
1255     ///
1256     ///     if let Some(result) = iter.next() {
1257     ///         let record = result?;
1258     ///         assert_eq!(record, vec!["Boston", "United States", "4628910"]);
1259     ///         Ok(())
1260     ///     } else {
1261     ///         Err(From::from("expected at least one record but got none"))
1262     ///     }
1263     /// }
1264     /// ```
    pub fn into_byte_records(self) -> ByteRecordsIntoIter<R> {
        // The reader is moved into the iterator, so the iterator owns it.
        ByteRecordsIntoIter::new(self)
    }
1268 
1269     /// Returns a reference to the first row read by this parser.
1270     ///
1271     /// If no row has been read yet, then this will force parsing of the first
1272     /// row.
1273     ///
1274     /// If there was a problem parsing the row or if it wasn't valid UTF-8,
1275     /// then this returns an error.
1276     ///
1277     /// If the underlying reader emits EOF before any data, then this returns
1278     /// an empty record.
1279     ///
1280     /// Note that this method may be used regardless of whether `has_headers`
1281     /// was enabled (but it is enabled by default).
1282     ///
1283     /// # Example
1284     ///
1285     /// This example shows how to get the header row of CSV data. Notice that
1286     /// the header row does not appear as a record in the iterator!
1287     ///
1288     /// ```
1289     /// use std::error::Error;
1290     /// use csv::Reader;
1291     ///
1292     /// # fn main() { example().unwrap(); }
1293     /// fn example() -> Result<(), Box<dyn Error>> {
1294     ///     let data = "\
1295     /// city,country,pop
1296     /// Boston,United States,4628910
1297     /// ";
1298     ///     let mut rdr = Reader::from_reader(data.as_bytes());
1299     ///
1300     ///     // We can read the headers before iterating.
1301     ///     {
1302     ///         // `headers` borrows from the reader, so we put this in its
1303     ///         // own scope. That way, the borrow ends before we try iterating
1304     ///         // below. Alternatively, we could clone the headers.
1305     ///         let headers = rdr.headers()?;
1306     ///         assert_eq!(headers, vec!["city", "country", "pop"]);
1307     ///     }
1308     ///
1309     ///     if let Some(result) = rdr.records().next() {
1310     ///         let record = result?;
1311     ///         assert_eq!(record, vec!["Boston", "United States", "4628910"]);
1312     ///     } else {
1313     ///         return Err(From::from(
1314     ///             "expected at least one record but got none"))
1315     ///     }
1316     ///
1317     ///     // We can also read the headers after iterating.
1318     ///     let headers = rdr.headers()?;
1319     ///     assert_eq!(headers, vec!["city", "country", "pop"]);
1320     ///     Ok(())
1321     /// }
1322     /// ```
headers(&mut self) -> Result<&StringRecord>1323     pub fn headers(&mut self) -> Result<&StringRecord> {
1324         if self.state.headers.is_none() {
1325             let mut record = ByteRecord::new();
1326             self.read_byte_record_impl(&mut record)?;
1327             self.set_headers_impl(Err(record));
1328         }
1329         let headers = self.state.headers.as_ref().unwrap();
1330         match headers.string_record {
1331             Ok(ref record) => Ok(record),
1332             Err(ref err) => Err(Error::new(ErrorKind::Utf8 {
1333                 pos: headers.byte_record.position().map(Clone::clone),
1334                 err: err.clone(),
1335             })),
1336         }
1337     }
1338 
1339     /// Returns a reference to the first row read by this parser as raw bytes.
1340     ///
1341     /// If no row has been read yet, then this will force parsing of the first
1342     /// row.
1343     ///
1344     /// If there was a problem parsing the row then this returns an error.
1345     ///
1346     /// If the underlying reader emits EOF before any data, then this returns
1347     /// an empty record.
1348     ///
1349     /// Note that this method may be used regardless of whether `has_headers`
1350     /// was enabled (but it is enabled by default).
1351     ///
1352     /// # Example
1353     ///
1354     /// This example shows how to get the header row of CSV data. Notice that
1355     /// the header row does not appear as a record in the iterator!
1356     ///
1357     /// ```
1358     /// use std::error::Error;
1359     /// use csv::Reader;
1360     ///
1361     /// # fn main() { example().unwrap(); }
1362     /// fn example() -> Result<(), Box<dyn Error>> {
1363     ///     let data = "\
1364     /// city,country,pop
1365     /// Boston,United States,4628910
1366     /// ";
1367     ///     let mut rdr = Reader::from_reader(data.as_bytes());
1368     ///
1369     ///     // We can read the headers before iterating.
1370     ///     {
1371     ///         // `headers` borrows from the reader, so we put this in its
1372     ///         // own scope. That way, the borrow ends before we try iterating
1373     ///         // below. Alternatively, we could clone the headers.
1374     ///         let headers = rdr.byte_headers()?;
1375     ///         assert_eq!(headers, vec!["city", "country", "pop"]);
1376     ///     }
1377     ///
1378     ///     if let Some(result) = rdr.byte_records().next() {
1379     ///         let record = result?;
1380     ///         assert_eq!(record, vec!["Boston", "United States", "4628910"]);
1381     ///     } else {
1382     ///         return Err(From::from(
1383     ///             "expected at least one record but got none"))
1384     ///     }
1385     ///
1386     ///     // We can also read the headers after iterating.
1387     ///     let headers = rdr.byte_headers()?;
1388     ///     assert_eq!(headers, vec!["city", "country", "pop"]);
1389     ///     Ok(())
1390     /// }
1391     /// ```
byte_headers(&mut self) -> Result<&ByteRecord>1392     pub fn byte_headers(&mut self) -> Result<&ByteRecord> {
1393         if self.state.headers.is_none() {
1394             let mut record = ByteRecord::new();
1395             self.read_byte_record_impl(&mut record)?;
1396             self.set_headers_impl(Err(record));
1397         }
1398         Ok(&self.state.headers.as_ref().unwrap().byte_record)
1399     }
1400 
1401     /// Set the headers of this CSV parser manually.
1402     ///
1403     /// This overrides any other setting (including `set_byte_headers`). Any
1404     /// automatic detection of headers is disabled. This may be called at any
1405     /// time.
1406     ///
1407     /// # Example
1408     ///
1409     /// ```
1410     /// use std::error::Error;
1411     /// use csv::{Reader, StringRecord};
1412     ///
1413     /// # fn main() { example().unwrap(); }
1414     /// fn example() -> Result<(), Box<dyn Error>> {
1415     ///     let data = "\
1416     /// city,country,pop
1417     /// Boston,United States,4628910
1418     /// ";
1419     ///     let mut rdr = Reader::from_reader(data.as_bytes());
1420     ///
1421     ///     assert_eq!(rdr.headers()?, vec!["city", "country", "pop"]);
1422     ///     rdr.set_headers(StringRecord::from(vec!["a", "b", "c"]));
1423     ///     assert_eq!(rdr.headers()?, vec!["a", "b", "c"]);
1424     ///
1425     ///     Ok(())
1426     /// }
1427     /// ```
    pub fn set_headers(&mut self, headers: StringRecord) {
        // `Ok` hands the helper headers that are already valid UTF-8; the
        // helper derives the raw byte form from them.
        self.set_headers_impl(Ok(headers));
    }
1431 
1432     /// Set the headers of this CSV parser manually as raw bytes.
1433     ///
1434     /// This overrides any other setting (including `set_headers`). Any
1435     /// automatic detection of headers is disabled. This may be called at any
1436     /// time.
1437     ///
1438     /// # Example
1439     ///
1440     /// ```
1441     /// use std::error::Error;
1442     /// use csv::{Reader, ByteRecord};
1443     ///
1444     /// # fn main() { example().unwrap(); }
1445     /// fn example() -> Result<(), Box<dyn Error>> {
1446     ///     let data = "\
1447     /// city,country,pop
1448     /// Boston,United States,4628910
1449     /// ";
1450     ///     let mut rdr = Reader::from_reader(data.as_bytes());
1451     ///
1452     ///     assert_eq!(rdr.byte_headers()?, vec!["city", "country", "pop"]);
1453     ///     rdr.set_byte_headers(ByteRecord::from(vec!["a", "b", "c"]));
1454     ///     assert_eq!(rdr.byte_headers()?, vec!["a", "b", "c"]);
1455     ///
1456     ///     Ok(())
1457     /// }
1458     /// ```
    pub fn set_byte_headers(&mut self, headers: ByteRecord) {
        // `Err` hands the helper raw bytes; the helper attempts the UTF-8
        // conversion and stores either the string form or the UTF-8 error.
        self.set_headers_impl(Err(headers));
    }
1462 
set_headers_impl( &mut self, headers: result::Result<StringRecord, ByteRecord>, )1463     fn set_headers_impl(
1464         &mut self,
1465         headers: result::Result<StringRecord, ByteRecord>,
1466     ) {
1467         // If we have string headers, then get byte headers. But if we have
1468         // byte headers, then get the string headers (or a UTF-8 error).
1469         let (mut str_headers, mut byte_headers) = match headers {
1470             Ok(string) => {
1471                 let bytes = string.clone().into_byte_record();
1472                 (Ok(string), bytes)
1473             }
1474             Err(bytes) => {
1475                 match StringRecord::from_byte_record(bytes.clone()) {
1476                     Ok(str_headers) => (Ok(str_headers), bytes),
1477                     Err(err) => (Err(err.utf8_error().clone()), bytes),
1478                 }
1479             }
1480         };
1481         if self.state.trim.should_trim_headers() {
1482             if let Ok(ref mut str_headers) = str_headers.as_mut() {
1483                 str_headers.trim();
1484             }
1485             byte_headers.trim();
1486         }
1487         self.state.headers = Some(Headers {
1488             byte_record: byte_headers,
1489             string_record: str_headers,
1490         });
1491     }
1492 
1493     /// Read a single row into the given record. Returns false when no more
1494     /// records could be read.
1495     ///
1496     /// If `has_headers` was enabled via a `ReaderBuilder` (which is the
1497     /// default), then this will never read the first record.
1498     ///
    /// This method is useful when you want to read records as fast as
    /// possible. It's less ergonomic than an iterator, but it permits the
1501     /// caller to reuse the `StringRecord` allocation, which usually results
1502     /// in higher throughput.
1503     ///
1504     /// Records read via this method are guaranteed to have a position set
1505     /// on them, even if the reader is at EOF or if an error is returned.
1506     ///
1507     /// # Example
1508     ///
1509     /// ```
1510     /// use std::error::Error;
1511     /// use csv::{Reader, StringRecord};
1512     ///
1513     /// # fn main() { example().unwrap(); }
1514     /// fn example() -> Result<(), Box<dyn Error>> {
1515     ///     let data = "\
1516     /// city,country,pop
1517     /// Boston,United States,4628910
1518     /// ";
1519     ///     let mut rdr = Reader::from_reader(data.as_bytes());
1520     ///     let mut record = StringRecord::new();
1521     ///
1522     ///     if rdr.read_record(&mut record)? {
1523     ///         assert_eq!(record, vec!["Boston", "United States", "4628910"]);
1524     ///         Ok(())
1525     ///     } else {
1526     ///         Err(From::from("expected at least one record but got none"))
1527     ///     }
1528     /// }
1529     /// ```
read_record(&mut self, record: &mut StringRecord) -> Result<bool>1530     pub fn read_record(&mut self, record: &mut StringRecord) -> Result<bool> {
1531         let result = record.read(self);
1532         // We need to trim again because trimming string records includes
1533         // Unicode whitespace. (ByteRecord trimming only includes ASCII
1534         // whitespace.)
1535         if self.state.trim.should_trim_fields() {
1536             record.trim();
1537         }
1538         result
1539     }
1540 
1541     /// Read a single row into the given byte record. Returns false when no
1542     /// more records could be read.
1543     ///
1544     /// If `has_headers` was enabled via a `ReaderBuilder` (which is the
1545     /// default), then this will never read the first record.
1546     ///
    /// This method is useful when you want to read records as fast as
    /// possible. It's less ergonomic than an iterator, but it permits the
1549     /// caller to reuse the `ByteRecord` allocation, which usually results
1550     /// in higher throughput.
1551     ///
1552     /// Records read via this method are guaranteed to have a position set
1553     /// on them, even if the reader is at EOF or if an error is returned.
1554     ///
1555     /// # Example
1556     ///
1557     /// ```
1558     /// use std::error::Error;
1559     /// use csv::{ByteRecord, Reader};
1560     ///
1561     /// # fn main() { example().unwrap(); }
1562     /// fn example() -> Result<(), Box<dyn Error>> {
1563     ///     let data = "\
1564     /// city,country,pop
1565     /// Boston,United States,4628910
1566     /// ";
1567     ///     let mut rdr = Reader::from_reader(data.as_bytes());
1568     ///     let mut record = ByteRecord::new();
1569     ///
1570     ///     if rdr.read_byte_record(&mut record)? {
1571     ///         assert_eq!(record, vec!["Boston", "United States", "4628910"]);
1572     ///         Ok(())
1573     ///     } else {
1574     ///         Err(From::from("expected at least one record but got none"))
1575     ///     }
1576     /// }
1577     /// ```
read_byte_record( &mut self, record: &mut ByteRecord, ) -> Result<bool>1578     pub fn read_byte_record(
1579         &mut self,
1580         record: &mut ByteRecord,
1581     ) -> Result<bool> {
1582         if !self.state.seeked && !self.state.has_headers && !self.state.first {
1583             // If the caller indicated "no headers" and we haven't yielded the
1584             // first record yet, then we should yield our header row if we have
1585             // one.
1586             if let Some(ref headers) = self.state.headers {
1587                 self.state.first = true;
1588                 record.clone_from(&headers.byte_record);
1589                 if self.state.trim.should_trim_fields() {
1590                     record.trim();
1591                 }
1592                 return Ok(!record.is_empty());
1593             }
1594         }
1595         let ok = self.read_byte_record_impl(record)?;
1596         self.state.first = true;
1597         if !self.state.seeked && self.state.headers.is_none() {
1598             self.set_headers_impl(Err(record.clone()));
1599             // If the end user indicated that we have headers, then we should
1600             // never return the first row. Instead, we should attempt to
1601             // read and return the next one.
1602             if self.state.has_headers {
1603                 let result = self.read_byte_record_impl(record);
1604                 if self.state.trim.should_trim_fields() {
1605                     record.trim();
1606                 }
1607                 return result;
1608             }
1609         } else if self.state.trim.should_trim_fields() {
1610             record.trim();
1611         }
1612         Ok(ok)
1613     }
1614 
1615     /// Read a byte record from the underlying CSV reader, without accounting
1616     /// for headers.
1617     #[inline(always)]
read_byte_record_impl( &mut self, record: &mut ByteRecord, ) -> Result<bool>1618     fn read_byte_record_impl(
1619         &mut self,
1620         record: &mut ByteRecord,
1621     ) -> Result<bool> {
1622         use csv_core::ReadRecordResult::*;
1623 
1624         record.clear();
1625         record.set_position(Some(self.state.cur_pos.clone()));
1626         if self.state.eof != ReaderEofState::NotEof {
1627             return Ok(false);
1628         }
1629         let (mut outlen, mut endlen) = (0, 0);
1630         loop {
1631             let (res, nin, nout, nend) = {
1632                 let input_res = self.rdr.fill_buf();
1633                 if input_res.is_err() {
1634                     self.state.eof = ReaderEofState::IOError;
1635                 }
1636                 let input = input_res?;
1637                 let (fields, ends) = record.as_parts();
1638                 self.core.read_record(
1639                     input,
1640                     &mut fields[outlen..],
1641                     &mut ends[endlen..],
1642                 )
1643             };
1644             self.rdr.consume(nin);
1645             let byte = self.state.cur_pos.byte();
1646             self.state
1647                 .cur_pos
1648                 .set_byte(byte + nin as u64)
1649                 .set_line(self.core.line());
1650             outlen += nout;
1651             endlen += nend;
1652             match res {
1653                 InputEmpty => continue,
1654                 OutputFull => {
1655                     record.expand_fields();
1656                     continue;
1657                 }
1658                 OutputEndsFull => {
1659                     record.expand_ends();
1660                     continue;
1661                 }
1662                 Record => {
1663                     record.set_len(endlen);
1664                     self.state.add_record(record)?;
1665                     return Ok(true);
1666                 }
1667                 End => {
1668                     self.state.eof = ReaderEofState::Eof;
1669                     return Ok(false);
1670                 }
1671             }
1672         }
1673     }
1674 
1675     /// Return the current position of this CSV reader.
1676     ///
1677     /// The byte offset in the position returned can be used to `seek` this
1678     /// reader. In particular, seeking to a position returned here on the same
1679     /// data will result in parsing the same subsequent record.
1680     ///
1681     /// # Example: reading the position
1682     ///
1683     /// ```
1684     /// use std::{error::Error, io};
1685     /// use csv::{Reader, Position};
1686     ///
1687     /// # fn main() { example().unwrap(); }
1688     /// fn example() -> Result<(), Box<dyn Error>> {
1689     ///     let data = "\
1690     /// city,country,popcount
1691     /// Boston,United States,4628910
1692     /// Concord,United States,42695
1693     /// ";
1694     ///     let rdr = Reader::from_reader(io::Cursor::new(data));
1695     ///     let mut iter = rdr.into_records();
1696     ///     let mut pos = Position::new();
1697     ///     loop {
1698     ///         // Read the position immediately before each record.
1699     ///         let next_pos = iter.reader().position().clone();
1700     ///         if iter.next().is_none() {
1701     ///             break;
1702     ///         }
1703     ///         pos = next_pos;
1704     ///     }
1705     ///
1706     ///     // `pos` should now be the position immediately before the last
1707     ///     // record.
1708     ///     assert_eq!(pos.byte(), 51);
1709     ///     assert_eq!(pos.line(), 3);
1710     ///     assert_eq!(pos.record(), 2);
1711     ///     Ok(())
1712     /// }
1713     /// ```
position(&self) -> &Position1714     pub fn position(&self) -> &Position {
1715         &self.state.cur_pos
1716     }
1717 
1718     /// Returns true if and only if this reader has been exhausted.
1719     ///
1720     /// When this returns true, no more records can be read from this reader
1721     /// (unless it has been seeked to another position).
1722     ///
1723     /// # Example
1724     ///
1725     /// ```
1726     /// use std::{error::Error, io};
1727     /// use csv::{Reader, Position};
1728     ///
1729     /// # fn main() { example().unwrap(); }
1730     /// fn example() -> Result<(), Box<dyn Error>> {
1731     ///     let data = "\
1732     /// city,country,popcount
1733     /// Boston,United States,4628910
1734     /// Concord,United States,42695
1735     /// ";
1736     ///     let mut rdr = Reader::from_reader(io::Cursor::new(data));
1737     ///     assert!(!rdr.is_done());
1738     ///     for result in rdr.records() {
1739     ///         let _ = result?;
1740     ///     }
1741     ///     assert!(rdr.is_done());
1742     ///     Ok(())
1743     /// }
1744     /// ```
is_done(&self) -> bool1745     pub fn is_done(&self) -> bool {
1746         self.state.eof != ReaderEofState::NotEof
1747     }
1748 
1749     /// Returns true if and only if this reader has been configured to
1750     /// interpret the first record as a header record.
has_headers(&self) -> bool1751     pub fn has_headers(&self) -> bool {
1752         self.state.has_headers
1753     }
1754 
1755     /// Returns a reference to the underlying reader.
get_ref(&self) -> &R1756     pub fn get_ref(&self) -> &R {
1757         self.rdr.get_ref()
1758     }
1759 
1760     /// Returns a mutable reference to the underlying reader.
get_mut(&mut self) -> &mut R1761     pub fn get_mut(&mut self) -> &mut R {
1762         self.rdr.get_mut()
1763     }
1764 
1765     /// Unwraps this CSV reader, returning the underlying reader.
1766     ///
1767     /// Note that any leftover data inside this reader's internal buffer is
1768     /// lost.
into_inner(self) -> R1769     pub fn into_inner(self) -> R {
1770         self.rdr.into_inner()
1771     }
1772 }
1773 
1774 impl<R: io::Read + io::Seek> Reader<R> {
1775     /// Seeks the underlying reader to the position given.
1776     ///
1777     /// This comes with a few caveats:
1778     ///
1779     /// * Any internal buffer associated with this reader is cleared.
1780     /// * If the given position does not correspond to a position immediately
1781     ///   before the start of a record, then the behavior of this reader is
1782     ///   unspecified.
1783     /// * Any special logic that skips the first record in the CSV reader
1784     ///   when reading or iterating over records is disabled.
1785     ///
1786     /// If the given position has a byte offset equivalent to the current
1787     /// position, then no seeking is performed.
1788     ///
1789     /// If the header row has not already been read, then this will attempt
1790     /// to read the header row before seeking. Therefore, it is possible that
1791     /// this returns an error associated with reading CSV data.
1792     ///
1793     /// Note that seeking is performed based only on the byte offset in the
1794     /// given position. Namely, the record or line numbers in the position may
1795     /// be incorrect, but this will cause any future position generated by
1796     /// this CSV reader to be similarly incorrect.
1797     ///
1798     /// # Example: seek to parse a record twice
1799     ///
1800     /// ```
1801     /// use std::{error::Error, io};
1802     /// use csv::{Reader, Position};
1803     ///
1804     /// # fn main() { example().unwrap(); }
1805     /// fn example() -> Result<(), Box<dyn Error>> {
1806     ///     let data = "\
1807     /// city,country,popcount
1808     /// Boston,United States,4628910
1809     /// Concord,United States,42695
1810     /// ";
1811     ///     let rdr = Reader::from_reader(io::Cursor::new(data));
1812     ///     let mut iter = rdr.into_records();
1813     ///     let mut pos = Position::new();
1814     ///     loop {
1815     ///         // Read the position immediately before each record.
1816     ///         let next_pos = iter.reader().position().clone();
1817     ///         if iter.next().is_none() {
1818     ///             break;
1819     ///         }
1820     ///         pos = next_pos;
1821     ///     }
1822     ///
1823     ///     // Now seek the reader back to `pos`. This will let us read the
1824     ///     // last record again.
1825     ///     iter.reader_mut().seek(pos)?;
1826     ///     let mut iter = iter.into_reader().into_records();
1827     ///     if let Some(result) = iter.next() {
1828     ///         let record = result?;
1829     ///         assert_eq!(record, vec!["Concord", "United States", "42695"]);
1830     ///         Ok(())
1831     ///     } else {
1832     ///         Err(From::from("expected at least one record but got none"))
1833     ///     }
1834     /// }
1835     /// ```
seek(&mut self, pos: Position) -> Result<()>1836     pub fn seek(&mut self, pos: Position) -> Result<()> {
1837         self.byte_headers()?;
1838         self.state.seeked = true;
1839         if pos.byte() == self.state.cur_pos.byte() {
1840             return Ok(());
1841         }
1842         self.rdr.seek(io::SeekFrom::Start(pos.byte()))?;
1843         self.core.reset();
1844         self.core.set_line(pos.line());
1845         self.state.cur_pos = pos;
1846         self.state.eof = ReaderEofState::NotEof;
1847         Ok(())
1848     }
1849 
1850     /// This is like `seek`, but provides direct control over how the seeking
1851     /// operation is performed via `io::SeekFrom`.
1852     ///
1853     /// The `pos` position given *should* correspond the position indicated
1854     /// by `seek_from`, but there is no requirement. If the `pos` position
1855     /// given is incorrect, then the position information returned by this
1856     /// reader will be similarly incorrect.
1857     ///
1858     /// If the header row has not already been read, then this will attempt
1859     /// to read the header row before seeking. Therefore, it is possible that
1860     /// this returns an error associated with reading CSV data.
1861     ///
1862     /// Unlike `seek`, this will always cause an actual seek to be performed.
seek_raw( &mut self, seek_from: io::SeekFrom, pos: Position, ) -> Result<()>1863     pub fn seek_raw(
1864         &mut self,
1865         seek_from: io::SeekFrom,
1866         pos: Position,
1867     ) -> Result<()> {
1868         self.byte_headers()?;
1869         self.state.seeked = true;
1870         self.rdr.seek(seek_from)?;
1871         self.core.reset();
1872         self.core.set_line(pos.line());
1873         self.state.cur_pos = pos;
1874         self.state.eof = ReaderEofState::NotEof;
1875         Ok(())
1876     }
1877 }
1878 
1879 impl ReaderState {
1880     #[inline(always)]
add_record(&mut self, record: &ByteRecord) -> Result<()>1881     fn add_record(&mut self, record: &ByteRecord) -> Result<()> {
1882         let i = self.cur_pos.record();
1883         self.cur_pos.set_record(i.checked_add(1).unwrap());
1884         if !self.flexible {
1885             match self.first_field_count {
1886                 None => self.first_field_count = Some(record.len() as u64),
1887                 Some(expected) => {
1888                     if record.len() as u64 != expected {
1889                         return Err(Error::new(ErrorKind::UnequalLengths {
1890                             pos: record.position().map(Clone::clone),
1891                             expected_len: expected,
1892                             len: record.len() as u64,
1893                         }));
1894                     }
1895                 }
1896             }
1897         }
1898         Ok(())
1899     }
1900 }
1901 
/// An owned iterator over deserialized records.
///
/// The type parameter `R` refers to the underlying `io::Read` type, and `D`
/// refers to the type that this iterator will deserialize a record into.
pub struct DeserializeRecordsIntoIter<R, D> {
    /// The CSV reader that records are pulled from; owned by this iterator.
    rdr: Reader<R>,
    /// Scratch record that each row is read into before deserialization.
    rec: StringRecord,
    /// Header row captured at construction, passed along to deserialization.
    /// `None` when headers are disabled or could not be read.
    headers: Option<StringRecord>,
    /// Marker tying this iterator to `D` without storing a `D`.
    _priv: PhantomData<D>,
}
1912 
1913 impl<R: io::Read, D: DeserializeOwned> DeserializeRecordsIntoIter<R, D> {
new(mut rdr: Reader<R>) -> DeserializeRecordsIntoIter<R, D>1914     fn new(mut rdr: Reader<R>) -> DeserializeRecordsIntoIter<R, D> {
1915         let headers = if !rdr.state.has_headers {
1916             None
1917         } else {
1918             rdr.headers().ok().map(Clone::clone)
1919         };
1920         DeserializeRecordsIntoIter {
1921             rdr,
1922             rec: StringRecord::new(),
1923             headers,
1924             _priv: PhantomData,
1925         }
1926     }
1927 
1928     /// Return a reference to the underlying CSV reader.
reader(&self) -> &Reader<R>1929     pub fn reader(&self) -> &Reader<R> {
1930         &self.rdr
1931     }
1932 
1933     /// Return a mutable reference to the underlying CSV reader.
reader_mut(&mut self) -> &mut Reader<R>1934     pub fn reader_mut(&mut self) -> &mut Reader<R> {
1935         &mut self.rdr
1936     }
1937 
1938     /// Drop this iterator and return the underlying CSV reader.
into_reader(self) -> Reader<R>1939     pub fn into_reader(self) -> Reader<R> {
1940         self.rdr
1941     }
1942 }
1943 
1944 impl<R: io::Read, D: DeserializeOwned> Iterator
1945     for DeserializeRecordsIntoIter<R, D>
1946 {
1947     type Item = Result<D>;
1948 
next(&mut self) -> Option<Result<D>>1949     fn next(&mut self) -> Option<Result<D>> {
1950         match self.rdr.read_record(&mut self.rec) {
1951             Err(err) => Some(Err(err)),
1952             Ok(false) => None,
1953             Ok(true) => Some(self.rec.deserialize(self.headers.as_ref())),
1954         }
1955     }
1956 }
1957 
/// A borrowed iterator over deserialized records.
///
/// The lifetime parameter `'r` refers to the lifetime of the underlying
/// CSV `Reader`. The type parameter `R` refers to the underlying `io::Read`
/// type, and `D` refers to the type that this iterator will deserialize a
/// record into.
pub struct DeserializeRecordsIter<'r, R: 'r, D> {
    /// The borrowed CSV reader that records are pulled from.
    rdr: &'r mut Reader<R>,
    /// Scratch record that each row is read into before deserialization.
    rec: StringRecord,
    /// Header row captured at construction, passed along to deserialization.
    /// `None` when headers are disabled or could not be read.
    headers: Option<StringRecord>,
    /// Marker tying this iterator to `D` without storing a `D`.
    _priv: PhantomData<D>,
}
1970 
1971 impl<'r, R: io::Read, D: DeserializeOwned> DeserializeRecordsIter<'r, R, D> {
new(rdr: &'r mut Reader<R>) -> DeserializeRecordsIter<'r, R, D>1972     fn new(rdr: &'r mut Reader<R>) -> DeserializeRecordsIter<'r, R, D> {
1973         let headers = if !rdr.state.has_headers {
1974             None
1975         } else {
1976             rdr.headers().ok().map(Clone::clone)
1977         };
1978         DeserializeRecordsIter {
1979             rdr,
1980             rec: StringRecord::new(),
1981             headers,
1982             _priv: PhantomData,
1983         }
1984     }
1985 
1986     /// Return a reference to the underlying CSV reader.
reader(&self) -> &Reader<R>1987     pub fn reader(&self) -> &Reader<R> {
1988         &self.rdr
1989     }
1990 
1991     /// Return a mutable reference to the underlying CSV reader.
reader_mut(&mut self) -> &mut Reader<R>1992     pub fn reader_mut(&mut self) -> &mut Reader<R> {
1993         &mut self.rdr
1994     }
1995 }
1996 
1997 impl<'r, R: io::Read, D: DeserializeOwned> Iterator
1998     for DeserializeRecordsIter<'r, R, D>
1999 {
2000     type Item = Result<D>;
2001 
next(&mut self) -> Option<Result<D>>2002     fn next(&mut self) -> Option<Result<D>> {
2003         match self.rdr.read_record(&mut self.rec) {
2004             Err(err) => Some(Err(err)),
2005             Ok(false) => None,
2006             Ok(true) => Some(self.rec.deserialize(self.headers.as_ref())),
2007         }
2008     }
2009 }
2010 
/// An owned iterator over records as strings.
pub struct StringRecordsIntoIter<R> {
    /// The CSV reader that records are pulled from; owned by this iterator.
    rdr: Reader<R>,
    /// Scratch record that each row is read into before being cloned out.
    rec: StringRecord,
}
2016 
2017 impl<R: io::Read> StringRecordsIntoIter<R> {
new(rdr: Reader<R>) -> StringRecordsIntoIter<R>2018     fn new(rdr: Reader<R>) -> StringRecordsIntoIter<R> {
2019         StringRecordsIntoIter { rdr, rec: StringRecord::new() }
2020     }
2021 
2022     /// Return a reference to the underlying CSV reader.
reader(&self) -> &Reader<R>2023     pub fn reader(&self) -> &Reader<R> {
2024         &self.rdr
2025     }
2026 
2027     /// Return a mutable reference to the underlying CSV reader.
reader_mut(&mut self) -> &mut Reader<R>2028     pub fn reader_mut(&mut self) -> &mut Reader<R> {
2029         &mut self.rdr
2030     }
2031 
2032     /// Drop this iterator and return the underlying CSV reader.
into_reader(self) -> Reader<R>2033     pub fn into_reader(self) -> Reader<R> {
2034         self.rdr
2035     }
2036 }
2037 
2038 impl<R: io::Read> Iterator for StringRecordsIntoIter<R> {
2039     type Item = Result<StringRecord>;
2040 
next(&mut self) -> Option<Result<StringRecord>>2041     fn next(&mut self) -> Option<Result<StringRecord>> {
2042         match self.rdr.read_record(&mut self.rec) {
2043             Err(err) => Some(Err(err)),
2044             Ok(true) => Some(Ok(self.rec.clone_truncated())),
2045             Ok(false) => None,
2046         }
2047     }
2048 }
2049 
/// A borrowed iterator over records as strings.
///
/// The lifetime parameter `'r` refers to the lifetime of the underlying
/// CSV `Reader`.
pub struct StringRecordsIter<'r, R: 'r> {
    /// The borrowed CSV reader that records are pulled from.
    rdr: &'r mut Reader<R>,
    /// Scratch record that each row is read into before being cloned out.
    rec: StringRecord,
}
2058 
2059 impl<'r, R: io::Read> StringRecordsIter<'r, R> {
new(rdr: &'r mut Reader<R>) -> StringRecordsIter<'r, R>2060     fn new(rdr: &'r mut Reader<R>) -> StringRecordsIter<'r, R> {
2061         StringRecordsIter { rdr, rec: StringRecord::new() }
2062     }
2063 
2064     /// Return a reference to the underlying CSV reader.
reader(&self) -> &Reader<R>2065     pub fn reader(&self) -> &Reader<R> {
2066         &self.rdr
2067     }
2068 
2069     /// Return a mutable reference to the underlying CSV reader.
reader_mut(&mut self) -> &mut Reader<R>2070     pub fn reader_mut(&mut self) -> &mut Reader<R> {
2071         &mut self.rdr
2072     }
2073 }
2074 
2075 impl<'r, R: io::Read> Iterator for StringRecordsIter<'r, R> {
2076     type Item = Result<StringRecord>;
2077 
next(&mut self) -> Option<Result<StringRecord>>2078     fn next(&mut self) -> Option<Result<StringRecord>> {
2079         match self.rdr.read_record(&mut self.rec) {
2080             Err(err) => Some(Err(err)),
2081             Ok(true) => Some(Ok(self.rec.clone_truncated())),
2082             Ok(false) => None,
2083         }
2084     }
2085 }
2086 
/// An owned iterator over records as raw bytes.
pub struct ByteRecordsIntoIter<R> {
    /// The CSV reader that records are pulled from; owned by this iterator.
    rdr: Reader<R>,
    /// Scratch record that each row is read into before being cloned out.
    rec: ByteRecord,
}
2092 
2093 impl<R: io::Read> ByteRecordsIntoIter<R> {
new(rdr: Reader<R>) -> ByteRecordsIntoIter<R>2094     fn new(rdr: Reader<R>) -> ByteRecordsIntoIter<R> {
2095         ByteRecordsIntoIter { rdr, rec: ByteRecord::new() }
2096     }
2097 
2098     /// Return a reference to the underlying CSV reader.
reader(&self) -> &Reader<R>2099     pub fn reader(&self) -> &Reader<R> {
2100         &self.rdr
2101     }
2102 
2103     /// Return a mutable reference to the underlying CSV reader.
reader_mut(&mut self) -> &mut Reader<R>2104     pub fn reader_mut(&mut self) -> &mut Reader<R> {
2105         &mut self.rdr
2106     }
2107 
2108     /// Drop this iterator and return the underlying CSV reader.
into_reader(self) -> Reader<R>2109     pub fn into_reader(self) -> Reader<R> {
2110         self.rdr
2111     }
2112 }
2113 
2114 impl<R: io::Read> Iterator for ByteRecordsIntoIter<R> {
2115     type Item = Result<ByteRecord>;
2116 
next(&mut self) -> Option<Result<ByteRecord>>2117     fn next(&mut self) -> Option<Result<ByteRecord>> {
2118         match self.rdr.read_byte_record(&mut self.rec) {
2119             Err(err) => Some(Err(err)),
2120             Ok(true) => Some(Ok(self.rec.clone_truncated())),
2121             Ok(false) => None,
2122         }
2123     }
2124 }
2125 
/// A borrowed iterator over records as raw bytes.
///
/// The lifetime parameter `'r` refers to the lifetime of the underlying
/// CSV `Reader`.
pub struct ByteRecordsIter<'r, R: 'r> {
    /// The borrowed CSV reader that records are pulled from.
    rdr: &'r mut Reader<R>,
    /// Scratch record that each row is read into before being cloned out.
    rec: ByteRecord,
}
2134 
2135 impl<'r, R: io::Read> ByteRecordsIter<'r, R> {
new(rdr: &'r mut Reader<R>) -> ByteRecordsIter<'r, R>2136     fn new(rdr: &'r mut Reader<R>) -> ByteRecordsIter<'r, R> {
2137         ByteRecordsIter { rdr, rec: ByteRecord::new() }
2138     }
2139 
2140     /// Return a reference to the underlying CSV reader.
reader(&self) -> &Reader<R>2141     pub fn reader(&self) -> &Reader<R> {
2142         &self.rdr
2143     }
2144 
2145     /// Return a mutable reference to the underlying CSV reader.
reader_mut(&mut self) -> &mut Reader<R>2146     pub fn reader_mut(&mut self) -> &mut Reader<R> {
2147         &mut self.rdr
2148     }
2149 }
2150 
2151 impl<'r, R: io::Read> Iterator for ByteRecordsIter<'r, R> {
2152     type Item = Result<ByteRecord>;
2153 
next(&mut self) -> Option<Result<ByteRecord>>2154     fn next(&mut self) -> Option<Result<ByteRecord>> {
2155         match self.rdr.read_byte_record(&mut self.rec) {
2156             Err(err) => Some(Err(err)),
2157             Ok(true) => Some(Ok(self.rec.clone_truncated())),
2158             Ok(false) => None,
2159         }
2160     }
2161 }
2162 
2163 #[cfg(test)]
2164 mod tests {
2165     use std::io;
2166 
2167     use crate::{
2168         byte_record::ByteRecord, error::ErrorKind, string_record::StringRecord,
2169     };
2170 
2171     use super::{Position, ReaderBuilder, Trim};
2172 
b(s: &str) -> &[u8]2173     fn b(s: &str) -> &[u8] {
2174         s.as_bytes()
2175     }
s(b: &[u8]) -> &str2176     fn s(b: &[u8]) -> &str {
2177         ::std::str::from_utf8(b).unwrap()
2178     }
2179 
newpos(byte: u64, line: u64, record: u64) -> Position2180     fn newpos(byte: u64, line: u64, record: u64) -> Position {
2181         let mut p = Position::new();
2182         p.set_byte(byte).set_line(line).set_record(record);
2183         p
2184     }
2185 
2186     #[test]
read_byte_record()2187     fn read_byte_record() {
2188         let data = b("foo,\"b,ar\",baz\nabc,mno,xyz");
2189         let mut rdr =
2190             ReaderBuilder::new().has_headers(false).from_reader(data);
2191         let mut rec = ByteRecord::new();
2192 
2193         assert!(rdr.read_byte_record(&mut rec).unwrap());
2194         assert_eq!(3, rec.len());
2195         assert_eq!("foo", s(&rec[0]));
2196         assert_eq!("b,ar", s(&rec[1]));
2197         assert_eq!("baz", s(&rec[2]));
2198 
2199         assert!(rdr.read_byte_record(&mut rec).unwrap());
2200         assert_eq!(3, rec.len());
2201         assert_eq!("abc", s(&rec[0]));
2202         assert_eq!("mno", s(&rec[1]));
2203         assert_eq!("xyz", s(&rec[2]));
2204 
2205         assert!(!rdr.read_byte_record(&mut rec).unwrap());
2206     }
2207 
2208     #[test]
read_trimmed_records_and_headers()2209     fn read_trimmed_records_and_headers() {
2210         let data = b("foo,  bar,\tbaz\n  1,  2,  3\n1\t,\t,3\t\t");
2211         let mut rdr = ReaderBuilder::new()
2212             .has_headers(true)
2213             .trim(Trim::All)
2214             .from_reader(data);
2215         let mut rec = ByteRecord::new();
2216         assert!(rdr.read_byte_record(&mut rec).unwrap());
2217         assert_eq!("1", s(&rec[0]));
2218         assert_eq!("2", s(&rec[1]));
2219         assert_eq!("3", s(&rec[2]));
2220         let mut rec = StringRecord::new();
2221         assert!(rdr.read_record(&mut rec).unwrap());
2222         assert_eq!("1", &rec[0]);
2223         assert_eq!("", &rec[1]);
2224         assert_eq!("3", &rec[2]);
2225         {
2226             let headers = rdr.headers().unwrap();
2227             assert_eq!(3, headers.len());
2228             assert_eq!("foo", &headers[0]);
2229             assert_eq!("bar", &headers[1]);
2230             assert_eq!("baz", &headers[2]);
2231         }
2232     }
2233 
2234     #[test]
read_trimmed_header()2235     fn read_trimmed_header() {
2236         let data = b("foo,  bar,\tbaz\n  1,  2,  3\n1\t,\t,3\t\t");
2237         let mut rdr = ReaderBuilder::new()
2238             .has_headers(true)
2239             .trim(Trim::Headers)
2240             .from_reader(data);
2241         let mut rec = ByteRecord::new();
2242         assert!(rdr.read_byte_record(&mut rec).unwrap());
2243         assert_eq!("  1", s(&rec[0]));
2244         assert_eq!("  2", s(&rec[1]));
2245         assert_eq!("  3", s(&rec[2]));
2246         {
2247             let headers = rdr.headers().unwrap();
2248             assert_eq!(3, headers.len());
2249             assert_eq!("foo", &headers[0]);
2250             assert_eq!("bar", &headers[1]);
2251             assert_eq!("baz", &headers[2]);
2252         }
2253     }
2254 
2255     #[test]
read_trimed_header_invalid_utf8()2256     fn read_trimed_header_invalid_utf8() {
2257         let data = &b"foo,  b\xFFar,\tbaz\na,b,c\nd,e,f"[..];
2258         let mut rdr = ReaderBuilder::new()
2259             .has_headers(true)
2260             .trim(Trim::Headers)
2261             .from_reader(data);
2262         let mut rec = StringRecord::new();
2263 
2264         // force the headers to be read
2265         let _ = rdr.read_record(&mut rec);
2266         // Check the byte headers are trimmed
2267         {
2268             let headers = rdr.byte_headers().unwrap();
2269             assert_eq!(3, headers.len());
2270             assert_eq!(b"foo", &headers[0]);
2271             assert_eq!(b"b\xFFar", &headers[1]);
2272             assert_eq!(b"baz", &headers[2]);
2273         }
2274         match *rdr.headers().unwrap_err().kind() {
2275             ErrorKind::Utf8 { pos: Some(ref pos), ref err } => {
2276                 assert_eq!(pos, &newpos(0, 1, 0));
2277                 assert_eq!(err.field(), 1);
2278                 assert_eq!(err.valid_up_to(), 3);
2279             }
2280             ref err => panic!("match failed, got {:?}", err),
2281         }
2282     }
2283 
2284     #[test]
read_trimmed_records()2285     fn read_trimmed_records() {
2286         let data = b("foo,  bar,\tbaz\n  1,  2,  3\n1\t,\t,3\t\t");
2287         let mut rdr = ReaderBuilder::new()
2288             .has_headers(true)
2289             .trim(Trim::Fields)
2290             .from_reader(data);
2291         let mut rec = ByteRecord::new();
2292         assert!(rdr.read_byte_record(&mut rec).unwrap());
2293         assert_eq!("1", s(&rec[0]));
2294         assert_eq!("2", s(&rec[1]));
2295         assert_eq!("3", s(&rec[2]));
2296         {
2297             let headers = rdr.headers().unwrap();
2298             assert_eq!(3, headers.len());
2299             assert_eq!("foo", &headers[0]);
2300             assert_eq!("  bar", &headers[1]);
2301             assert_eq!("\tbaz", &headers[2]);
2302         }
2303     }
2304 
2305     #[test]
read_record_unequal_fails()2306     fn read_record_unequal_fails() {
2307         let data = b("foo\nbar,baz");
2308         let mut rdr =
2309             ReaderBuilder::new().has_headers(false).from_reader(data);
2310         let mut rec = ByteRecord::new();
2311 
2312         assert!(rdr.read_byte_record(&mut rec).unwrap());
2313         assert_eq!(1, rec.len());
2314         assert_eq!("foo", s(&rec[0]));
2315 
2316         match rdr.read_byte_record(&mut rec) {
2317             Err(err) => match *err.kind() {
2318                 ErrorKind::UnequalLengths {
2319                     expected_len: 1,
2320                     ref pos,
2321                     len: 2,
2322                 } => {
2323                     assert_eq!(pos, &Some(newpos(4, 2, 1)));
2324                 }
2325                 ref wrong => panic!("match failed, got {:?}", wrong),
2326             },
2327             wrong => panic!("match failed, got {:?}", wrong),
2328         }
2329     }
2330 
2331     #[test]
read_record_unequal_ok()2332     fn read_record_unequal_ok() {
2333         let data = b("foo\nbar,baz");
2334         let mut rdr = ReaderBuilder::new()
2335             .has_headers(false)
2336             .flexible(true)
2337             .from_reader(data);
2338         let mut rec = ByteRecord::new();
2339 
2340         assert!(rdr.read_byte_record(&mut rec).unwrap());
2341         assert_eq!(1, rec.len());
2342         assert_eq!("foo", s(&rec[0]));
2343 
2344         assert!(rdr.read_byte_record(&mut rec).unwrap());
2345         assert_eq!(2, rec.len());
2346         assert_eq!("bar", s(&rec[0]));
2347         assert_eq!("baz", s(&rec[1]));
2348 
2349         assert!(!rdr.read_byte_record(&mut rec).unwrap());
2350     }
2351 
2352     // This tests that even if we get a CSV error, we can continue reading
2353     // if we want.
2354     #[test]
read_record_unequal_continue()2355     fn read_record_unequal_continue() {
2356         let data = b("foo\nbar,baz\nquux");
2357         let mut rdr =
2358             ReaderBuilder::new().has_headers(false).from_reader(data);
2359         let mut rec = ByteRecord::new();
2360 
2361         assert!(rdr.read_byte_record(&mut rec).unwrap());
2362         assert_eq!(1, rec.len());
2363         assert_eq!("foo", s(&rec[0]));
2364 
2365         match rdr.read_byte_record(&mut rec) {
2366             Err(err) => match err.kind() {
2367                 &ErrorKind::UnequalLengths {
2368                     expected_len: 1,
2369                     ref pos,
2370                     len: 2,
2371                 } => {
2372                     assert_eq!(pos, &Some(newpos(4, 2, 1)));
2373                 }
2374                 wrong => panic!("match failed, got {:?}", wrong),
2375             },
2376             wrong => panic!("match failed, got {:?}", wrong),
2377         }
2378 
2379         assert!(rdr.read_byte_record(&mut rec).unwrap());
2380         assert_eq!(1, rec.len());
2381         assert_eq!("quux", s(&rec[0]));
2382 
2383         assert!(!rdr.read_byte_record(&mut rec).unwrap());
2384     }
2385 
// With headers enabled, the first row is never yielded as a record; it is
// exposed through `byte_headers` and `headers` instead.
#[test]
fn read_record_headers() {
    let input = b("foo,bar,baz\na,b,c\nd,e,f");
    let mut builder = ReaderBuilder::new();
    builder.has_headers(true);
    let mut reader = builder.from_reader(input);
    let mut record = StringRecord::new();

    // Only the two data rows are read, in order.
    for &first_field in &["a", "d"] {
        assert!(reader.read_record(&mut record).unwrap());
        assert_eq!(3, record.len());
        assert_eq!(first_field, &record[0]);
    }
    assert!(!reader.read_record(&mut record).unwrap());

    // The header row is still retrievable after the data is exhausted, both
    // as raw bytes and as strings.
    let names = ["foo", "bar", "baz"];

    let raw = reader.byte_headers().unwrap();
    assert_eq!(3, raw.len());
    for (i, name) in names.iter().enumerate() {
        assert_eq!(name.as_bytes(), &raw[i]);
    }

    let text = reader.headers().unwrap();
    assert_eq!(3, text.len());
    for (i, &name) in names.iter().enumerate() {
        assert_eq!(name, &text[i]);
    }
}
2417 
// Invalid UTF-8 in the header row does not prevent reading the data rows,
// and the headers stay available as raw bytes; only requesting them as
// strings fails, with a UTF-8 error describing the offending field.
#[test]
fn read_record_headers_invalid_utf8() {
    let input: &[u8] = b"foo,b\xFFar,baz\na,b,c\nd,e,f";
    let mut builder = ReaderBuilder::new();
    builder.has_headers(true);
    let mut reader = builder.from_reader(input);
    let mut record = StringRecord::new();

    for &first_field in &["a", "d"] {
        assert!(reader.read_record(&mut record).unwrap());
        assert_eq!(3, record.len());
        assert_eq!(first_field, &record[0]);
    }
    assert!(!reader.read_record(&mut record).unwrap());

    // Raw access returns the header bytes untouched.
    let expected: [&[u8]; 3] = [b"foo", b"b\xFFar", b"baz"];
    let raw = reader.byte_headers().unwrap();
    assert_eq!(3, raw.len());
    for (i, &field) in expected.iter().enumerate() {
        assert_eq!(field, &raw[i]);
    }

    // String access must fail and point at field 1, valid up to byte 1.
    let failure = reader.headers().unwrap_err();
    match failure.kind() {
        ErrorKind::Utf8 { pos: Some(pos), err } => {
            assert_eq!(pos, &newpos(0, 1, 0));
            assert_eq!(err.field(), 1);
            assert_eq!(err.valid_up_to(), 1);
        }
        other => panic!("match failed, got {:?}", other),
    }
}
2452 
// With headers disabled, asking for the headers before reading any record
// returns the first row, and that row is still yielded as a record.
#[test]
fn read_record_no_headers_before() {
    let input = b("foo,bar,baz\na,b,c\nd,e,f");
    let mut builder = ReaderBuilder::new();
    builder.has_headers(false);
    let mut reader = builder.from_reader(input);
    let mut record = StringRecord::new();

    let headers = reader.headers().unwrap();
    assert_eq!(3, headers.len());
    for (i, &name) in ["foo", "bar", "baz"].iter().enumerate() {
        assert_eq!(name, &headers[i]);
    }

    // All three rows, including the first one, come back as records.
    for &first_field in &["foo", "a", "d"] {
        assert!(reader.read_record(&mut record).unwrap());
        assert_eq!(3, record.len());
        assert_eq!(first_field, &record[0]);
    }

    assert!(!reader.read_record(&mut record).unwrap());
}
2482 
// With headers disabled, every row is yielded as a record, and asking for
// the headers after all records were read still returns the first row.
#[test]
fn read_record_no_headers_after() {
    let input = b("foo,bar,baz\na,b,c\nd,e,f");
    let mut builder = ReaderBuilder::new();
    builder.has_headers(false);
    let mut reader = builder.from_reader(input);
    let mut record = StringRecord::new();

    for &first_field in &["foo", "a", "d"] {
        assert!(reader.read_record(&mut record).unwrap());
        assert_eq!(3, record.len());
        assert_eq!(first_field, &record[0]);
    }

    assert!(!reader.read_record(&mut record).unwrap());

    let headers = reader.headers().unwrap();
    assert_eq!(3, headers.len());
    for (i, &name) in ["foo", "bar", "baz"].iter().enumerate() {
        assert_eq!(name, &headers[i]);
    }
}
2510 
// Seeking to the start of a later record makes reading resume from that
// record, and the reader's position advances with each record read.
#[test]
fn seek() {
    let input = b("foo,bar,baz\na,b,c\nd,e,f\ng,h,i");
    let cursor = io::Cursor::new(input);
    let mut reader = ReaderBuilder::new().from_reader(cursor);
    let target = newpos(18, 3, 2);
    reader.seek(target).unwrap();

    let mut record = StringRecord::new();

    // Right after the seek, the reader reports the requested byte offset.
    assert_eq!(18, reader.position().byte());
    let found = reader.read_record(&mut record).unwrap();
    assert!(found);
    assert_eq!(3, record.len());
    assert_eq!("d", &record[0]);

    // One record later, byte, line and record have all moved forward.
    {
        let here = reader.position();
        assert_eq!(24, here.byte());
        assert_eq!(4, here.line());
        assert_eq!(3, here.record());
    }
    let found = reader.read_record(&mut record).unwrap();
    assert!(found);
    assert_eq!(3, record.len());
    assert_eq!("g", &record[0]);

    let found = reader.read_record(&mut record).unwrap();
    assert!(!found);
}
2533 
// Test that headers can be read after seeking even if they weren't
// explicitly read before seeking.
#[test]
fn seek_headers_after() {
    let input = b("foo,bar,baz\na,b,c\nd,e,f\ng,h,i");
    let cursor = io::Cursor::new(input);
    let mut reader = ReaderBuilder::new().from_reader(cursor);
    reader.seek(newpos(18, 3, 2)).unwrap();

    let expected = vec!["foo", "bar", "baz"];
    let headers = reader.headers().unwrap();
    assert_eq!(headers, expected);
}
2543 
// Test that headers read before seeking are the same headers returned
// after seeking.
#[test]
fn seek_headers_before_after() {
    let input = b("foo,bar,baz\na,b,c\nd,e,f\ng,h,i");
    let cursor = io::Cursor::new(input);
    let mut reader = ReaderBuilder::new().from_reader(cursor);

    // Keep an owned copy so the comparison can outlive the seek.
    let before = reader.headers().unwrap().clone();
    reader.seek(newpos(18, 3, 2)).unwrap();

    let after = reader.headers().unwrap();
    assert_eq!(&before, after);
}
2554 
// Test that seeking to the current byte offset (here, the very start)
// without having read the headers first performs no actual seek, so the
// headers can still be read afterwards.
#[test]
fn seek_headers_no_actual_seek() {
    let input = b("foo,bar,baz\na,b,c\nd,e,f\ng,h,i");
    let cursor = io::Cursor::new(input);
    let mut reader = ReaderBuilder::new().from_reader(cursor);

    let origin = Position::new();
    reader.seek(origin).unwrap();

    let headers = reader.headers().unwrap();
    assert_eq!("foo", &headers[0]);
}
2565 
// Test that each record reports the correct position when headers are
// disabled.
#[test]
fn positions_no_headers() {
    let mut builder = ReaderBuilder::new();
    builder.has_headers(false);
    let mut records =
        builder.from_reader("a,b,c\nx,y,z".as_bytes()).into_records();

    // Expected (byte, line, record) for each row, in reading order.
    let expected: [(u64, u64, u64); 2] = [(0, 1, 0), (6, 2, 1)];
    for &(byte, line, index) in &expected {
        let row = records.next().unwrap().unwrap();
        let pos = row.position().unwrap();
        assert_eq!(pos.byte(), byte);
        assert_eq!(pos.line(), line);
        assert_eq!(pos.record(), index);
    }
}
2584 
// Test that the first yielded record reports the correct position when
// headers are enabled.
#[test]
fn positions_headers() {
    let mut builder = ReaderBuilder::new();
    builder.has_headers(true);
    let mut records =
        builder.from_reader("a,b,c\nx,y,z".as_bytes()).into_records();

    let row = records.next().unwrap().unwrap();
    let pos = row.position().unwrap();
    assert_eq!(pos.byte(), 6);
    assert_eq!(pos.line(), 2);
    assert_eq!(pos.record(), 1);
}
2598 
// Test that asking for headers on empty input succeeds and yields a record
// with no fields.
#[test]
fn headers_on_empty_data() {
    let input: &[u8] = b"";
    let mut reader = ReaderBuilder::new().from_reader(input);
    let field_count = reader.byte_headers().unwrap().len();
    assert_eq!(field_count, 0);
}
2606 
// Test that iterating records over empty input, with headers disabled,
// yields nothing.
#[test]
fn no_headers_on_empty_data() {
    let input: &[u8] = b"";
    let mut builder = ReaderBuilder::new();
    builder.has_headers(false);
    let mut reader = builder.from_reader(input);

    let record_count = reader.records().count();
    assert_eq!(record_count, 0);
}
2614 
// Test that iterating records over empty input, with headers disabled,
// yields nothing even if the headers were requested beforehand.
#[test]
fn no_headers_on_empty_data_after_headers() {
    let input: &[u8] = b"";
    let mut builder = ReaderBuilder::new();
    builder.has_headers(false);
    let mut reader = builder.from_reader(input);

    let header_len = reader.headers().unwrap().len();
    assert_eq!(header_len, 0);

    let record_count = reader.records().count();
    assert_eq!(record_count, 0);
}
2624 }
2625