use std::{
    fs::File,
    io::{self, BufRead, Seek},
    marker::PhantomData,
    path::Path,
    result,
};

use {
    csv_core::{Reader as CoreReader, ReaderBuilder as CoreReaderBuilder},
    serde::de::DeserializeOwned,
};

use crate::{
    byte_record::{ByteRecord, Position},
    error::{Error, ErrorKind, Result, Utf8Error},
    string_record::StringRecord,
    {Terminator, Trim},
};

/// Builds a CSV reader with various configuration knobs.
///
/// This builder can be used to tweak the field delimiter, record terminator
/// and more. Once a CSV `Reader` is built, its configuration cannot be
/// changed.
#[derive(Debug)]
pub struct ReaderBuilder {
    capacity: usize,
    flexible: bool,
    has_headers: bool,
    trim: Trim,
    /// The underlying CSV parser builder.
    ///
    /// We explicitly put this on the heap because CoreReaderBuilder embeds an
    /// entire DFA transition table, which along with other things, tallies up
    /// to almost 500 bytes on the stack.
    builder: Box<CoreReaderBuilder>,
}

impl Default for ReaderBuilder {
    fn default() -> ReaderBuilder {
        ReaderBuilder {
            capacity: 8 * (1 << 10),
            flexible: false,
            has_headers: true,
            trim: Trim::default(),
            builder: Box::new(CoreReaderBuilder::default()),
        }
    }
}

impl ReaderBuilder {
    /// Create a new builder for configuring CSV parsing.
    ///
    /// To convert a builder into a reader, call one of the methods starting
    /// with `from_`.
    ///
    /// # Example
    ///
    /// ```
    /// use std::error::Error;
    /// use csv::{ReaderBuilder, StringRecord};
    ///
    /// # fn main() { example().unwrap(); }
    /// fn example() -> Result<(), Box<dyn Error>> {
    ///     let data = "\
    /// city,country,pop
    /// Boston,United States,4628910
    /// Concord,United States,42695
    /// ";
    ///     let mut rdr = ReaderBuilder::new().from_reader(data.as_bytes());
    ///
    ///     let records = rdr
    ///         .records()
    ///         .collect::<Result<Vec<StringRecord>, csv::Error>>()?;
    ///     assert_eq!(records, vec![
    ///         vec!["Boston", "United States", "4628910"],
    ///         vec!["Concord", "United States", "42695"],
    ///     ]);
    ///     Ok(())
    /// }
    /// ```
    pub fn new() -> ReaderBuilder {
        ReaderBuilder::default()
    }

    /// Build a CSV parser from this configuration that reads data from the
    /// given file path.
    ///
    /// If there was a problem opening the file at the given path, then this
    /// returns the corresponding error.
    ///
    /// # Example
    ///
    /// ```no_run
    /// use std::error::Error;
    /// use csv::ReaderBuilder;
    ///
    /// # fn main() { example().unwrap(); }
    /// fn example() -> Result<(), Box<dyn Error>> {
    ///     let mut rdr = ReaderBuilder::new().from_path("foo.csv")?;
    ///     for result in rdr.records() {
    ///         let record = result?;
    ///         println!("{:?}", record);
    ///     }
    ///     Ok(())
    /// }
    /// ```
    pub fn from_path<P: AsRef<Path>>(&self, path: P) -> Result<Reader<File>> {
        Ok(Reader::new(self, File::open(path)?))
    }

    /// Build a CSV parser from this configuration that reads data from `rdr`.
    ///
    /// Note that the CSV reader is buffered automatically, so you should not
    /// wrap `rdr` in a buffered reader like `io::BufReader`.
117 /// 118 /// # Example 119 /// 120 /// ``` 121 /// use std::error::Error; 122 /// use csv::ReaderBuilder; 123 /// 124 /// # fn main() { example().unwrap(); } 125 /// fn example() -> Result<(), Box<dyn Error>> { 126 /// let data = "\ 127 /// city,country,pop 128 /// Boston,United States,4628910 129 /// Concord,United States,42695 130 /// "; 131 /// let mut rdr = ReaderBuilder::new().from_reader(data.as_bytes()); 132 /// for result in rdr.records() { 133 /// let record = result?; 134 /// println!("{:?}", record); 135 /// } 136 /// Ok(()) 137 /// } 138 /// ``` from_reader<R: io::Read>(&self, rdr: R) -> Reader<R>139 pub fn from_reader<R: io::Read>(&self, rdr: R) -> Reader<R> { 140 Reader::new(self, rdr) 141 } 142 143 /// The field delimiter to use when parsing CSV. 144 /// 145 /// The default is `b','`. 146 /// 147 /// # Example 148 /// 149 /// ``` 150 /// use std::error::Error; 151 /// use csv::ReaderBuilder; 152 /// 153 /// # fn main() { example().unwrap(); } 154 /// fn example() -> Result<(), Box<dyn Error>> { 155 /// let data = "\ 156 /// city;country;pop 157 /// Boston;United States;4628910 158 /// "; 159 /// let mut rdr = ReaderBuilder::new() 160 /// .delimiter(b';') 161 /// .from_reader(data.as_bytes()); 162 /// 163 /// if let Some(result) = rdr.records().next() { 164 /// let record = result?; 165 /// assert_eq!(record, vec!["Boston", "United States", "4628910"]); 166 /// Ok(()) 167 /// } else { 168 /// Err(From::from("expected at least one record but got none")) 169 /// } 170 /// } 171 /// ``` delimiter(&mut self, delimiter: u8) -> &mut ReaderBuilder172 pub fn delimiter(&mut self, delimiter: u8) -> &mut ReaderBuilder { 173 self.builder.delimiter(delimiter); 174 self 175 } 176 177 /// Whether to treat the first row as a special header row. 178 /// 179 /// By default, the first row is treated as a special header row, which 180 /// means the header is never returned by any of the record reading methods 181 /// or iterators. When this is disabled (`yes` set to `false`), the first 182 /// row is not treated specially. 183 /// 184 /// Note that the `headers` and `byte_headers` methods are unaffected by 185 /// whether this is set. Those methods always return the first record. 186 /// 187 /// # Example 188 /// 189 /// This example shows what happens when `has_headers` is disabled. 190 /// Namely, the first row is treated just like any other row. 191 /// 192 /// ``` 193 /// use std::error::Error; 194 /// use csv::ReaderBuilder; 195 /// 196 /// # fn main() { example().unwrap(); } 197 /// fn example() -> Result<(), Box<dyn Error>> { 198 /// let data = "\ 199 /// city,country,pop 200 /// Boston,United States,4628910 201 /// "; 202 /// let mut rdr = ReaderBuilder::new() 203 /// .has_headers(false) 204 /// .from_reader(data.as_bytes()); 205 /// let mut iter = rdr.records(); 206 /// 207 /// // Read the first record. 208 /// if let Some(result) = iter.next() { 209 /// let record = result?; 210 /// assert_eq!(record, vec!["city", "country", "pop"]); 211 /// } else { 212 /// return Err(From::from( 213 /// "expected at least two records but got none")); 214 /// } 215 /// 216 /// // Read the second record. 
217 /// if let Some(result) = iter.next() { 218 /// let record = result?; 219 /// assert_eq!(record, vec!["Boston", "United States", "4628910"]); 220 /// } else { 221 /// return Err(From::from( 222 /// "expected at least two records but got one")) 223 /// } 224 /// Ok(()) 225 /// } 226 /// ``` has_headers(&mut self, yes: bool) -> &mut ReaderBuilder227 pub fn has_headers(&mut self, yes: bool) -> &mut ReaderBuilder { 228 self.has_headers = yes; 229 self 230 } 231 232 /// Whether the number of fields in records is allowed to change or not. 233 /// 234 /// When disabled (which is the default), parsing CSV data will return an 235 /// error if a record is found with a number of fields different from the 236 /// number of fields in a previous record. 237 /// 238 /// When enabled, this error checking is turned off. 239 /// 240 /// # Example: flexible records enabled 241 /// 242 /// ``` 243 /// use std::error::Error; 244 /// use csv::ReaderBuilder; 245 /// 246 /// # fn main() { example().unwrap(); } 247 /// fn example() -> Result<(), Box<dyn Error>> { 248 /// // Notice that the first row is missing the population count. 249 /// let data = "\ 250 /// city,country,pop 251 /// Boston,United States 252 /// "; 253 /// let mut rdr = ReaderBuilder::new() 254 /// .flexible(true) 255 /// .from_reader(data.as_bytes()); 256 /// 257 /// if let Some(result) = rdr.records().next() { 258 /// let record = result?; 259 /// assert_eq!(record, vec!["Boston", "United States"]); 260 /// Ok(()) 261 /// } else { 262 /// Err(From::from("expected at least one record but got none")) 263 /// } 264 /// } 265 /// ``` 266 /// 267 /// # Example: flexible records disabled 268 /// 269 /// This shows the error that appears when records of unequal length 270 /// are found and flexible records have been disabled (which is the 271 /// default). 272 /// 273 /// ``` 274 /// use std::error::Error; 275 /// use csv::{ErrorKind, ReaderBuilder}; 276 /// 277 /// # fn main() { example().unwrap(); } 278 /// fn example() -> Result<(), Box<dyn Error>> { 279 /// // Notice that the first row is missing the population count. 280 /// let data = "\ 281 /// city,country,pop 282 /// Boston,United States 283 /// "; 284 /// let mut rdr = ReaderBuilder::new() 285 /// .flexible(false) 286 /// .from_reader(data.as_bytes()); 287 /// 288 /// if let Some(Err(err)) = rdr.records().next() { 289 /// match *err.kind() { 290 /// ErrorKind::UnequalLengths { expected_len, len, .. } => { 291 /// // The header row has 3 fields... 292 /// assert_eq!(expected_len, 3); 293 /// // ... but the first row has only 2 fields. 294 /// assert_eq!(len, 2); 295 /// Ok(()) 296 /// } 297 /// ref wrong => { 298 /// Err(From::from(format!( 299 /// "expected UnequalLengths error but got {:?}", 300 /// wrong))) 301 /// } 302 /// } 303 /// } else { 304 /// Err(From::from( 305 /// "expected at least one errored record but got none")) 306 /// } 307 /// } 308 /// ``` flexible(&mut self, yes: bool) -> &mut ReaderBuilder309 pub fn flexible(&mut self, yes: bool) -> &mut ReaderBuilder { 310 self.flexible = yes; 311 self 312 } 313 314 /// Whether fields are trimmed of leading and trailing whitespace or not. 315 /// 316 /// By default, no trimming is performed. This method permits one to 317 /// override that behavior and choose one of the following options: 318 /// 319 /// 1. `Trim::Headers` trims only header values. 320 /// 2. `Trim::Fields` trims only non-header or "field" values. 321 /// 3. `Trim::All` trims both header and non-header values. 
322 /// 323 /// A value is only interpreted as a header value if this CSV reader is 324 /// configured to read a header record (which is the default). 325 /// 326 /// When reading string records, characters meeting the definition of 327 /// Unicode whitespace are trimmed. When reading byte records, characters 328 /// meeting the definition of ASCII whitespace are trimmed. ASCII 329 /// whitespace characters correspond to the set `[\t\n\v\f\r ]`. 330 /// 331 /// # Example 332 /// 333 /// This example shows what happens when all values are trimmed. 334 /// 335 /// ``` 336 /// use std::error::Error; 337 /// use csv::{ReaderBuilder, StringRecord, Trim}; 338 /// 339 /// # fn main() { example().unwrap(); } 340 /// fn example() -> Result<(), Box<dyn Error>> { 341 /// let data = "\ 342 /// city , country , pop 343 /// Boston,\" 344 /// United States\",4628910 345 /// Concord, United States ,42695 346 /// "; 347 /// let mut rdr = ReaderBuilder::new() 348 /// .trim(Trim::All) 349 /// .from_reader(data.as_bytes()); 350 /// let records = rdr 351 /// .records() 352 /// .collect::<Result<Vec<StringRecord>, csv::Error>>()?; 353 /// assert_eq!(records, vec![ 354 /// vec!["Boston", "United States", "4628910"], 355 /// vec!["Concord", "United States", "42695"], 356 /// ]); 357 /// Ok(()) 358 /// } 359 /// ``` trim(&mut self, trim: Trim) -> &mut ReaderBuilder360 pub fn trim(&mut self, trim: Trim) -> &mut ReaderBuilder { 361 self.trim = trim; 362 self 363 } 364 365 /// The record terminator to use when parsing CSV. 366 /// 367 /// A record terminator can be any single byte. The default is a special 368 /// value, `Terminator::CRLF`, which treats any occurrence of `\r`, `\n` 369 /// or `\r\n` as a single record terminator. 370 /// 371 /// # Example: `$` as a record terminator 372 /// 373 /// ``` 374 /// use std::error::Error; 375 /// use csv::{ReaderBuilder, Terminator}; 376 /// 377 /// # fn main() { example().unwrap(); } 378 /// fn example() -> Result<(), Box<dyn Error>> { 379 /// let data = "city,country,pop$Boston,United States,4628910"; 380 /// let mut rdr = ReaderBuilder::new() 381 /// .terminator(Terminator::Any(b'$')) 382 /// .from_reader(data.as_bytes()); 383 /// 384 /// if let Some(result) = rdr.records().next() { 385 /// let record = result?; 386 /// assert_eq!(record, vec!["Boston", "United States", "4628910"]); 387 /// Ok(()) 388 /// } else { 389 /// Err(From::from("expected at least one record but got none")) 390 /// } 391 /// } 392 /// ``` terminator(&mut self, term: Terminator) -> &mut ReaderBuilder393 pub fn terminator(&mut self, term: Terminator) -> &mut ReaderBuilder { 394 self.builder.terminator(term.to_core()); 395 self 396 } 397 398 /// The quote character to use when parsing CSV. 399 /// 400 /// The default is `b'"'`. 
401 /// 402 /// # Example: single quotes instead of double quotes 403 /// 404 /// ``` 405 /// use std::error::Error; 406 /// use csv::ReaderBuilder; 407 /// 408 /// # fn main() { example().unwrap(); } 409 /// fn example() -> Result<(), Box<dyn Error>> { 410 /// let data = "\ 411 /// city,country,pop 412 /// Boston,'United States',4628910 413 /// "; 414 /// let mut rdr = ReaderBuilder::new() 415 /// .quote(b'\'') 416 /// .from_reader(data.as_bytes()); 417 /// 418 /// if let Some(result) = rdr.records().next() { 419 /// let record = result?; 420 /// assert_eq!(record, vec!["Boston", "United States", "4628910"]); 421 /// Ok(()) 422 /// } else { 423 /// Err(From::from("expected at least one record but got none")) 424 /// } 425 /// } 426 /// ``` quote(&mut self, quote: u8) -> &mut ReaderBuilder427 pub fn quote(&mut self, quote: u8) -> &mut ReaderBuilder { 428 self.builder.quote(quote); 429 self 430 } 431 432 /// The escape character to use when parsing CSV. 433 /// 434 /// In some variants of CSV, quotes are escaped using a special escape 435 /// character like `\` (instead of escaping quotes by doubling them). 436 /// 437 /// By default, recognizing these idiosyncratic escapes is disabled. 438 /// 439 /// # Example 440 /// 441 /// ``` 442 /// use std::error::Error; 443 /// use csv::ReaderBuilder; 444 /// 445 /// # fn main() { example().unwrap(); } 446 /// fn example() -> Result<(), Box<dyn Error>> { 447 /// let data = "\ 448 /// city,country,pop 449 /// Boston,\"The \\\"United\\\" States\",4628910 450 /// "; 451 /// let mut rdr = ReaderBuilder::new() 452 /// .escape(Some(b'\\')) 453 /// .from_reader(data.as_bytes()); 454 /// 455 /// if let Some(result) = rdr.records().next() { 456 /// let record = result?; 457 /// assert_eq!(record, vec![ 458 /// "Boston", "The \"United\" States", "4628910", 459 /// ]); 460 /// Ok(()) 461 /// } else { 462 /// Err(From::from("expected at least one record but got none")) 463 /// } 464 /// } 465 /// ``` escape(&mut self, escape: Option<u8>) -> &mut ReaderBuilder466 pub fn escape(&mut self, escape: Option<u8>) -> &mut ReaderBuilder { 467 self.builder.escape(escape); 468 self 469 } 470 471 /// Enable double quote escapes. 472 /// 473 /// This is enabled by default, but it may be disabled. When disabled, 474 /// doubled quotes are not interpreted as escapes. 475 /// 476 /// # Example 477 /// 478 /// ``` 479 /// use std::error::Error; 480 /// use csv::ReaderBuilder; 481 /// 482 /// # fn main() { example().unwrap(); } 483 /// fn example() -> Result<(), Box<dyn Error>> { 484 /// let data = "\ 485 /// city,country,pop 486 /// Boston,\"The \"\"United\"\" States\",4628910 487 /// "; 488 /// let mut rdr = ReaderBuilder::new() 489 /// .double_quote(false) 490 /// .from_reader(data.as_bytes()); 491 /// 492 /// if let Some(result) = rdr.records().next() { 493 /// let record = result?; 494 /// assert_eq!(record, vec![ 495 /// "Boston", "The \"United\"\" States\"", "4628910", 496 /// ]); 497 /// Ok(()) 498 /// } else { 499 /// Err(From::from("expected at least one record but got none")) 500 /// } 501 /// } 502 /// ``` double_quote(&mut self, yes: bool) -> &mut ReaderBuilder503 pub fn double_quote(&mut self, yes: bool) -> &mut ReaderBuilder { 504 self.builder.double_quote(yes); 505 self 506 } 507 508 /// Enable or disable quoting. 509 /// 510 /// This is enabled by default, but it may be disabled. When disabled, 511 /// quotes are not treated specially. 
    ///
    /// # Example
    ///
    /// ```
    /// use std::error::Error;
    /// use csv::ReaderBuilder;
    ///
    /// # fn main() { example().unwrap(); }
    /// fn example() -> Result<(), Box<dyn Error>> {
    ///     let data = "\
    /// city,country,pop
    /// Boston,\"The United States,4628910
    /// ";
    ///     let mut rdr = ReaderBuilder::new()
    ///         .quoting(false)
    ///         .from_reader(data.as_bytes());
    ///
    ///     if let Some(result) = rdr.records().next() {
    ///         let record = result?;
    ///         assert_eq!(record, vec![
    ///             "Boston", "\"The United States", "4628910",
    ///         ]);
    ///         Ok(())
    ///     } else {
    ///         Err(From::from("expected at least one record but got none"))
    ///     }
    /// }
    /// ```
    pub fn quoting(&mut self, yes: bool) -> &mut ReaderBuilder {
        self.builder.quoting(yes);
        self
    }

    /// The comment character to use when parsing CSV.
    ///
    /// If the start of a record begins with the byte given here, then that
    /// line is ignored by the CSV parser.
    ///
    /// This is disabled by default.
    ///
    /// # Example
    ///
    /// ```
    /// use std::error::Error;
    /// use csv::ReaderBuilder;
    ///
    /// # fn main() { example().unwrap(); }
    /// fn example() -> Result<(), Box<dyn Error>> {
    ///     let data = "\
    /// city,country,pop
    /// #Concord,United States,42695
    /// Boston,United States,4628910
    /// ";
    ///     let mut rdr = ReaderBuilder::new()
    ///         .comment(Some(b'#'))
    ///         .from_reader(data.as_bytes());
    ///
    ///     if let Some(result) = rdr.records().next() {
    ///         let record = result?;
    ///         assert_eq!(record, vec!["Boston", "United States", "4628910"]);
    ///         Ok(())
    ///     } else {
    ///         Err(From::from("expected at least one record but got none"))
    ///     }
    /// }
    /// ```
    pub fn comment(&mut self, comment: Option<u8>) -> &mut ReaderBuilder {
        self.builder.comment(comment);
        self
    }

    /// A convenience method for specifying a configuration to read ASCII
    /// delimited text.
    ///
    /// This sets the delimiter and record terminator to the ASCII unit
    /// separator (`\x1F`) and record separator (`\x1E`), respectively.
    ///
    /// # Example
    ///
    /// ```
    /// use std::error::Error;
    /// use csv::ReaderBuilder;
    ///
    /// # fn main() { example().unwrap(); }
    /// fn example() -> Result<(), Box<dyn Error>> {
    ///     let data = "\
    /// city\x1Fcountry\x1Fpop\x1EBoston\x1FUnited States\x1F4628910";
    ///     let mut rdr = ReaderBuilder::new()
    ///         .ascii()
    ///         .from_reader(data.as_bytes());
    ///
    ///     if let Some(result) = rdr.records().next() {
    ///         let record = result?;
    ///         assert_eq!(record, vec!["Boston", "United States", "4628910"]);
    ///         Ok(())
    ///     } else {
    ///         Err(From::from("expected at least one record but got none"))
    ///     }
    /// }
    /// ```
    pub fn ascii(&mut self) -> &mut ReaderBuilder {
        self.builder.ascii();
        self
    }

    /// Set the capacity (in bytes) of the buffer used in the CSV reader.
    /// This defaults to a reasonable setting (8 KiB).
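    ///
    /// # Example
    ///
    /// A minimal sketch: read a record using a larger 64 KiB buffer, which
    /// can help when individual records are very large.
    ///
    /// ```
    /// use std::error::Error;
    /// use csv::ReaderBuilder;
    ///
    /// # fn main() { example().unwrap(); }
    /// fn example() -> Result<(), Box<dyn Error>> {
    ///     let data = "\
    /// city,country,pop
    /// Boston,United States,4628910
    /// ";
    ///     let mut rdr = ReaderBuilder::new()
    ///         .buffer_capacity(64 * (1 << 10))
    ///         .from_reader(data.as_bytes());
    ///
    ///     if let Some(result) = rdr.records().next() {
    ///         let record = result?;
    ///         assert_eq!(record, vec!["Boston", "United States", "4628910"]);
    ///         Ok(())
    ///     } else {
    ///         Err(From::from("expected at least one record but got none"))
    ///     }
    /// }
    /// ```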
    pub fn buffer_capacity(&mut self, capacity: usize) -> &mut ReaderBuilder {
        self.capacity = capacity;
        self
    }

    /// Enable or disable the NFA for parsing CSV.
    ///
    /// This is intended to be a debug option. The NFA is always slower than
    /// the DFA.
    #[doc(hidden)]
    pub fn nfa(&mut self, yes: bool) -> &mut ReaderBuilder {
        self.builder.nfa(yes);
        self
    }
}

/// An already configured CSV reader.
///
/// A CSV reader takes as input CSV data and transforms that into standard Rust
/// values. The most flexible way to read CSV data is as a sequence of records,
/// where a record is a sequence of fields and each field is a string. However,
/// a reader can also deserialize CSV data into Rust types like `i64` or
/// `(String, f64, f64, f64)` or even a custom struct automatically using
/// Serde.
///
/// # Configuration
///
/// A CSV reader has a couple of convenient constructor methods like
/// `from_path` and `from_reader`. However, if you want to configure the CSV
/// reader to use a different delimiter or quote character (among many other
/// things), then you should use a [`ReaderBuilder`](struct.ReaderBuilder.html)
/// to construct a `Reader`. For example, to change the field delimiter:
///
/// ```
/// use std::error::Error;
/// use csv::ReaderBuilder;
///
/// # fn main() { example().unwrap(); }
/// fn example() -> Result<(), Box<dyn Error>> {
///     let data = "\
/// city;country;pop
/// Boston;United States;4628910
/// ";
///     let mut rdr = ReaderBuilder::new()
///         .delimiter(b';')
///         .from_reader(data.as_bytes());
///
///     if let Some(result) = rdr.records().next() {
///         let record = result?;
///         assert_eq!(record, vec!["Boston", "United States", "4628910"]);
///         Ok(())
///     } else {
///         Err(From::from("expected at least one record but got none"))
///     }
/// }
/// ```
///
/// # Error handling
///
/// In general, CSV *parsing* does not ever return an error. That is, there is
/// no such thing as malformed CSV data. Instead, this reader will prioritize
/// finding a parse over rejecting CSV data that it does not understand. This
/// choice was inspired by other popular CSV parsers, but also because it is
/// pragmatic. CSV data varies wildly, so even if the CSV data is malformed,
/// it might still be possible to work with the data. In the land of CSV, there
/// is no "right" or "wrong," only "right" and "less right."
///
/// With that said, a number of errors can occur while reading CSV data:
///
/// * By default, all records in CSV data must have the same number of fields.
///   If a record is found with a different number of fields than a prior
///   record, then an error is returned. This behavior can be disabled by
///   enabling flexible parsing via the `flexible` method on
///   [`ReaderBuilder`](struct.ReaderBuilder.html).
/// * When reading CSV data from a resource (like a file), it is possible for
///   reading from the underlying resource to fail. This will return an error.
///   For subsequent calls to the `Reader` after encountering such an error
///   (unless `seek` is used), it will behave as if end of file had been
///   reached, in order to avoid running into infinite loops when still
///   attempting to read the next record when one has errored.
/// * When reading CSV data into `String` or `&str` fields (e.g., via a
///   [`StringRecord`](struct.StringRecord.html)), UTF-8 is strictly
///   enforced. If CSV data is invalid UTF-8, then an error is returned. If
///   you want to read invalid UTF-8, then you should use the byte oriented
///   APIs such as [`ByteRecord`](struct.ByteRecord.html). If you need explicit
///   support for another encoding entirely, then you'll need to use another
///   crate to transcode your CSV data to UTF-8 before parsing it.
/// * When using Serde to deserialize CSV data into Rust types, it is possible
///   for a number of additional errors to occur. For example, deserializing
///   a field `xyz` into an `i32` field will result in an error.
///
/// For more details on the precise semantics of errors, see the
/// [`Error`](enum.Error.html) type.
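///
/// As a rough sketch of the last point, deserializing a non-numeric field
/// into an `i32` yields an error for that record rather than a panic:
///
/// ```
/// let data = "\
/// city,country,pop
/// Boston,United States,xyz
/// ";
/// let mut rdr = csv::Reader::from_reader(data.as_bytes());
/// let mut iter = rdr.deserialize::<(String, String, i32)>();
/// // `xyz` is not a valid i32, so this record yields an error.
/// assert!(iter.next().unwrap().is_err());
/// ```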
#[derive(Debug)]
pub struct Reader<R> {
    /// The underlying CSV parser.
    ///
    /// We explicitly put this on the heap because CoreReader embeds an entire
    /// DFA transition table, which along with other things, tallies up to
    /// almost 500 bytes on the stack.
    core: Box<CoreReader>,
    /// The underlying reader.
    rdr: io::BufReader<R>,
    /// Various state tracking.
    ///
    /// There is more state embedded in the `CoreReader`.
    state: ReaderState,
}

#[derive(Debug)]
struct ReaderState {
    /// When set, this contains the first row of any parsed CSV data.
    ///
    /// This is always populated, regardless of whether `has_headers` is set.
    headers: Option<Headers>,
    /// When set, the first row of parsed CSV data is excluded from things
    /// that read records, like iterators and `read_record`.
    has_headers: bool,
    /// When set, there is no restriction on the length of records. When not
    /// set, every record must have the same number of fields, or else an error
    /// is reported.
    flexible: bool,
    trim: Trim,
    /// The number of fields in the first record parsed.
    first_field_count: Option<u64>,
    /// The current position of the parser.
    ///
    /// Note that this position is only observable by callers at the start
    /// of a record. More granular positions are not supported.
    cur_pos: Position,
    /// Whether the first record has been read or not.
    first: bool,
    /// Whether the reader has been seeked or not.
    seeked: bool,
    /// Whether EOF of the underlying reader has been reached or not.
    ///
    /// IO errors on the underlying reader will be considered as an EOF for
    /// subsequent read attempts, as it would be incorrect to keep on trying
    /// to read when the underlying reader has broken.
    ///
    /// For clarity, to have the best `Debug` impl, and in case they need to
    /// be treated differently at some point, we store whether the `EOF` is
    /// considered because an actual EOF happened, or because we encountered
    /// an IO error.
    /// This has no additional runtime cost.
    eof: ReaderEofState,
}

/// Whether EOF of the underlying reader has been reached or not.
///
/// IO errors on the underlying reader will be considered as an EOF for
/// subsequent read attempts, as it would be incorrect to keep on trying
/// to read when the underlying reader has broken.
///
/// For clarity, to have the best `Debug` impl, and in case they need to
/// be treated differently at some point, we store whether the `EOF` is
/// considered because an actual EOF happened, or because we encountered
/// an IO error.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum ReaderEofState {
    NotEof,
    Eof,
    IOError,
}

/// Headers encapsulates any data associated with the headers of CSV data.
///
/// The headers always correspond to the first row.
#[derive(Debug)]
struct Headers {
    /// The header, as raw bytes.
    byte_record: ByteRecord,
    /// The header, as valid UTF-8 (or a UTF-8 error).
    string_record: result::Result<StringRecord, Utf8Error>,
}

impl Reader<Reader<File>> {
    /// Create a new CSV parser with a default configuration for the given
    /// file path.
    ///
    /// To customize CSV parsing, use a `ReaderBuilder`.
    ///
    /// # Example
    ///
    /// ```no_run
    /// use std::error::Error;
    /// use csv::Reader;
    ///
    /// # fn main() { example().unwrap(); }
    /// fn example() -> Result<(), Box<dyn Error>> {
    ///     let mut rdr = Reader::from_path("foo.csv")?;
    ///     for result in rdr.records() {
    ///         let record = result?;
    ///         println!("{:?}", record);
    ///     }
    ///     Ok(())
    /// }
    /// ```
    pub fn from_path<P: AsRef<Path>>(path: P) -> Result<Reader<File>> {
        ReaderBuilder::new().from_path(path)
    }
}

impl<R: io::Read> Reader<R> {
    /// Create a new CSV reader given a builder and a source of underlying
    /// bytes.
    fn new(builder: &ReaderBuilder, rdr: R) -> Reader<R> {
        Reader {
            core: Box::new(builder.builder.build()),
            rdr: io::BufReader::with_capacity(builder.capacity, rdr),
            state: ReaderState {
                headers: None,
                has_headers: builder.has_headers,
                flexible: builder.flexible,
                trim: builder.trim,
                first_field_count: None,
                cur_pos: Position::new(),
                first: false,
                seeked: false,
                eof: ReaderEofState::NotEof,
            },
        }
    }

    /// Create a new CSV parser with a default configuration for the given
    /// reader.
    ///
    /// To customize CSV parsing, use a `ReaderBuilder`.
    ///
    /// # Example
    ///
    /// ```
    /// use std::error::Error;
    /// use csv::Reader;
    ///
    /// # fn main() { example().unwrap(); }
    /// fn example() -> Result<(), Box<dyn Error>> {
    ///     let data = "\
    /// city,country,pop
    /// Boston,United States,4628910
    /// Concord,United States,42695
    /// ";
    ///     let mut rdr = Reader::from_reader(data.as_bytes());
    ///     for result in rdr.records() {
    ///         let record = result?;
    ///         println!("{:?}", record);
    ///     }
    ///     Ok(())
    /// }
    /// ```
    pub fn from_reader(rdr: R) -> Reader<R> {
        ReaderBuilder::new().from_reader(rdr)
    }

    /// Returns a borrowed iterator over deserialized records.
    ///
    /// Each item yielded by this iterator is a `Result<D, Error>`.
    /// Therefore, in order to access the record, callers must handle the
    /// possibility of error (typically with `try!` or `?`).
    ///
    /// If `has_headers` was enabled via a `ReaderBuilder` (which is the
    /// default), then this does not include the first record.
Additionally, 881 /// if `has_headers` is enabled, then deserializing into a struct will 882 /// automatically align the values in each row to the fields of a struct 883 /// based on the header row. 884 /// 885 /// # Example 886 /// 887 /// This shows how to deserialize CSV data into normal Rust structs. The 888 /// fields of the header row are used to match up the values in each row 889 /// to the fields of the struct. 890 /// 891 /// ``` 892 /// use std::error::Error; 893 /// 894 /// #[derive(Debug, serde::Deserialize, Eq, PartialEq)] 895 /// struct Row { 896 /// city: String, 897 /// country: String, 898 /// #[serde(rename = "popcount")] 899 /// population: u64, 900 /// } 901 /// 902 /// # fn main() { example().unwrap(); } 903 /// fn example() -> Result<(), Box<dyn Error>> { 904 /// let data = "\ 905 /// city,country,popcount 906 /// Boston,United States,4628910 907 /// "; 908 /// let mut rdr = csv::Reader::from_reader(data.as_bytes()); 909 /// let mut iter = rdr.deserialize(); 910 /// 911 /// if let Some(result) = iter.next() { 912 /// let record: Row = result?; 913 /// assert_eq!(record, Row { 914 /// city: "Boston".to_string(), 915 /// country: "United States".to_string(), 916 /// population: 4628910, 917 /// }); 918 /// Ok(()) 919 /// } else { 920 /// Err(From::from("expected at least one record but got none")) 921 /// } 922 /// } 923 /// ``` 924 /// 925 /// # Rules 926 /// 927 /// For the most part, any Rust type that maps straight-forwardly to a CSV 928 /// record is supported. This includes maps, structs, tuples and tuple 929 /// structs. Other Rust types, such as `Vec`s, arrays, and enums have 930 /// a more complicated story. In general, when working with CSV data, one 931 /// should avoid *nested sequences* as much as possible. 932 /// 933 /// Maps, structs, tuples and tuple structs map to CSV records in a simple 934 /// way. Tuples and tuple structs decode their fields in the order that 935 /// they are defined. Structs will do the same only if `has_headers` has 936 /// been disabled using [`ReaderBuilder`](struct.ReaderBuilder.html), 937 /// otherwise, structs and maps are deserialized based on the fields 938 /// defined in the header row. (If there is no header row, then 939 /// deserializing into a map will result in an error.) 940 /// 941 /// Nested sequences are supported in a limited capacity. Namely, they 942 /// are flattened. As a result, it's often useful to use a `Vec` to capture 943 /// a "tail" of fields in a record: 944 /// 945 /// ``` 946 /// use std::error::Error; 947 /// 948 /// #[derive(Debug, serde::Deserialize, Eq, PartialEq)] 949 /// struct Row { 950 /// label: String, 951 /// values: Vec<i32>, 952 /// } 953 /// 954 /// # fn main() { example().unwrap(); } 955 /// fn example() -> Result<(), Box<dyn Error>> { 956 /// let data = "foo,1,2,3"; 957 /// let mut rdr = csv::ReaderBuilder::new() 958 /// .has_headers(false) 959 /// .from_reader(data.as_bytes()); 960 /// let mut iter = rdr.deserialize(); 961 /// 962 /// if let Some(result) = iter.next() { 963 /// let record: Row = result?; 964 /// assert_eq!(record, Row { 965 /// label: "foo".to_string(), 966 /// values: vec![1, 2, 3], 967 /// }); 968 /// Ok(()) 969 /// } else { 970 /// Err(From::from("expected at least one record but got none")) 971 /// } 972 /// } 973 /// ``` 974 /// 975 /// In the above example, adding another field to the `Row` struct after 976 /// the `values` field will result in a deserialization error. 
This is 977 /// because the deserializer doesn't know when to stop reading fields 978 /// into the `values` vector, so it will consume the rest of the fields in 979 /// the record leaving none left over for the additional field. 980 /// 981 /// Finally, simple enums in Rust can be deserialized as well. Namely, 982 /// enums must either be variants with no arguments or variants with a 983 /// single argument. Variants with no arguments are deserialized based on 984 /// which variant name the field matches. Variants with a single argument 985 /// are deserialized based on which variant can store the data. The latter 986 /// is only supported when using "untagged" enum deserialization. The 987 /// following example shows both forms in action: 988 /// 989 /// ``` 990 /// use std::error::Error; 991 /// 992 /// #[derive(Debug, serde::Deserialize, PartialEq)] 993 /// struct Row { 994 /// label: Label, 995 /// value: Number, 996 /// } 997 /// 998 /// #[derive(Debug, serde::Deserialize, PartialEq)] 999 /// #[serde(rename_all = "lowercase")] 1000 /// enum Label { 1001 /// Celsius, 1002 /// Fahrenheit, 1003 /// } 1004 /// 1005 /// #[derive(Debug, serde::Deserialize, PartialEq)] 1006 /// #[serde(untagged)] 1007 /// enum Number { 1008 /// Integer(i64), 1009 /// Float(f64), 1010 /// } 1011 /// 1012 /// # fn main() { example().unwrap(); } 1013 /// fn example() -> Result<(), Box<dyn Error>> { 1014 /// let data = "\ 1015 /// label,value 1016 /// celsius,22.2222 1017 /// fahrenheit,72 1018 /// "; 1019 /// let mut rdr = csv::Reader::from_reader(data.as_bytes()); 1020 /// let mut iter = rdr.deserialize(); 1021 /// 1022 /// // Read the first record. 1023 /// if let Some(result) = iter.next() { 1024 /// let record: Row = result?; 1025 /// assert_eq!(record, Row { 1026 /// label: Label::Celsius, 1027 /// value: Number::Float(22.2222), 1028 /// }); 1029 /// } else { 1030 /// return Err(From::from( 1031 /// "expected at least two records but got none")); 1032 /// } 1033 /// 1034 /// // Read the second record. 1035 /// if let Some(result) = iter.next() { 1036 /// let record: Row = result?; 1037 /// assert_eq!(record, Row { 1038 /// label: Label::Fahrenheit, 1039 /// value: Number::Integer(72), 1040 /// }); 1041 /// Ok(()) 1042 /// } else { 1043 /// Err(From::from( 1044 /// "expected at least two records but got only one")) 1045 /// } 1046 /// } 1047 /// ``` deserialize<D>(&mut self) -> DeserializeRecordsIter<R, D> where D: DeserializeOwned,1048 pub fn deserialize<D>(&mut self) -> DeserializeRecordsIter<R, D> 1049 where 1050 D: DeserializeOwned, 1051 { 1052 DeserializeRecordsIter::new(self) 1053 } 1054 1055 /// Returns an owned iterator over deserialized records. 1056 /// 1057 /// Each item yielded by this iterator is a `Result<D, Error>`. 1058 /// Therefore, in order to access the record, callers must handle the 1059 /// possibility of error (typically with `try!` or `?`). 1060 /// 1061 /// This is mostly useful when you want to return a CSV iterator or store 1062 /// it somewhere. 1063 /// 1064 /// If `has_headers` was enabled via a `ReaderBuilder` (which is the 1065 /// default), then this does not include the first record. Additionally, 1066 /// if `has_headers` is enabled, then deserializing into a struct will 1067 /// automatically align the values in each row to the fields of a struct 1068 /// based on the header row. 1069 /// 1070 /// For more detailed deserialization rules, see the documentation on the 1071 /// `deserialize` method. 
1072 /// 1073 /// # Example 1074 /// 1075 /// ``` 1076 /// use std::error::Error; 1077 /// 1078 /// #[derive(Debug, serde::Deserialize, Eq, PartialEq)] 1079 /// struct Row { 1080 /// city: String, 1081 /// country: String, 1082 /// #[serde(rename = "popcount")] 1083 /// population: u64, 1084 /// } 1085 /// 1086 /// # fn main() { example().unwrap(); } 1087 /// fn example() -> Result<(), Box<dyn Error>> { 1088 /// let data = "\ 1089 /// city,country,popcount 1090 /// Boston,United States,4628910 1091 /// "; 1092 /// let rdr = csv::Reader::from_reader(data.as_bytes()); 1093 /// let mut iter = rdr.into_deserialize(); 1094 /// 1095 /// if let Some(result) = iter.next() { 1096 /// let record: Row = result?; 1097 /// assert_eq!(record, Row { 1098 /// city: "Boston".to_string(), 1099 /// country: "United States".to_string(), 1100 /// population: 4628910, 1101 /// }); 1102 /// Ok(()) 1103 /// } else { 1104 /// Err(From::from("expected at least one record but got none")) 1105 /// } 1106 /// } 1107 /// ``` into_deserialize<D>(self) -> DeserializeRecordsIntoIter<R, D> where D: DeserializeOwned,1108 pub fn into_deserialize<D>(self) -> DeserializeRecordsIntoIter<R, D> 1109 where 1110 D: DeserializeOwned, 1111 { 1112 DeserializeRecordsIntoIter::new(self) 1113 } 1114 1115 /// Returns a borrowed iterator over all records as strings. 1116 /// 1117 /// Each item yielded by this iterator is a `Result<StringRecord, Error>`. 1118 /// Therefore, in order to access the record, callers must handle the 1119 /// possibility of error (typically with `try!` or `?`). 1120 /// 1121 /// If `has_headers` was enabled via a `ReaderBuilder` (which is the 1122 /// default), then this does not include the first record. 1123 /// 1124 /// # Example 1125 /// 1126 /// ``` 1127 /// use std::error::Error; 1128 /// use csv::Reader; 1129 /// 1130 /// # fn main() { example().unwrap(); } 1131 /// fn example() -> Result<(), Box<dyn Error>> { 1132 /// let data = "\ 1133 /// city,country,pop 1134 /// Boston,United States,4628910 1135 /// "; 1136 /// let mut rdr = Reader::from_reader(data.as_bytes()); 1137 /// let mut iter = rdr.records(); 1138 /// 1139 /// if let Some(result) = iter.next() { 1140 /// let record = result?; 1141 /// assert_eq!(record, vec!["Boston", "United States", "4628910"]); 1142 /// Ok(()) 1143 /// } else { 1144 /// Err(From::from("expected at least one record but got none")) 1145 /// } 1146 /// } 1147 /// ``` records(&mut self) -> StringRecordsIter<R>1148 pub fn records(&mut self) -> StringRecordsIter<R> { 1149 StringRecordsIter::new(self) 1150 } 1151 1152 /// Returns an owned iterator over all records as strings. 1153 /// 1154 /// Each item yielded by this iterator is a `Result<StringRecord, Error>`. 1155 /// Therefore, in order to access the record, callers must handle the 1156 /// possibility of error (typically with `try!` or `?`). 1157 /// 1158 /// This is mostly useful when you want to return a CSV iterator or store 1159 /// it somewhere. 1160 /// 1161 /// If `has_headers` was enabled via a `ReaderBuilder` (which is the 1162 /// default), then this does not include the first record. 
1163 /// 1164 /// # Example 1165 /// 1166 /// ``` 1167 /// use std::error::Error; 1168 /// use csv::Reader; 1169 /// 1170 /// # fn main() { example().unwrap(); } 1171 /// fn example() -> Result<(), Box<dyn Error>> { 1172 /// let data = "\ 1173 /// city,country,pop 1174 /// Boston,United States,4628910 1175 /// "; 1176 /// let rdr = Reader::from_reader(data.as_bytes()); 1177 /// let mut iter = rdr.into_records(); 1178 /// 1179 /// if let Some(result) = iter.next() { 1180 /// let record = result?; 1181 /// assert_eq!(record, vec!["Boston", "United States", "4628910"]); 1182 /// Ok(()) 1183 /// } else { 1184 /// Err(From::from("expected at least one record but got none")) 1185 /// } 1186 /// } 1187 /// ``` into_records(self) -> StringRecordsIntoIter<R>1188 pub fn into_records(self) -> StringRecordsIntoIter<R> { 1189 StringRecordsIntoIter::new(self) 1190 } 1191 1192 /// Returns a borrowed iterator over all records as raw bytes. 1193 /// 1194 /// Each item yielded by this iterator is a `Result<ByteRecord, Error>`. 1195 /// Therefore, in order to access the record, callers must handle the 1196 /// possibility of error (typically with `try!` or `?`). 1197 /// 1198 /// If `has_headers` was enabled via a `ReaderBuilder` (which is the 1199 /// default), then this does not include the first record. 1200 /// 1201 /// # Example 1202 /// 1203 /// ``` 1204 /// use std::error::Error; 1205 /// use csv::Reader; 1206 /// 1207 /// # fn main() { example().unwrap(); } 1208 /// fn example() -> Result<(), Box<dyn Error>> { 1209 /// let data = "\ 1210 /// city,country,pop 1211 /// Boston,United States,4628910 1212 /// "; 1213 /// let mut rdr = Reader::from_reader(data.as_bytes()); 1214 /// let mut iter = rdr.byte_records(); 1215 /// 1216 /// if let Some(result) = iter.next() { 1217 /// let record = result?; 1218 /// assert_eq!(record, vec!["Boston", "United States", "4628910"]); 1219 /// Ok(()) 1220 /// } else { 1221 /// Err(From::from("expected at least one record but got none")) 1222 /// } 1223 /// } 1224 /// ``` byte_records(&mut self) -> ByteRecordsIter<R>1225 pub fn byte_records(&mut self) -> ByteRecordsIter<R> { 1226 ByteRecordsIter::new(self) 1227 } 1228 1229 /// Returns an owned iterator over all records as raw bytes. 1230 /// 1231 /// Each item yielded by this iterator is a `Result<ByteRecord, Error>`. 1232 /// Therefore, in order to access the record, callers must handle the 1233 /// possibility of error (typically with `try!` or `?`). 1234 /// 1235 /// This is mostly useful when you want to return a CSV iterator or store 1236 /// it somewhere. 1237 /// 1238 /// If `has_headers` was enabled via a `ReaderBuilder` (which is the 1239 /// default), then this does not include the first record. 
1240 /// 1241 /// # Example 1242 /// 1243 /// ``` 1244 /// use std::error::Error; 1245 /// use csv::Reader; 1246 /// 1247 /// # fn main() { example().unwrap(); } 1248 /// fn example() -> Result<(), Box<dyn Error>> { 1249 /// let data = "\ 1250 /// city,country,pop 1251 /// Boston,United States,4628910 1252 /// "; 1253 /// let rdr = Reader::from_reader(data.as_bytes()); 1254 /// let mut iter = rdr.into_byte_records(); 1255 /// 1256 /// if let Some(result) = iter.next() { 1257 /// let record = result?; 1258 /// assert_eq!(record, vec!["Boston", "United States", "4628910"]); 1259 /// Ok(()) 1260 /// } else { 1261 /// Err(From::from("expected at least one record but got none")) 1262 /// } 1263 /// } 1264 /// ``` into_byte_records(self) -> ByteRecordsIntoIter<R>1265 pub fn into_byte_records(self) -> ByteRecordsIntoIter<R> { 1266 ByteRecordsIntoIter::new(self) 1267 } 1268 1269 /// Returns a reference to the first row read by this parser. 1270 /// 1271 /// If no row has been read yet, then this will force parsing of the first 1272 /// row. 1273 /// 1274 /// If there was a problem parsing the row or if it wasn't valid UTF-8, 1275 /// then this returns an error. 1276 /// 1277 /// If the underlying reader emits EOF before any data, then this returns 1278 /// an empty record. 1279 /// 1280 /// Note that this method may be used regardless of whether `has_headers` 1281 /// was enabled (but it is enabled by default). 1282 /// 1283 /// # Example 1284 /// 1285 /// This example shows how to get the header row of CSV data. Notice that 1286 /// the header row does not appear as a record in the iterator! 1287 /// 1288 /// ``` 1289 /// use std::error::Error; 1290 /// use csv::Reader; 1291 /// 1292 /// # fn main() { example().unwrap(); } 1293 /// fn example() -> Result<(), Box<dyn Error>> { 1294 /// let data = "\ 1295 /// city,country,pop 1296 /// Boston,United States,4628910 1297 /// "; 1298 /// let mut rdr = Reader::from_reader(data.as_bytes()); 1299 /// 1300 /// // We can read the headers before iterating. 1301 /// { 1302 /// // `headers` borrows from the reader, so we put this in its 1303 /// // own scope. That way, the borrow ends before we try iterating 1304 /// // below. Alternatively, we could clone the headers. 1305 /// let headers = rdr.headers()?; 1306 /// assert_eq!(headers, vec!["city", "country", "pop"]); 1307 /// } 1308 /// 1309 /// if let Some(result) = rdr.records().next() { 1310 /// let record = result?; 1311 /// assert_eq!(record, vec!["Boston", "United States", "4628910"]); 1312 /// } else { 1313 /// return Err(From::from( 1314 /// "expected at least one record but got none")) 1315 /// } 1316 /// 1317 /// // We can also read the headers after iterating. 1318 /// let headers = rdr.headers()?; 1319 /// assert_eq!(headers, vec!["city", "country", "pop"]); 1320 /// Ok(()) 1321 /// } 1322 /// ``` headers(&mut self) -> Result<&StringRecord>1323 pub fn headers(&mut self) -> Result<&StringRecord> { 1324 if self.state.headers.is_none() { 1325 let mut record = ByteRecord::new(); 1326 self.read_byte_record_impl(&mut record)?; 1327 self.set_headers_impl(Err(record)); 1328 } 1329 let headers = self.state.headers.as_ref().unwrap(); 1330 match headers.string_record { 1331 Ok(ref record) => Ok(record), 1332 Err(ref err) => Err(Error::new(ErrorKind::Utf8 { 1333 pos: headers.byte_record.position().map(Clone::clone), 1334 err: err.clone(), 1335 })), 1336 } 1337 } 1338 1339 /// Returns a reference to the first row read by this parser as raw bytes. 
1340 /// 1341 /// If no row has been read yet, then this will force parsing of the first 1342 /// row. 1343 /// 1344 /// If there was a problem parsing the row then this returns an error. 1345 /// 1346 /// If the underlying reader emits EOF before any data, then this returns 1347 /// an empty record. 1348 /// 1349 /// Note that this method may be used regardless of whether `has_headers` 1350 /// was enabled (but it is enabled by default). 1351 /// 1352 /// # Example 1353 /// 1354 /// This example shows how to get the header row of CSV data. Notice that 1355 /// the header row does not appear as a record in the iterator! 1356 /// 1357 /// ``` 1358 /// use std::error::Error; 1359 /// use csv::Reader; 1360 /// 1361 /// # fn main() { example().unwrap(); } 1362 /// fn example() -> Result<(), Box<dyn Error>> { 1363 /// let data = "\ 1364 /// city,country,pop 1365 /// Boston,United States,4628910 1366 /// "; 1367 /// let mut rdr = Reader::from_reader(data.as_bytes()); 1368 /// 1369 /// // We can read the headers before iterating. 1370 /// { 1371 /// // `headers` borrows from the reader, so we put this in its 1372 /// // own scope. That way, the borrow ends before we try iterating 1373 /// // below. Alternatively, we could clone the headers. 1374 /// let headers = rdr.byte_headers()?; 1375 /// assert_eq!(headers, vec!["city", "country", "pop"]); 1376 /// } 1377 /// 1378 /// if let Some(result) = rdr.byte_records().next() { 1379 /// let record = result?; 1380 /// assert_eq!(record, vec!["Boston", "United States", "4628910"]); 1381 /// } else { 1382 /// return Err(From::from( 1383 /// "expected at least one record but got none")) 1384 /// } 1385 /// 1386 /// // We can also read the headers after iterating. 1387 /// let headers = rdr.byte_headers()?; 1388 /// assert_eq!(headers, vec!["city", "country", "pop"]); 1389 /// Ok(()) 1390 /// } 1391 /// ``` byte_headers(&mut self) -> Result<&ByteRecord>1392 pub fn byte_headers(&mut self) -> Result<&ByteRecord> { 1393 if self.state.headers.is_none() { 1394 let mut record = ByteRecord::new(); 1395 self.read_byte_record_impl(&mut record)?; 1396 self.set_headers_impl(Err(record)); 1397 } 1398 Ok(&self.state.headers.as_ref().unwrap().byte_record) 1399 } 1400 1401 /// Set the headers of this CSV parser manually. 1402 /// 1403 /// This overrides any other setting (including `set_byte_headers`). Any 1404 /// automatic detection of headers is disabled. This may be called at any 1405 /// time. 1406 /// 1407 /// # Example 1408 /// 1409 /// ``` 1410 /// use std::error::Error; 1411 /// use csv::{Reader, StringRecord}; 1412 /// 1413 /// # fn main() { example().unwrap(); } 1414 /// fn example() -> Result<(), Box<dyn Error>> { 1415 /// let data = "\ 1416 /// city,country,pop 1417 /// Boston,United States,4628910 1418 /// "; 1419 /// let mut rdr = Reader::from_reader(data.as_bytes()); 1420 /// 1421 /// assert_eq!(rdr.headers()?, vec!["city", "country", "pop"]); 1422 /// rdr.set_headers(StringRecord::from(vec!["a", "b", "c"])); 1423 /// assert_eq!(rdr.headers()?, vec!["a", "b", "c"]); 1424 /// 1425 /// Ok(()) 1426 /// } 1427 /// ``` set_headers(&mut self, headers: StringRecord)1428 pub fn set_headers(&mut self, headers: StringRecord) { 1429 self.set_headers_impl(Ok(headers)); 1430 } 1431 1432 /// Set the headers of this CSV parser manually as raw bytes. 1433 /// 1434 /// This overrides any other setting (including `set_headers`). Any 1435 /// automatic detection of headers is disabled. This may be called at any 1436 /// time. 
    ///
    /// # Example
    ///
    /// ```
    /// use std::error::Error;
    /// use csv::{Reader, ByteRecord};
    ///
    /// # fn main() { example().unwrap(); }
    /// fn example() -> Result<(), Box<dyn Error>> {
    ///     let data = "\
    /// city,country,pop
    /// Boston,United States,4628910
    /// ";
    ///     let mut rdr = Reader::from_reader(data.as_bytes());
    ///
    ///     assert_eq!(rdr.byte_headers()?, vec!["city", "country", "pop"]);
    ///     rdr.set_byte_headers(ByteRecord::from(vec!["a", "b", "c"]));
    ///     assert_eq!(rdr.byte_headers()?, vec!["a", "b", "c"]);
    ///
    ///     Ok(())
    /// }
    /// ```
    pub fn set_byte_headers(&mut self, headers: ByteRecord) {
        self.set_headers_impl(Err(headers));
    }

    fn set_headers_impl(
        &mut self,
        headers: result::Result<StringRecord, ByteRecord>,
    ) {
        // If we have string headers, then get byte headers. But if we have
        // byte headers, then get the string headers (or a UTF-8 error).
        let (mut str_headers, mut byte_headers) = match headers {
            Ok(string) => {
                let bytes = string.clone().into_byte_record();
                (Ok(string), bytes)
            }
            Err(bytes) => {
                match StringRecord::from_byte_record(bytes.clone()) {
                    Ok(str_headers) => (Ok(str_headers), bytes),
                    Err(err) => (Err(err.utf8_error().clone()), bytes),
                }
            }
        };
        if self.state.trim.should_trim_headers() {
            if let Ok(ref mut str_headers) = str_headers.as_mut() {
                str_headers.trim();
            }
            byte_headers.trim();
        }
        self.state.headers = Some(Headers {
            byte_record: byte_headers,
            string_record: str_headers,
        });
    }

    /// Read a single row into the given record. Returns false when no more
    /// records could be read.
    ///
    /// If `has_headers` was enabled via a `ReaderBuilder` (which is the
    /// default), then this will never read the first record.
    ///
    /// This method is useful when you want to read records as fast as
    /// possible. It's less ergonomic than an iterator, but it permits the
    /// caller to reuse the `StringRecord` allocation, which usually results
    /// in higher throughput.
    ///
    /// Records read via this method are guaranteed to have a position set
    /// on them, even if the reader is at EOF or if an error is returned.
    ///
    /// # Example
    ///
    /// ```
    /// use std::error::Error;
    /// use csv::{Reader, StringRecord};
    ///
    /// # fn main() { example().unwrap(); }
    /// fn example() -> Result<(), Box<dyn Error>> {
    ///     let data = "\
    /// city,country,pop
    /// Boston,United States,4628910
    /// ";
    ///     let mut rdr = Reader::from_reader(data.as_bytes());
    ///     let mut record = StringRecord::new();
    ///
    ///     if rdr.read_record(&mut record)? {
    ///         assert_eq!(record, vec!["Boston", "United States", "4628910"]);
    ///         Ok(())
    ///     } else {
    ///         Err(From::from("expected at least one record but got none"))
    ///     }
    /// }
    /// ```
    pub fn read_record(&mut self, record: &mut StringRecord) -> Result<bool> {
        let result = record.read(self);
        // We need to trim again because trimming string records includes
        // Unicode whitespace. (ByteRecord trimming only includes ASCII
        // whitespace.)
        if self.state.trim.should_trim_fields() {
            record.trim();
        }
        result
    }

    /// Read a single row into the given byte record. Returns false when no
    /// more records could be read.
    ///
    /// If `has_headers` was enabled via a `ReaderBuilder` (which is the
    /// default), then this will never read the first record.
    ///
    /// This method is useful when you want to read records as fast as
    /// possible. It's less ergonomic than an iterator, but it permits the
    /// caller to reuse the `ByteRecord` allocation, which usually results
    /// in higher throughput.
    ///
    /// Records read via this method are guaranteed to have a position set
    /// on them, even if the reader is at EOF or if an error is returned.
    ///
    /// # Example
    ///
    /// ```
    /// use std::error::Error;
    /// use csv::{ByteRecord, Reader};
    ///
    /// # fn main() { example().unwrap(); }
    /// fn example() -> Result<(), Box<dyn Error>> {
    ///     let data = "\
    /// city,country,pop
    /// Boston,United States,4628910
    /// ";
    ///     let mut rdr = Reader::from_reader(data.as_bytes());
    ///     let mut record = ByteRecord::new();
    ///
    ///     if rdr.read_byte_record(&mut record)? {
    ///         assert_eq!(record, vec!["Boston", "United States", "4628910"]);
    ///         Ok(())
    ///     } else {
    ///         Err(From::from("expected at least one record but got none"))
    ///     }
    /// }
    /// ```
    pub fn read_byte_record(
        &mut self,
        record: &mut ByteRecord,
    ) -> Result<bool> {
        if !self.state.seeked && !self.state.has_headers && !self.state.first {
            // If the caller indicated "no headers" and we haven't yielded the
            // first record yet, then we should yield our header row if we have
            // one.
            if let Some(ref headers) = self.state.headers {
                self.state.first = true;
                record.clone_from(&headers.byte_record);
                if self.state.trim.should_trim_fields() {
                    record.trim();
                }
                return Ok(!record.is_empty());
            }
        }
        let ok = self.read_byte_record_impl(record)?;
        self.state.first = true;
        if !self.state.seeked && self.state.headers.is_none() {
            self.set_headers_impl(Err(record.clone()));
            // If the end user indicated that we have headers, then we should
            // never return the first row. Instead, we should attempt to
            // read and return the next one.
            if self.state.has_headers {
                let result = self.read_byte_record_impl(record);
                if self.state.trim.should_trim_fields() {
                    record.trim();
                }
                return result;
            }
        } else if self.state.trim.should_trim_fields() {
            record.trim();
        }
        Ok(ok)
    }

    /// Read a byte record from the underlying CSV reader, without accounting
    /// for headers.
    #[inline(always)]
    fn read_byte_record_impl(
        &mut self,
        record: &mut ByteRecord,
    ) -> Result<bool> {
        use csv_core::ReadRecordResult::*;

        record.clear();
        record.set_position(Some(self.state.cur_pos.clone()));
        if self.state.eof != ReaderEofState::NotEof {
            return Ok(false);
        }
        let (mut outlen, mut endlen) = (0, 0);
        loop {
            let (res, nin, nout, nend) = {
                let input_res = self.rdr.fill_buf();
                if input_res.is_err() {
                    self.state.eof = ReaderEofState::IOError;
                }
                let input = input_res?;
                let (fields, ends) = record.as_parts();
                self.core.read_record(
                    input,
                    &mut fields[outlen..],
                    &mut ends[endlen..],
                )
            };
            self.rdr.consume(nin);
            let byte = self.state.cur_pos.byte();
            self.state
                .cur_pos
                .set_byte(byte + nin as u64)
                .set_line(self.core.line());
            outlen += nout;
            endlen += nend;
            match res {
                InputEmpty => continue,
                OutputFull => {
                    record.expand_fields();
                    continue;
                }
                OutputEndsFull => {
                    record.expand_ends();
                    continue;
                }
                Record => {
                    record.set_len(endlen);
                    self.state.add_record(record)?;
                    return Ok(true);
                }
                End => {
                    self.state.eof = ReaderEofState::Eof;
                    return Ok(false);
                }
            }
        }
    }

    /// Return the current position of this CSV reader.
    ///
    /// The byte offset in the position returned can be used to `seek` this
    /// reader. In particular, seeking to a position returned here on the same
    /// data will result in parsing the same subsequent record.
    ///
    /// # Example: reading the position
    ///
    /// ```
    /// use std::{error::Error, io};
    /// use csv::{Reader, Position};
    ///
    /// # fn main() { example().unwrap(); }
    /// fn example() -> Result<(), Box<dyn Error>> {
    ///     let data = "\
    /// city,country,popcount
    /// Boston,United States,4628910
    /// Concord,United States,42695
    /// ";
    ///     let rdr = Reader::from_reader(io::Cursor::new(data));
    ///     let mut iter = rdr.into_records();
    ///     let mut pos = Position::new();
    ///     loop {
    ///         // Read the position immediately before each record.
    ///         let next_pos = iter.reader().position().clone();
    ///         if iter.next().is_none() {
    ///             break;
    ///         }
    ///         pos = next_pos;
    ///     }
    ///
    ///     // `pos` should now be the position immediately before the last
    ///     // record.
    ///     assert_eq!(pos.byte(), 51);
    ///     assert_eq!(pos.line(), 3);
    ///     assert_eq!(pos.record(), 2);
    ///     Ok(())
    /// }
    /// ```
    pub fn position(&self) -> &Position {
        &self.state.cur_pos
    }

    /// Returns true if and only if this reader has been exhausted.
    ///
    /// When this returns true, no more records can be read from this reader
    /// (unless it has been seeked to another position).
    /// # Example: reading the position
    ///
    /// ```
    /// use std::{error::Error, io};
    /// use csv::{Reader, Position};
    ///
    /// # fn main() { example().unwrap(); }
    /// fn example() -> Result<(), Box<dyn Error>> {
    ///     let data = "\
    /// city,country,popcount
    /// Boston,United States,4628910
    /// Concord,United States,42695
    /// ";
    ///     let rdr = Reader::from_reader(io::Cursor::new(data));
    ///     let mut iter = rdr.into_records();
    ///     let mut pos = Position::new();
    ///     loop {
    ///         // Read the position immediately before each record.
    ///         let next_pos = iter.reader().position().clone();
    ///         if iter.next().is_none() {
    ///             break;
    ///         }
    ///         pos = next_pos;
    ///     }
    ///
    ///     // `pos` should now be the position immediately before the last
    ///     // record.
    ///     assert_eq!(pos.byte(), 51);
    ///     assert_eq!(pos.line(), 3);
    ///     assert_eq!(pos.record(), 2);
    ///     Ok(())
    /// }
    /// ```
    pub fn position(&self) -> &Position {
        &self.state.cur_pos
    }

    /// Returns true if and only if this reader has been exhausted.
    ///
    /// When this returns true, no more records can be read from this reader
    /// (unless it has been seeked to another position).
    ///
    /// # Example
    ///
    /// ```
    /// use std::{error::Error, io};
    /// use csv::{Reader, Position};
    ///
    /// # fn main() { example().unwrap(); }
    /// fn example() -> Result<(), Box<dyn Error>> {
    ///     let data = "\
    /// city,country,popcount
    /// Boston,United States,4628910
    /// Concord,United States,42695
    /// ";
    ///     let mut rdr = Reader::from_reader(io::Cursor::new(data));
    ///     assert!(!rdr.is_done());
    ///     for result in rdr.records() {
    ///         let _ = result?;
    ///     }
    ///     assert!(rdr.is_done());
    ///     Ok(())
    /// }
    /// ```
    pub fn is_done(&self) -> bool {
        self.state.eof != ReaderEofState::NotEof
    }

    /// Returns true if and only if this reader has been configured to
    /// interpret the first record as a header record.
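    ///
    /// # Example
    ///
    /// A minimal sketch (toy data) showing that the builder setting is
    /// reflected here:
    ///
    /// ```
    /// use csv::ReaderBuilder;
    ///
    /// let rdr = ReaderBuilder::new()
    ///     .has_headers(false)
    ///     .from_reader("a,b,c\nx,y,z".as_bytes());
    /// assert!(!rdr.has_headers());
    /// ```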
    pub fn has_headers(&self) -> bool {
        self.state.has_headers
    }

    /// Returns a reference to the underlying reader.
    pub fn get_ref(&self) -> &R {
        self.rdr.get_ref()
    }

    /// Returns a mutable reference to the underlying reader.
    pub fn get_mut(&mut self) -> &mut R {
        self.rdr.get_mut()
    }

    /// Unwraps this CSV reader, returning the underlying reader.
    ///
    /// Note that any leftover data inside this reader's internal buffer is
    /// lost.
    pub fn into_inner(self) -> R {
        self.rdr.into_inner()
    }
}

impl<R: io::Read + io::Seek> Reader<R> {
    /// Seeks the underlying reader to the position given.
    ///
    /// This comes with a few caveats:
    ///
    /// * Any internal buffer associated with this reader is cleared.
    /// * If the given position does not correspond to a position immediately
    ///   before the start of a record, then the behavior of this reader is
    ///   unspecified.
    /// * Any special logic that skips the first record in the CSV reader
    ///   when reading or iterating over records is disabled.
    ///
    /// If the given position has a byte offset equivalent to the current
    /// position, then no seeking is performed.
    ///
    /// If the header row has not already been read, then this will attempt
    /// to read the header row before seeking. Therefore, it is possible that
    /// this returns an error associated with reading CSV data.
    ///
    /// Note that seeking is performed based only on the byte offset in the
    /// given position. Namely, the record or line numbers in the position may
    /// be incorrect, but this will cause any future position generated by
    /// this CSV reader to be similarly incorrect.
    ///
    /// # Example: seek to parse a record twice
    ///
    /// ```
    /// use std::{error::Error, io};
    /// use csv::{Reader, Position};
    ///
    /// # fn main() { example().unwrap(); }
    /// fn example() -> Result<(), Box<dyn Error>> {
    ///     let data = "\
    /// city,country,popcount
    /// Boston,United States,4628910
    /// Concord,United States,42695
    /// ";
    ///     let rdr = Reader::from_reader(io::Cursor::new(data));
    ///     let mut iter = rdr.into_records();
    ///     let mut pos = Position::new();
    ///     loop {
    ///         // Read the position immediately before each record.
    ///         let next_pos = iter.reader().position().clone();
    ///         if iter.next().is_none() {
    ///             break;
    ///         }
    ///         pos = next_pos;
    ///     }
    ///
    ///     // Now seek the reader back to `pos`. This will let us read the
    ///     // last record again.
    ///     iter.reader_mut().seek(pos)?;
    ///     let mut iter = iter.into_reader().into_records();
    ///     if let Some(result) = iter.next() {
    ///         let record = result?;
    ///         assert_eq!(record, vec!["Concord", "United States", "42695"]);
    ///         Ok(())
    ///     } else {
    ///         Err(From::from("expected at least one record but got none"))
    ///     }
    /// }
    /// ```
    pub fn seek(&mut self, pos: Position) -> Result<()> {
        self.byte_headers()?;
        self.state.seeked = true;
        if pos.byte() == self.state.cur_pos.byte() {
            return Ok(());
        }
        self.rdr.seek(io::SeekFrom::Start(pos.byte()))?;
        self.core.reset();
        self.core.set_line(pos.line());
        self.state.cur_pos = pos;
        self.state.eof = ReaderEofState::NotEof;
        Ok(())
    }

    /// This is like `seek`, but provides direct control over how the seeking
    /// operation is performed via `io::SeekFrom`.
    ///
    /// The `pos` position given *should* correspond to the position indicated
    /// by `seek_from`, but there is no requirement. If the `pos` position
    /// given is incorrect, then the position information returned by this
    /// reader will be similarly incorrect.
    ///
    /// If the header row has not already been read, then this will attempt
    /// to read the header row before seeking. Therefore, it is possible that
    /// this returns an error associated with reading CSV data.
    ///
    /// Unlike `seek`, this will always cause an actual seek to be performed.
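    ///
    /// # Example
    ///
    /// A rough sketch; the byte offset `51` is specific to this toy input
    /// (it is the offset of the last record) and the position is built by
    /// hand to match it.
    ///
    /// ```
    /// use std::{error::Error, io};
    /// use csv::{Reader, Position};
    ///
    /// # fn main() { example().unwrap(); }
    /// fn example() -> Result<(), Box<dyn Error>> {
    ///     let data = "\
    /// city,country,popcount
    /// Boston,United States,4628910
    /// Concord,United States,42695
    /// ";
    ///     let mut rdr = Reader::from_reader(io::Cursor::new(data));
    ///
    ///     // Seek straight to the last record and tell the reader which
    ///     // position that byte offset corresponds to.
    ///     let mut pos = Position::new();
    ///     pos.set_byte(51).set_line(3).set_record(2);
    ///     rdr.seek_raw(io::SeekFrom::Start(51), pos)?;
    ///
    ///     let mut iter = rdr.into_records();
    ///     if let Some(result) = iter.next() {
    ///         let record = result?;
    ///         assert_eq!(record, vec!["Concord", "United States", "42695"]);
    ///         Ok(())
    ///     } else {
    ///         Err(From::from("expected at least one record but got none"))
    ///     }
    /// }
    /// ```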
    pub fn seek_raw(
        &mut self,
        seek_from: io::SeekFrom,
        pos: Position,
    ) -> Result<()> {
        self.byte_headers()?;
        self.state.seeked = true;
        self.rdr.seek(seek_from)?;
        self.core.reset();
        self.core.set_line(pos.line());
        self.state.cur_pos = pos;
        self.state.eof = ReaderEofState::NotEof;
        Ok(())
    }
}

impl ReaderState {
    #[inline(always)]
    fn add_record(&mut self, record: &ByteRecord) -> Result<()> {
        let i = self.cur_pos.record();
        self.cur_pos.set_record(i.checked_add(1).unwrap());
        if !self.flexible {
            match self.first_field_count {
                None => self.first_field_count = Some(record.len() as u64),
                Some(expected) => {
                    if record.len() as u64 != expected {
                        return Err(Error::new(ErrorKind::UnequalLengths {
                            pos: record.position().map(Clone::clone),
                            expected_len: expected,
                            len: record.len() as u64,
                        }));
                    }
                }
            }
        }
        Ok(())
    }
}

/// An owned iterator over deserialized records.
///
/// The type parameter `R` refers to the underlying `io::Read` type, and `D`
/// refers to the type that this iterator will deserialize a record into.
pub struct DeserializeRecordsIntoIter<R, D> {
    rdr: Reader<R>,
    rec: StringRecord,
    headers: Option<StringRecord>,
    _priv: PhantomData<D>,
}

impl<R: io::Read, D: DeserializeOwned> DeserializeRecordsIntoIter<R, D> {
    fn new(mut rdr: Reader<R>) -> DeserializeRecordsIntoIter<R, D> {
        let headers = if !rdr.state.has_headers {
            None
        } else {
            rdr.headers().ok().map(Clone::clone)
        };
        DeserializeRecordsIntoIter {
            rdr,
            rec: StringRecord::new(),
            headers,
            _priv: PhantomData,
        }
    }

    /// Return a reference to the underlying CSV reader.
    pub fn reader(&self) -> &Reader<R> {
        &self.rdr
    }

    /// Return a mutable reference to the underlying CSV reader.
    pub fn reader_mut(&mut self) -> &mut Reader<R> {
        &mut self.rdr
    }

    /// Drop this iterator and return the underlying CSV reader.
    pub fn into_reader(self) -> Reader<R> {
        self.rdr
    }
}

impl<R: io::Read, D: DeserializeOwned> Iterator
    for DeserializeRecordsIntoIter<R, D>
{
    type Item = Result<D>;

    fn next(&mut self) -> Option<Result<D>> {
        match self.rdr.read_record(&mut self.rec) {
            Err(err) => Some(Err(err)),
            Ok(false) => None,
            Ok(true) => Some(self.rec.deserialize(self.headers.as_ref())),
        }
    }
}

/// A borrowed iterator over deserialized records.
///
/// The lifetime parameter `'r` refers to the lifetime of the underlying
/// CSV `Reader`. The type parameter `R` refers to the underlying `io::Read`
/// type, and `D` refers to the type that this iterator will deserialize a
/// record into.
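///
/// # Example
///
/// A small sketch that deserializes each row into a tuple; the data and the
/// target type here are invented for illustration. (This iterator is what
/// `Reader::deserialize` returns.)
///
/// ```
/// use std::error::Error;
/// use csv::Reader;
///
/// # fn main() { example().unwrap(); }
/// fn example() -> Result<(), Box<dyn Error>> {
///     let data = "\
/// city,country,pop
/// Boston,United States,4628910
/// ";
///     let mut rdr = Reader::from_reader(data.as_bytes());
///     for result in rdr.deserialize() {
///         let (city, country, pop): (String, String, u64) = result?;
///         assert_eq!(city, "Boston");
///         assert_eq!(country, "United States");
///         assert_eq!(pop, 4628910);
///     }
///     Ok(())
/// }
/// ```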
pub struct DeserializeRecordsIter<'r, R: 'r, D> {
    rdr: &'r mut Reader<R>,
    rec: StringRecord,
    headers: Option<StringRecord>,
    _priv: PhantomData<D>,
}

impl<'r, R: io::Read, D: DeserializeOwned> DeserializeRecordsIter<'r, R, D> {
    fn new(rdr: &'r mut Reader<R>) -> DeserializeRecordsIter<'r, R, D> {
        let headers = if !rdr.state.has_headers {
            None
        } else {
            rdr.headers().ok().map(Clone::clone)
        };
        DeserializeRecordsIter {
            rdr,
            rec: StringRecord::new(),
            headers,
            _priv: PhantomData,
        }
    }

    /// Return a reference to the underlying CSV reader.
    pub fn reader(&self) -> &Reader<R> {
        &self.rdr
    }

    /// Return a mutable reference to the underlying CSV reader.
    pub fn reader_mut(&mut self) -> &mut Reader<R> {
        &mut self.rdr
    }
}

impl<'r, R: io::Read, D: DeserializeOwned> Iterator
    for DeserializeRecordsIter<'r, R, D>
{
    type Item = Result<D>;

    fn next(&mut self) -> Option<Result<D>> {
        match self.rdr.read_record(&mut self.rec) {
            Err(err) => Some(Err(err)),
            Ok(false) => None,
            Ok(true) => Some(self.rec.deserialize(self.headers.as_ref())),
        }
    }
}

/// An owned iterator over records as strings.
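///
/// # Example
///
/// A sketch of when an owned iterator is handy: moving the reader into the
/// iterator so it can be returned from a function. The helper below is
/// invented for illustration. (This iterator is what `Reader::into_records`
/// returns.)
///
/// ```
/// use csv::{Reader, StringRecordsIntoIter};
///
/// fn records_from_str(data: &str) -> StringRecordsIntoIter<&[u8]> {
///     Reader::from_reader(data.as_bytes()).into_records()
/// }
///
/// // The header row is skipped by default, leaving two records.
/// let count = records_from_str("a,b\n1,2\n3,4\n").count();
/// assert_eq!(count, 2);
/// ```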
pub struct StringRecordsIntoIter<R> {
    rdr: Reader<R>,
    rec: StringRecord,
}

impl<R: io::Read> StringRecordsIntoIter<R> {
    fn new(rdr: Reader<R>) -> StringRecordsIntoIter<R> {
        StringRecordsIntoIter { rdr, rec: StringRecord::new() }
    }

    /// Return a reference to the underlying CSV reader.
    pub fn reader(&self) -> &Reader<R> {
        &self.rdr
    }

    /// Return a mutable reference to the underlying CSV reader.
    pub fn reader_mut(&mut self) -> &mut Reader<R> {
        &mut self.rdr
    }

    /// Drop this iterator and return the underlying CSV reader.
    pub fn into_reader(self) -> Reader<R> {
        self.rdr
    }
}

impl<R: io::Read> Iterator for StringRecordsIntoIter<R> {
    type Item = Result<StringRecord>;

    fn next(&mut self) -> Option<Result<StringRecord>> {
        match self.rdr.read_record(&mut self.rec) {
            Err(err) => Some(Err(err)),
            Ok(true) => Some(Ok(self.rec.clone_truncated())),
            Ok(false) => None,
        }
    }
}

/// A borrowed iterator over records as strings.
///
/// The lifetime parameter `'r` refers to the lifetime of the underlying
/// CSV `Reader`.
pub struct StringRecordsIter<'r, R: 'r> {
    rdr: &'r mut Reader<R>,
    rec: StringRecord,
}

impl<'r, R: io::Read> StringRecordsIter<'r, R> {
    fn new(rdr: &'r mut Reader<R>) -> StringRecordsIter<'r, R> {
        StringRecordsIter { rdr, rec: StringRecord::new() }
    }

    /// Return a reference to the underlying CSV reader.
    pub fn reader(&self) -> &Reader<R> {
        &self.rdr
    }

    /// Return a mutable reference to the underlying CSV reader.
    pub fn reader_mut(&mut self) -> &mut Reader<R> {
        &mut self.rdr
    }
}

impl<'r, R: io::Read> Iterator for StringRecordsIter<'r, R> {
    type Item = Result<StringRecord>;

    fn next(&mut self) -> Option<Result<StringRecord>> {
        match self.rdr.read_record(&mut self.rec) {
            Err(err) => Some(Err(err)),
            Ok(true) => Some(Ok(self.rec.clone_truncated())),
            Ok(false) => None,
        }
    }
}

/// An owned iterator over records as raw bytes.
pub struct ByteRecordsIntoIter<R> {
    rdr: Reader<R>,
    rec: ByteRecord,
}

impl<R: io::Read> ByteRecordsIntoIter<R> {
    fn new(rdr: Reader<R>) -> ByteRecordsIntoIter<R> {
        ByteRecordsIntoIter { rdr, rec: ByteRecord::new() }
    }

    /// Return a reference to the underlying CSV reader.
    pub fn reader(&self) -> &Reader<R> {
        &self.rdr
    }

    /// Return a mutable reference to the underlying CSV reader.
    pub fn reader_mut(&mut self) -> &mut Reader<R> {
        &mut self.rdr
    }

    /// Drop this iterator and return the underlying CSV reader.
    pub fn into_reader(self) -> Reader<R> {
        self.rdr
    }
}

impl<R: io::Read> Iterator for ByteRecordsIntoIter<R> {
    type Item = Result<ByteRecord>;

    fn next(&mut self) -> Option<Result<ByteRecord>> {
        match self.rdr.read_byte_record(&mut self.rec) {
            Err(err) => Some(Err(err)),
            Ok(true) => Some(Ok(self.rec.clone_truncated())),
            Ok(false) => None,
        }
    }
}

/// A borrowed iterator over records as raw bytes.
///
/// The lifetime parameter `'r` refers to the lifetime of the underlying
/// CSV `Reader`.
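///
/// # Example
///
/// A small sketch (bytes invented for illustration) of why byte records are
/// useful: fields need not be valid UTF-8. (This iterator is what
/// `Reader::byte_records` returns.)
///
/// ```
/// use std::error::Error;
/// use csv::Reader;
///
/// # fn main() { example().unwrap(); }
/// fn example() -> Result<(), Box<dyn Error>> {
///     let data = &b"name,value\nfoo,\xFF\xFE"[..];
///     let mut rdr = Reader::from_reader(data);
///     for result in rdr.byte_records() {
///         let record = result?;
///         assert_eq!(&record[1], &b"\xFF\xFE"[..]);
///     }
///     Ok(())
/// }
/// ```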
pub struct ByteRecordsIter<'r, R: 'r> {
    rdr: &'r mut Reader<R>,
    rec: ByteRecord,
}

impl<'r, R: io::Read> ByteRecordsIter<'r, R> {
    fn new(rdr: &'r mut Reader<R>) -> ByteRecordsIter<'r, R> {
        ByteRecordsIter { rdr, rec: ByteRecord::new() }
    }

    /// Return a reference to the underlying CSV reader.
    pub fn reader(&self) -> &Reader<R> {
        &self.rdr
    }

    /// Return a mutable reference to the underlying CSV reader.
    pub fn reader_mut(&mut self) -> &mut Reader<R> {
        &mut self.rdr
    }
}

impl<'r, R: io::Read> Iterator for ByteRecordsIter<'r, R> {
    type Item = Result<ByteRecord>;

    fn next(&mut self) -> Option<Result<ByteRecord>> {
        match self.rdr.read_byte_record(&mut self.rec) {
            Err(err) => Some(Err(err)),
            Ok(true) => Some(Ok(self.rec.clone_truncated())),
            Ok(false) => None,
        }
    }
}

#[cfg(test)]
mod tests {
    use std::io;

    use crate::{
        byte_record::ByteRecord, error::ErrorKind, string_record::StringRecord,
    };

    use super::{Position, ReaderBuilder, Trim};

    fn b(s: &str) -> &[u8] {
        s.as_bytes()
    }

    fn s(b: &[u8]) -> &str {
        ::std::str::from_utf8(b).unwrap()
    }

    fn newpos(byte: u64, line: u64, record: u64) -> Position {
        let mut p = Position::new();
        p.set_byte(byte).set_line(line).set_record(record);
        p
    }

    #[test]
    fn read_byte_record() {
        let data = b("foo,\"b,ar\",baz\nabc,mno,xyz");
        let mut rdr =
            ReaderBuilder::new().has_headers(false).from_reader(data);
        let mut rec = ByteRecord::new();

        assert!(rdr.read_byte_record(&mut rec).unwrap());
        assert_eq!(3, rec.len());
        assert_eq!("foo", s(&rec[0]));
        assert_eq!("b,ar", s(&rec[1]));
        assert_eq!("baz", s(&rec[2]));

        assert!(rdr.read_byte_record(&mut rec).unwrap());
        assert_eq!(3, rec.len());
        assert_eq!("abc", s(&rec[0]));
        assert_eq!("mno", s(&rec[1]));
        assert_eq!("xyz", s(&rec[2]));

        assert!(!rdr.read_byte_record(&mut rec).unwrap());
    }

    #[test]
    fn read_trimmed_records_and_headers() {
        let data = b("foo, bar,\tbaz\n 1, 2, 3\n1\t,\t,3\t\t");
        let mut rdr = ReaderBuilder::new()
            .has_headers(true)
            .trim(Trim::All)
            .from_reader(data);
        let mut rec = ByteRecord::new();
        assert!(rdr.read_byte_record(&mut rec).unwrap());
        assert_eq!("1", s(&rec[0]));
        assert_eq!("2", s(&rec[1]));
        assert_eq!("3", s(&rec[2]));

        let mut rec = StringRecord::new();
        assert!(rdr.read_record(&mut rec).unwrap());
        assert_eq!("1", &rec[0]);
        assert_eq!("", &rec[1]);
        assert_eq!("3", &rec[2]);
        {
            let headers = rdr.headers().unwrap();
            assert_eq!(3, headers.len());
            assert_eq!("foo", &headers[0]);
            assert_eq!("bar", &headers[1]);
            assert_eq!("baz", &headers[2]);
        }
    }

    #[test]
    fn read_trimmed_header() {
        let data = b("foo, bar,\tbaz\n 1, 2, 3\n1\t,\t,3\t\t");
        let mut rdr = ReaderBuilder::new()
            .has_headers(true)
            .trim(Trim::Headers)
            .from_reader(data);
        let mut rec = ByteRecord::new();
        assert!(rdr.read_byte_record(&mut rec).unwrap());
        assert_eq!(" 1", s(&rec[0]));
        assert_eq!(" 2", s(&rec[1]));
        assert_eq!(" 3", s(&rec[2]));
        {
            let headers = rdr.headers().unwrap();
            assert_eq!(3, headers.len());
            assert_eq!("foo", &headers[0]);
            assert_eq!("bar", &headers[1]);
            assert_eq!("baz", &headers[2]);
        }
    }

    #[test]
    fn read_trimmed_header_invalid_utf8() {
        let data = &b"foo, b\xFFar,\tbaz\na,b,c\nd,e,f"[..];
        let mut rdr = ReaderBuilder::new()
            .has_headers(true)
            .trim(Trim::Headers)
            .from_reader(data);
        let mut rec = StringRecord::new();

        // Force the headers to be read.
        let _ = rdr.read_record(&mut rec);
        // Check that the byte headers are trimmed.
        {
            let headers = rdr.byte_headers().unwrap();
            assert_eq!(3, headers.len());
            assert_eq!(b"foo", &headers[0]);
            assert_eq!(b"b\xFFar", &headers[1]);
            assert_eq!(b"baz", &headers[2]);
        }
        match *rdr.headers().unwrap_err().kind() {
            ErrorKind::Utf8 { pos: Some(ref pos), ref err } => {
                assert_eq!(pos, &newpos(0, 1, 0));
                assert_eq!(err.field(), 1);
                assert_eq!(err.valid_up_to(), 3);
            }
            ref err => panic!("match failed, got {:?}", err),
        }
    }

    #[test]
    fn read_trimmed_records() {
        let data = b("foo, bar,\tbaz\n 1, 2, 3\n1\t,\t,3\t\t");
        let mut rdr = ReaderBuilder::new()
            .has_headers(true)
            .trim(Trim::Fields)
            .from_reader(data);
        let mut rec = ByteRecord::new();
        assert!(rdr.read_byte_record(&mut rec).unwrap());
        assert_eq!("1", s(&rec[0]));
        assert_eq!("2", s(&rec[1]));
        assert_eq!("3", s(&rec[2]));
        {
            let headers = rdr.headers().unwrap();
            assert_eq!(3, headers.len());
            assert_eq!("foo", &headers[0]);
            assert_eq!(" bar", &headers[1]);
            assert_eq!("\tbaz", &headers[2]);
        }
    }

    #[test]
    fn read_record_unequal_fails() {
        let data = b("foo\nbar,baz");
        let mut rdr =
            ReaderBuilder::new().has_headers(false).from_reader(data);
        let mut rec = ByteRecord::new();

        assert!(rdr.read_byte_record(&mut rec).unwrap());
        assert_eq!(1, rec.len());
        assert_eq!("foo", s(&rec[0]));

        match rdr.read_byte_record(&mut rec) {
            Err(err) => match *err.kind() {
                ErrorKind::UnequalLengths {
                    expected_len: 1,
                    ref pos,
                    len: 2,
                } => {
                    assert_eq!(pos, &Some(newpos(4, 2, 1)));
                }
                ref wrong => panic!("match failed, got {:?}", wrong),
            },
            wrong => panic!("match failed, got {:?}", wrong),
        }
    }

    #[test]
    fn read_record_unequal_ok() {
        let data = b("foo\nbar,baz");
        let mut rdr = ReaderBuilder::new()
            .has_headers(false)
            .flexible(true)
            .from_reader(data);
        let mut rec = ByteRecord::new();

        assert!(rdr.read_byte_record(&mut rec).unwrap());
        assert_eq!(1, rec.len());
        assert_eq!("foo", s(&rec[0]));

        assert!(rdr.read_byte_record(&mut rec).unwrap());
        assert_eq!(2, rec.len());
        assert_eq!("bar", s(&rec[0]));
        assert_eq!("baz", s(&rec[1]));

        assert!(!rdr.read_byte_record(&mut rec).unwrap());
    }

    // This tests that even if we get a CSV error, we can continue reading
    // if we want.
    #[test]
    fn read_record_unequal_continue() {
        let data = b("foo\nbar,baz\nquux");
        let mut rdr =
            ReaderBuilder::new().has_headers(false).from_reader(data);
        let mut rec = ByteRecord::new();

        assert!(rdr.read_byte_record(&mut rec).unwrap());
        assert_eq!(1, rec.len());
        assert_eq!("foo", s(&rec[0]));

        match rdr.read_byte_record(&mut rec) {
            Err(err) => match err.kind() {
                &ErrorKind::UnequalLengths {
                    expected_len: 1,
                    ref pos,
                    len: 2,
                } => {
                    assert_eq!(pos, &Some(newpos(4, 2, 1)));
                }
                wrong => panic!("match failed, got {:?}", wrong),
            },
            wrong => panic!("match failed, got {:?}", wrong),
        }

        assert!(rdr.read_byte_record(&mut rec).unwrap());
        assert_eq!(1, rec.len());
        assert_eq!("quux", s(&rec[0]));

        assert!(!rdr.read_byte_record(&mut rec).unwrap());
    }

    #[test]
    fn read_record_headers() {
        let data = b("foo,bar,baz\na,b,c\nd,e,f");
        let mut rdr = ReaderBuilder::new().has_headers(true).from_reader(data);
        let mut rec = StringRecord::new();

        assert!(rdr.read_record(&mut rec).unwrap());
        assert_eq!(3, rec.len());
        assert_eq!("a", &rec[0]);

        assert!(rdr.read_record(&mut rec).unwrap());
        assert_eq!(3, rec.len());
        assert_eq!("d", &rec[0]);

        assert!(!rdr.read_record(&mut rec).unwrap());

        {
            let headers = rdr.byte_headers().unwrap();
            assert_eq!(3, headers.len());
            assert_eq!(b"foo", &headers[0]);
            assert_eq!(b"bar", &headers[1]);
            assert_eq!(b"baz", &headers[2]);
        }
        {
            let headers = rdr.headers().unwrap();
            assert_eq!(3, headers.len());
            assert_eq!("foo", &headers[0]);
            assert_eq!("bar", &headers[1]);
            assert_eq!("baz", &headers[2]);
        }
    }

    #[test]
    fn read_record_headers_invalid_utf8() {
        let data = &b"foo,b\xFFar,baz\na,b,c\nd,e,f"[..];
        let mut rdr = ReaderBuilder::new().has_headers(true).from_reader(data);
        let mut rec = StringRecord::new();

        assert!(rdr.read_record(&mut rec).unwrap());
        assert_eq!(3, rec.len());
        assert_eq!("a", &rec[0]);

        assert!(rdr.read_record(&mut rec).unwrap());
        assert_eq!(3, rec.len());
        assert_eq!("d", &rec[0]);

        assert!(!rdr.read_record(&mut rec).unwrap());

        // Check that we can read the headers as raw bytes, but that
        // if we read them as strings, we get an appropriate UTF-8 error.
        {
            let headers = rdr.byte_headers().unwrap();
            assert_eq!(3, headers.len());
            assert_eq!(b"foo", &headers[0]);
            assert_eq!(b"b\xFFar", &headers[1]);
            assert_eq!(b"baz", &headers[2]);
        }
        match *rdr.headers().unwrap_err().kind() {
            ErrorKind::Utf8 { pos: Some(ref pos), ref err } => {
                assert_eq!(pos, &newpos(0, 1, 0));
                assert_eq!(err.field(), 1);
                assert_eq!(err.valid_up_to(), 1);
            }
            ref err => panic!("match failed, got {:?}", err),
        }
    }

    #[test]
    fn read_record_no_headers_before() {
        let data = b("foo,bar,baz\na,b,c\nd,e,f");
        let mut rdr =
            ReaderBuilder::new().has_headers(false).from_reader(data);
        let mut rec = StringRecord::new();

        {
            let headers = rdr.headers().unwrap();
            assert_eq!(3, headers.len());
            assert_eq!("foo", &headers[0]);
            assert_eq!("bar", &headers[1]);
            assert_eq!("baz", &headers[2]);
        }

        assert!(rdr.read_record(&mut rec).unwrap());
        assert_eq!(3, rec.len());
        assert_eq!("foo", &rec[0]);

        assert!(rdr.read_record(&mut rec).unwrap());
        assert_eq!(3, rec.len());
        assert_eq!("a", &rec[0]);

        assert!(rdr.read_record(&mut rec).unwrap());
        assert_eq!(3, rec.len());
        assert_eq!("d", &rec[0]);

        assert!(!rdr.read_record(&mut rec).unwrap());
    }

    #[test]
    fn read_record_no_headers_after() {
        let data = b("foo,bar,baz\na,b,c\nd,e,f");
        let mut rdr =
            ReaderBuilder::new().has_headers(false).from_reader(data);
        let mut rec = StringRecord::new();

        assert!(rdr.read_record(&mut rec).unwrap());
        assert_eq!(3, rec.len());
        assert_eq!("foo", &rec[0]);

        assert!(rdr.read_record(&mut rec).unwrap());
        assert_eq!(3, rec.len());
        assert_eq!("a", &rec[0]);

        assert!(rdr.read_record(&mut rec).unwrap());
        assert_eq!(3, rec.len());
        assert_eq!("d", &rec[0]);

        assert!(!rdr.read_record(&mut rec).unwrap());

        let headers = rdr.headers().unwrap();
        assert_eq!(3, headers.len());
        assert_eq!("foo", &headers[0]);
        assert_eq!("bar", &headers[1]);
        assert_eq!("baz", &headers[2]);
    }

    #[test]
    fn seek() {
        let data = b("foo,bar,baz\na,b,c\nd,e,f\ng,h,i");
        let mut rdr = ReaderBuilder::new().from_reader(io::Cursor::new(data));
        rdr.seek(newpos(18, 3, 2)).unwrap();

        let mut rec = StringRecord::new();

        assert_eq!(18, rdr.position().byte());
        assert!(rdr.read_record(&mut rec).unwrap());
        assert_eq!(3, rec.len());
        assert_eq!("d", &rec[0]);

        assert_eq!(24, rdr.position().byte());
        assert_eq!(4, rdr.position().line());
        assert_eq!(3, rdr.position().record());
        assert!(rdr.read_record(&mut rec).unwrap());
        assert_eq!(3, rec.len());
        assert_eq!("g", &rec[0]);

        assert!(!rdr.read_record(&mut rec).unwrap());
    }

    // Test that we can read headers after seeking even if the headers weren't
    // explicitly read before seeking.
    #[test]
    fn seek_headers_after() {
        let data = b("foo,bar,baz\na,b,c\nd,e,f\ng,h,i");
        let mut rdr = ReaderBuilder::new().from_reader(io::Cursor::new(data));
        rdr.seek(newpos(18, 3, 2)).unwrap();
        assert_eq!(rdr.headers().unwrap(), vec!["foo", "bar", "baz"]);
    }

    // Test that we can read headers after seeking if the headers were read
    // before seeking.
    #[test]
    fn seek_headers_before_after() {
        let data = b("foo,bar,baz\na,b,c\nd,e,f\ng,h,i");
        let mut rdr = ReaderBuilder::new().from_reader(io::Cursor::new(data));
        let headers = rdr.headers().unwrap().clone();
        rdr.seek(newpos(18, 3, 2)).unwrap();
        assert_eq!(&headers, rdr.headers().unwrap());
    }

    // Test that even if we didn't read headers before seeking, if we seek to
    // the current byte offset, then no seeking is done and therefore we can
    // still read headers after seeking.
    #[test]
    fn seek_headers_no_actual_seek() {
        let data = b("foo,bar,baz\na,b,c\nd,e,f\ng,h,i");
        let mut rdr = ReaderBuilder::new().from_reader(io::Cursor::new(data));
        rdr.seek(Position::new()).unwrap();
        assert_eq!("foo", &rdr.headers().unwrap()[0]);
    }

    // Test that position info is reported correctly in the absence of headers.
    #[test]
    fn positions_no_headers() {
        let mut rdr = ReaderBuilder::new()
            .has_headers(false)
            .from_reader("a,b,c\nx,y,z".as_bytes())
            .into_records();

        let pos = rdr.next().unwrap().unwrap().position().unwrap().clone();
        assert_eq!(pos.byte(), 0);
        assert_eq!(pos.line(), 1);
        assert_eq!(pos.record(), 0);

        let pos = rdr.next().unwrap().unwrap().position().unwrap().clone();
        assert_eq!(pos.byte(), 6);
        assert_eq!(pos.line(), 2);
        assert_eq!(pos.record(), 1);
    }

    // Test that position info is reported correctly with headers.
    #[test]
    fn positions_headers() {
        let mut rdr = ReaderBuilder::new()
            .has_headers(true)
            .from_reader("a,b,c\nx,y,z".as_bytes())
            .into_records();

        let pos = rdr.next().unwrap().unwrap().position().unwrap().clone();
        assert_eq!(pos.byte(), 6);
        assert_eq!(pos.line(), 2);
        assert_eq!(pos.record(), 1);
    }

    // Test that reading headers on empty data yields an empty record.
    #[test]
    fn headers_on_empty_data() {
        let mut rdr = ReaderBuilder::new().from_reader("".as_bytes());
        let r = rdr.byte_headers().unwrap();
        assert_eq!(r.len(), 0);
    }

    // Test that reading the first record on empty data works.
    #[test]
    fn no_headers_on_empty_data() {
        let mut rdr =
            ReaderBuilder::new().has_headers(false).from_reader("".as_bytes());
        assert_eq!(rdr.records().count(), 0);
    }

    // Test that reading the first record on empty data works, even if
    // we've tried to read headers beforehand.
    #[test]
    fn no_headers_on_empty_data_after_headers() {
        let mut rdr =
            ReaderBuilder::new().has_headers(false).from_reader("".as_bytes());
        assert_eq!(rdr.headers().unwrap().len(), 0);
        assert_eq!(rdr.records().count(), 0);
    }
}