1 use std::{
2     cmp, fmt,
3     iter::FromIterator,
4     ops::{self, Range},
5     result,
6 };
7 
8 use serde::de::Deserialize;
9 
10 use crate::{
11     deserializer::deserialize_byte_record,
12     error::{new_utf8_error, Result, Utf8Error},
13     string_record::StringRecord,
14 };
15 
/// A single CSV record stored as raw bytes.
///
/// A byte record permits reading or writing CSV rows that are not UTF-8.
/// In general, you should prefer using a
/// [`StringRecord`](struct.StringRecord.html)
/// since it is more ergonomic, but a `ByteRecord` is provided in case you need
/// it.
///
/// If you are using the Serde (de)serialization APIs, then you probably never
/// need to interact with a `ByteRecord` or a `StringRecord`. However, there
/// are some circumstances in which you might need to use a raw record type
/// while still using Serde. For example, if you need to deserialize possibly
/// invalid UTF-8 fields, then you'll need to first read your record into a
/// `ByteRecord`, and then use `ByteRecord::deserialize` to run Serde. Another
/// reason for using the raw record deserialization APIs is if you're using
/// Serde to read into borrowed data such as a `&'a str` or a `&'a [u8]`.
///
/// Two `ByteRecord`s are compared on the basis of their field data. Any
/// position information associated with the records is ignored.
// `PartialEq` is implemented by hand so that equality only looks at field
// data; that is why only `Clone` and `Eq` are derived here.
#[derive(Clone, Eq)]
pub struct ByteRecord(Box<ByteRecordInner>);
37 
38 impl PartialEq for ByteRecord {
eq(&self, other: &ByteRecord) -> bool39     fn eq(&self, other: &ByteRecord) -> bool {
40         if self.len() != other.len() {
41             return false;
42         }
43         self.iter().zip(other.iter()).all(|e| e.0 == e.1)
44     }
45 }
46 
47 impl<T: AsRef<[u8]>> PartialEq<Vec<T>> for ByteRecord {
eq(&self, other: &Vec<T>) -> bool48     fn eq(&self, other: &Vec<T>) -> bool {
49         self.iter_eq(other)
50     }
51 }
52 
53 impl<'a, T: AsRef<[u8]>> PartialEq<Vec<T>> for &'a ByteRecord {
eq(&self, other: &Vec<T>) -> bool54     fn eq(&self, other: &Vec<T>) -> bool {
55         self.iter_eq(other)
56     }
57 }
58 
59 impl<T: AsRef<[u8]>> PartialEq<[T]> for ByteRecord {
eq(&self, other: &[T]) -> bool60     fn eq(&self, other: &[T]) -> bool {
61         self.iter_eq(other)
62     }
63 }
64 
65 impl<'a, T: AsRef<[u8]>> PartialEq<[T]> for &'a ByteRecord {
eq(&self, other: &[T]) -> bool66     fn eq(&self, other: &[T]) -> bool {
67         self.iter_eq(other)
68     }
69 }
70 
71 impl fmt::Debug for ByteRecord {
fmt(&self, f: &mut fmt::Formatter) -> fmt::Result72     fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
73         write!(f, "ByteRecord(")?;
74         f.debug_list()
75             .entries(self.iter().map(crate::debug::Bytes))
76             .finish()?;
77         write!(f, ")")?;
78         Ok(())
79     }
80 }
81 
/// The inner portion of a byte record.
///
/// We use this memory layout so that moving a `ByteRecord` only requires
/// moving a single pointer. The optimization is dubious at best, but does
/// seem to result in slightly better numbers in microbenchmarks. Methinks this
/// may heavily depend on the underlying allocator.
#[derive(Clone, Debug, Eq, PartialEq)]
struct ByteRecordInner {
    /// The position of this byte record.
    pos: Option<Position>,
    /// All fields in this record, stored contiguously.
    ///
    /// The vector may be longer than the data in use: it is zero-filled when
    /// grown, and only the prefix `..bounds.end()` holds field bytes.
    fields: Vec<u8>,
    /// The number of and location of each field in this record.
    bounds: Bounds,
}
97 
98 impl Default for ByteRecord {
99     #[inline]
default() -> ByteRecord100     fn default() -> ByteRecord {
101         ByteRecord::new()
102     }
103 }
104 
105 impl ByteRecord {
106     /// Create a new empty `ByteRecord`.
107     ///
108     /// Note that you may find the `ByteRecord::from` constructor more
109     /// convenient, which is provided by an impl on the `From` trait.
110     ///
111     /// # Example: create an empty record
112     ///
113     /// ```
114     /// use csv::ByteRecord;
115     ///
116     /// let record = ByteRecord::new();
117     /// assert_eq!(record.len(), 0);
118     /// ```
119     ///
120     /// # Example: initialize a record from a `Vec`
121     ///
122     /// ```
123     /// use csv::ByteRecord;
124     ///
125     /// let record = ByteRecord::from(vec!["a", "b", "c"]);
126     /// assert_eq!(record.len(), 3);
127     /// ```
128     #[inline]
new() -> ByteRecord129     pub fn new() -> ByteRecord {
130         ByteRecord::with_capacity(0, 0)
131     }
132 
133     /// Create a new empty `ByteRecord` with the given capacity settings.
134     ///
135     /// `buffer` refers to the capacity of the buffer used to store the
136     /// actual row contents. `fields` refers to the number of fields one
137     /// might expect to store.
138     #[inline]
with_capacity(buffer: usize, fields: usize) -> ByteRecord139     pub fn with_capacity(buffer: usize, fields: usize) -> ByteRecord {
140         ByteRecord(Box::new(ByteRecordInner {
141             pos: None,
142             fields: vec![0; buffer],
143             bounds: Bounds::with_capacity(fields),
144         }))
145     }
146 
147     /// Deserialize this record.
148     ///
149     /// The `D` type parameter refers to the type that this record should be
150     /// deserialized into. The `'de` lifetime refers to the lifetime of the
151     /// `ByteRecord`. The `'de` lifetime permits deserializing into structs
152     /// that borrow field data from this record.
153     ///
154     /// An optional `headers` parameter permits deserializing into a struct
155     /// based on its field names (corresponding to header values) rather than
156     /// the order in which the fields are defined.
157     ///
158     /// # Example: without headers
159     ///
160     /// This shows how to deserialize a single row into a struct based on the
161     /// order in which fields occur. This example also shows how to borrow
162     /// fields from the `ByteRecord`, which results in zero allocation
163     /// deserialization.
164     ///
165     /// ```
166     /// use std::error::Error;
167     ///
168     /// use csv::ByteRecord;
169     /// use serde::Deserialize;
170     ///
171     /// #[derive(Deserialize)]
172     /// struct Row<'a> {
173     ///     city: &'a str,
174     ///     country: &'a str,
175     ///     population: u64,
176     /// }
177     ///
178     /// # fn main() { example().unwrap() }
179     /// fn example() -> Result<(), Box<dyn Error>> {
180     ///     let record = ByteRecord::from(vec![
181     ///         "Boston", "United States", "4628910",
182     ///     ]);
183     ///
184     ///     let row: Row = record.deserialize(None)?;
185     ///     assert_eq!(row.city, "Boston");
186     ///     assert_eq!(row.country, "United States");
187     ///     assert_eq!(row.population, 4628910);
188     ///     Ok(())
189     /// }
190     /// ```
191     ///
192     /// # Example: with headers
193     ///
194     /// This example is like the previous one, but shows how to deserialize
195     /// into a struct based on the struct's field names. For this to work,
196     /// you must provide a header row.
197     ///
198     /// This example also shows that you can deserialize into owned data
199     /// types (e.g., `String`) instead of borrowed data types (e.g., `&str`).
200     ///
201     /// ```
202     /// use std::error::Error;
203     ///
204     /// use csv::ByteRecord;
205     /// use serde::Deserialize;
206     ///
207     /// #[derive(Deserialize)]
208     /// struct Row {
209     ///     city: String,
210     ///     country: String,
211     ///     population: u64,
212     /// }
213     ///
214     /// # fn main() { example().unwrap() }
215     /// fn example() -> Result<(), Box<dyn Error>> {
216     ///     // Notice that the fields are not in the same order
217     ///     // as the fields in the struct!
218     ///     let header = ByteRecord::from(vec![
219     ///         "country", "city", "population",
220     ///     ]);
221     ///     let record = ByteRecord::from(vec![
222     ///         "United States", "Boston", "4628910",
223     ///     ]);
224     ///
225     ///     let row: Row = record.deserialize(Some(&header))?;
226     ///     assert_eq!(row.city, "Boston");
227     ///     assert_eq!(row.country, "United States");
228     ///     assert_eq!(row.population, 4628910);
229     ///     Ok(())
230     /// }
231     /// ```
deserialize<'de, D: Deserialize<'de>>( &'de self, headers: Option<&'de ByteRecord>, ) -> Result<D>232     pub fn deserialize<'de, D: Deserialize<'de>>(
233         &'de self,
234         headers: Option<&'de ByteRecord>,
235     ) -> Result<D> {
236         deserialize_byte_record(self, headers)
237     }
238 
239     /// Returns an iterator over all fields in this record.
240     ///
241     /// # Example
242     ///
243     /// This example shows how to iterate over each field in a `ByteRecord`.
244     ///
245     /// ```
246     /// use csv::ByteRecord;
247     ///
248     /// let record = ByteRecord::from(vec!["a", "b", "c"]);
249     /// for field in record.iter() {
250     ///     assert!(field == b"a" || field == b"b" || field == b"c");
251     /// }
252     /// ```
253     #[inline]
iter(&self) -> ByteRecordIter254     pub fn iter(&self) -> ByteRecordIter {
255         self.into_iter()
256     }
257 
258     /// Return the field at index `i`.
259     ///
260     /// If no field at index `i` exists, then this returns `None`.
261     ///
262     /// # Example
263     ///
264     /// ```
265     /// use csv::ByteRecord;
266     ///
267     /// let record = ByteRecord::from(vec!["a", "b", "c"]);
268     /// assert_eq!(record.get(1), Some(&b"b"[..]));
269     /// assert_eq!(record.get(3), None);
270     /// ```
271     #[inline]
get(&self, i: usize) -> Option<&[u8]>272     pub fn get(&self, i: usize) -> Option<&[u8]> {
273         self.0.bounds.get(i).map(|range| &self.0.fields[range])
274     }
275 
276     /// Returns true if and only if this record is empty.
277     ///
278     /// # Example
279     ///
280     /// ```
281     /// use csv::ByteRecord;
282     ///
283     /// assert!(ByteRecord::new().is_empty());
284     /// ```
285     #[inline]
is_empty(&self) -> bool286     pub fn is_empty(&self) -> bool {
287         self.len() == 0
288     }
289 
290     /// Returns the number of fields in this record.
291     ///
292     /// # Example
293     ///
294     /// ```
295     /// use csv::ByteRecord;
296     ///
297     /// let record = ByteRecord::from(vec!["a", "b", "c"]);
298     /// assert_eq!(record.len(), 3);
299     /// ```
300     #[inline]
len(&self) -> usize301     pub fn len(&self) -> usize {
302         self.0.bounds.len()
303     }
304 
305     /// Truncate this record to `n` fields.
306     ///
307     /// If `n` is greater than the number of fields in this record, then this
308     /// has no effect.
309     ///
310     /// # Example
311     ///
312     /// ```
313     /// use csv::ByteRecord;
314     ///
315     /// let mut record = ByteRecord::from(vec!["a", "b", "c"]);
316     /// assert_eq!(record.len(), 3);
317     /// record.truncate(1);
318     /// assert_eq!(record.len(), 1);
319     /// assert_eq!(record, vec!["a"]);
320     /// ```
321     #[inline]
truncate(&mut self, n: usize)322     pub fn truncate(&mut self, n: usize) {
323         if n <= self.len() {
324             self.0.bounds.len = n;
325         }
326     }
327 
328     /// Clear this record so that it has zero fields.
329     ///
330     /// This is equivalent to calling `truncate(0)`.
331     ///
332     /// Note that it is not necessary to clear the record to reuse it with
333     /// the CSV reader.
334     ///
335     /// # Example
336     ///
337     /// ```
338     /// use csv::ByteRecord;
339     ///
340     /// let mut record = ByteRecord::from(vec!["a", "b", "c"]);
341     /// assert_eq!(record.len(), 3);
342     /// record.clear();
343     /// assert_eq!(record.len(), 0);
344     /// ```
345     #[inline]
clear(&mut self)346     pub fn clear(&mut self) {
347         self.truncate(0);
348     }
349 
350     /// Trim the fields of this record so that leading and trailing whitespace
351     /// is removed.
352     ///
353     /// This method uses the ASCII definition of whitespace. That is, only
354     /// bytes in the class `[\t\n\v\f\r ]` are trimmed.
355     ///
356     /// # Example
357     ///
358     /// ```
359     /// use csv::ByteRecord;
360     ///
361     /// let mut record = ByteRecord::from(vec![
362     ///     "  ", "\tfoo", "bar  ", "b a z",
363     /// ]);
364     /// record.trim();
365     /// assert_eq!(record, vec!["", "foo", "bar", "b a z"]);
366     /// ```
trim(&mut self)367     pub fn trim(&mut self) {
368         let length = self.len();
369         if length == 0 {
370             return;
371         }
372         // TODO: We could likely do this in place, but for now, we allocate.
373         let mut trimmed =
374             ByteRecord::with_capacity(self.as_slice().len(), self.len());
375         trimmed.set_position(self.position().cloned());
376         for field in self.iter() {
377             trimmed.push_field(trim_ascii(field));
378         }
379         *self = trimmed;
380     }
381 
382     /// Add a new field to this record.
383     ///
384     /// # Example
385     ///
386     /// ```
387     /// use csv::ByteRecord;
388     ///
389     /// let mut record = ByteRecord::new();
390     /// record.push_field(b"foo");
391     /// assert_eq!(&record[0], b"foo");
392     /// ```
393     #[inline]
push_field(&mut self, field: &[u8])394     pub fn push_field(&mut self, field: &[u8]) {
395         let (s, e) = (self.0.bounds.end(), self.0.bounds.end() + field.len());
396         while e > self.0.fields.len() {
397             self.expand_fields();
398         }
399         self.0.fields[s..e].copy_from_slice(field);
400         self.0.bounds.add(e);
401     }
402 
403     /// Return the position of this record, if available.
404     ///
405     /// # Example
406     ///
407     /// ```
408     /// use std::error::Error;
409     ///
410     /// use csv::{ByteRecord, ReaderBuilder};
411     ///
412     /// # fn main() { example().unwrap(); }
413     /// fn example() -> Result<(), Box<dyn Error>> {
414     ///     let mut record = ByteRecord::new();
415     ///     let mut rdr = ReaderBuilder::new()
416     ///         .has_headers(false)
417     ///         .from_reader("a,b,c\nx,y,z".as_bytes());
418     ///
419     ///     assert!(rdr.read_byte_record(&mut record)?);
420     ///     {
421     ///         let pos = record.position().expect("a record position");
422     ///         assert_eq!(pos.byte(), 0);
423     ///         assert_eq!(pos.line(), 1);
424     ///         assert_eq!(pos.record(), 0);
425     ///     }
426     ///
427     ///     assert!(rdr.read_byte_record(&mut record)?);
428     ///     {
429     ///         let pos = record.position().expect("a record position");
430     ///         assert_eq!(pos.byte(), 6);
431     ///         assert_eq!(pos.line(), 2);
432     ///         assert_eq!(pos.record(), 1);
433     ///     }
434     ///
435     ///     // Finish the CSV reader for good measure.
436     ///     assert!(!rdr.read_byte_record(&mut record)?);
437     ///     Ok(())
438     /// }
439     /// ```
440     #[inline]
position(&self) -> Option<&Position>441     pub fn position(&self) -> Option<&Position> {
442         self.0.pos.as_ref()
443     }
444 
445     /// Set the position of this record.
446     ///
447     /// # Example
448     ///
449     /// ```
450     /// use csv::{ByteRecord, Position};
451     ///
452     /// let mut record = ByteRecord::from(vec!["a", "b", "c"]);
453     /// let mut pos = Position::new();
454     /// pos.set_byte(100);
455     /// pos.set_line(4);
456     /// pos.set_record(2);
457     ///
458     /// record.set_position(Some(pos.clone()));
459     /// assert_eq!(record.position(), Some(&pos));
460     /// ```
461     #[inline]
set_position(&mut self, pos: Option<Position>)462     pub fn set_position(&mut self, pos: Option<Position>) {
463         self.0.pos = pos;
464     }
465 
466     /// Return the start and end position of a field in this record.
467     ///
468     /// If no such field exists at the given index, then return `None`.
469     ///
470     /// The range returned can be used with the slice returned by `as_slice`.
471     ///
472     /// # Example
473     ///
474     /// ```
475     /// use csv::ByteRecord;
476     ///
477     /// let record = ByteRecord::from(vec!["foo", "quux", "z"]);
478     /// let range = record.range(1).expect("a record range");
479     /// assert_eq!(&record.as_slice()[range], &b"quux"[..]);
480     /// ```
481     #[inline]
range(&self, i: usize) -> Option<Range<usize>>482     pub fn range(&self, i: usize) -> Option<Range<usize>> {
483         self.0.bounds.get(i)
484     }
485 
486     /// Return the entire row as a single byte slice. The slice returned stores
487     /// all fields contiguously. The boundaries of each field can be determined
488     /// via the `range` method.
489     ///
490     /// # Example
491     ///
492     /// ```
493     /// use csv::ByteRecord;
494     ///
495     /// let record = ByteRecord::from(vec!["foo", "quux", "z"]);
496     /// assert_eq!(record.as_slice(), &b"fooquuxz"[..]);
497     /// ```
498     #[inline]
as_slice(&self) -> &[u8]499     pub fn as_slice(&self) -> &[u8] {
500         &self.0.fields[..self.0.bounds.end()]
501     }
502 
503     /// Clone this record, but only copy `fields` up to the end of bounds. This
504     /// is useful when one wants to copy a record, but not necessarily any
505     /// excess capacity in that record.
506     #[inline]
clone_truncated(&self) -> ByteRecord507     pub(crate) fn clone_truncated(&self) -> ByteRecord {
508         let mut br = ByteRecord::new();
509         br.0.pos = self.0.pos.clone();
510         br.0.bounds = self.0.bounds.clone();
511         br.0.fields = self.0.fields[..self.0.bounds.end()].to_vec();
512         br
513     }
514 
515     /// Retrieve the underlying parts of a byte record.
516     #[inline]
as_parts(&mut self) -> (&mut Vec<u8>, &mut Vec<usize>)517     pub(crate) fn as_parts(&mut self) -> (&mut Vec<u8>, &mut Vec<usize>) {
518         let inner = &mut *self.0;
519         (&mut inner.fields, &mut inner.bounds.ends)
520     }
521 
522     /// Set the number of fields in the given record record.
523     #[inline]
set_len(&mut self, len: usize)524     pub(crate) fn set_len(&mut self, len: usize) {
525         self.0.bounds.len = len;
526     }
527 
528     /// Expand the capacity for storing fields.
529     #[inline]
expand_fields(&mut self)530     pub(crate) fn expand_fields(&mut self) {
531         let new_len = self.0.fields.len().checked_mul(2).unwrap();
532         self.0.fields.resize(cmp::max(4, new_len), 0);
533     }
534 
535     /// Expand the capacity for storing field ending positions.
536     #[inline]
expand_ends(&mut self)537     pub(crate) fn expand_ends(&mut self) {
538         self.0.bounds.expand();
539     }
540 
541     /// Validate the given record as UTF-8.
542     ///
543     /// If it's not UTF-8, return an error.
544     #[inline]
validate(&self) -> result::Result<(), Utf8Error>545     pub(crate) fn validate(&self) -> result::Result<(), Utf8Error> {
546         // If the entire buffer is ASCII, then we have nothing to fear.
547         if self.0.fields[..self.0.bounds.end()].is_ascii() {
548             return Ok(());
549         }
550         // Otherwise, we must check each field individually to ensure that
551         // it's valid UTF-8.
552         for (i, field) in self.iter().enumerate() {
553             if let Err(err) = std::str::from_utf8(field) {
554                 return Err(new_utf8_error(i, err.valid_up_to()));
555             }
556         }
557         Ok(())
558     }
559 
560     /// Compare the given byte record with the iterator of fields for equality.
iter_eq<I, T>(&self, other: I) -> bool where I: IntoIterator<Item = T>, T: AsRef<[u8]>,561     pub(crate) fn iter_eq<I, T>(&self, other: I) -> bool
562     where
563         I: IntoIterator<Item = T>,
564         T: AsRef<[u8]>,
565     {
566         let mut it_record = self.iter();
567         let mut it_other = other.into_iter();
568         loop {
569             match (it_record.next(), it_other.next()) {
570                 (None, None) => return true,
571                 (None, Some(_)) | (Some(_), None) => return false,
572                 (Some(x), Some(y)) => {
573                     if x != y.as_ref() {
574                         return false;
575                     }
576                 }
577             }
578         }
579     }
580 }
581 
/// A position in CSV data.
///
/// A position is used to report errors in CSV data. All positions include the
/// byte offset, line number and record index at which the error occurred.
///
/// Byte offsets and record indices start at `0`. Line numbers start at `1`.
///
/// A CSV reader will automatically assign the position of each record.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct Position {
    /// Byte offset, counted from `0`.
    byte: u64,
    /// Line number, counted from `1`.
    line: u64,
    /// Record index, counted from `0`.
    record: u64,
}

impl Position {
    /// Returns a new position initialized to the start value: byte `0`,
    /// line `1`, record `0`.
    #[inline]
    pub fn new() -> Position {
        Self { line: 1, byte: 0, record: 0 }
    }

    /// The byte offset, starting at `0`, of this position.
    #[inline]
    pub fn byte(&self) -> u64 {
        let Self { byte, .. } = *self;
        byte
    }

    /// The line number, starting at `1`, of this position.
    #[inline]
    pub fn line(&self) -> u64 {
        let Self { line, .. } = *self;
        line
    }

    /// The record index, starting with the first record at `0`.
    #[inline]
    pub fn record(&self) -> u64 {
        let Self { record, .. } = *self;
        record
    }

    /// Set the byte offset of this position.
    ///
    /// Returns `self` so that setters can be chained.
    #[inline]
    pub fn set_byte(&mut self, byte: u64) -> &mut Position {
        self.byte = byte;
        self
    }

    /// Set the line number of this position.
    ///
    /// If the line number is less than `1`, then this method panics.
    ///
    /// Returns `self` so that setters can be chained.
    #[inline]
    pub fn set_line(&mut self, line: u64) -> &mut Position {
        // Line numbers are one-based, so zero is never valid.
        assert!(line > 0);
        self.line = line;
        self
    }

    /// Set the record index of this position.
    ///
    /// Returns `self` so that setters can be chained.
    #[inline]
    pub fn set_record(&mut self, record: u64) -> &mut Position {
        self.record = record;
        self
    }
}
644 
/// The bounds of fields in a single record.
#[derive(Clone, Debug, Eq, PartialEq)]
struct Bounds {
    /// The ending index of each field.
    ///
    /// This vector may hold more slots than there are fields; only the first
    /// `len` entries are meaningful.
    ends: Vec<usize>,
    /// The number of fields in this record.
    ///
    /// Technically, we could drop this field and maintain an invariant that
    /// `ends.len()` is always the number of fields, but doing that efficiently
    /// requires attention to safety. We play it safe at essentially no cost.
    len: usize,
}

impl Default for Bounds {
    /// Empty bounds with no slots reserved.
    #[inline]
    fn default() -> Bounds {
        Self::with_capacity(0)
    }
}

impl Bounds {
    /// Create empty bounds with `capacity` zero-filled slots for field ends.
    #[inline]
    fn with_capacity(capacity: usize) -> Bounds {
        let ends = vec![0; capacity];
        Bounds { ends, len: 0 }
    }

    /// Returns the byte range of field `i`, or `None` if there is no such
    /// field.
    #[inline]
    fn get(&self, i: usize) -> Option<Range<usize>> {
        if i >= self.len {
            return None;
        }
        let end = *self.ends.get(i)?;
        // A field starts where its predecessor ends; the first starts at 0.
        // `i - 1` is in range because slot `i` was just read successfully.
        let start = if i == 0 { 0 } else { self.ends[i - 1] };
        Some(start..end)
    }

    /// Returns the ending positions of all fields currently in use.
    #[inline]
    fn ends(&self) -> &[usize] {
        let (used, _spare) = self.ends.split_at(self.len);
        used
    }

    /// Return the ending position of the last field.
    ///
    /// If there are no fields, this returns `0`.
    #[inline]
    fn end(&self) -> usize {
        self.ends().last().copied().unwrap_or(0)
    }

    /// Returns the number of fields in these bounds.
    #[inline]
    fn len(&self) -> usize {
        self.len
    }

    /// Grow the slot storage: double it, with a floor of 4 slots, filling the
    /// new slots with zero. Panics if doubling overflows `usize`.
    #[inline]
    fn expand(&mut self) {
        let doubled = self.ends.len().checked_mul(2).unwrap();
        self.ends.resize(doubled.max(4), 0);
    }

    /// Record a new field that ends at byte offset `pos`.
    #[inline]
    fn add(&mut self, pos: usize) {
        // Make room first if every slot is already in use.
        if self.ends.len() <= self.len {
            self.expand();
        }
        self.ends[self.len] = pos;
        self.len += 1;
    }
}
727 
728 impl ops::Index<usize> for ByteRecord {
729     type Output = [u8];
730     #[inline]
index(&self, i: usize) -> &[u8]731     fn index(&self, i: usize) -> &[u8] {
732         self.get(i).unwrap()
733     }
734 }
735 
736 impl From<StringRecord> for ByteRecord {
737     #[inline]
from(record: StringRecord) -> ByteRecord738     fn from(record: StringRecord) -> ByteRecord {
739         record.into_byte_record()
740     }
741 }
742 
743 impl<T: AsRef<[u8]>> From<Vec<T>> for ByteRecord {
744     #[inline]
from(xs: Vec<T>) -> ByteRecord745     fn from(xs: Vec<T>) -> ByteRecord {
746         ByteRecord::from_iter(&xs)
747     }
748 }
749 
750 impl<'a, T: AsRef<[u8]>> From<&'a [T]> for ByteRecord {
751     #[inline]
from(xs: &'a [T]) -> ByteRecord752     fn from(xs: &'a [T]) -> ByteRecord {
753         ByteRecord::from_iter(xs)
754     }
755 }
756 
757 impl<T: AsRef<[u8]>> FromIterator<T> for ByteRecord {
758     #[inline]
from_iter<I: IntoIterator<Item = T>>(iter: I) -> ByteRecord759     fn from_iter<I: IntoIterator<Item = T>>(iter: I) -> ByteRecord {
760         let mut record = ByteRecord::new();
761         record.extend(iter);
762         record
763     }
764 }
765 
766 impl<T: AsRef<[u8]>> Extend<T> for ByteRecord {
767     #[inline]
extend<I: IntoIterator<Item = T>>(&mut self, iter: I)768     fn extend<I: IntoIterator<Item = T>>(&mut self, iter: I) {
769         for x in iter {
770             self.push_field(x.as_ref());
771         }
772     }
773 }
774 
/// A double-ended iterator over the fields in a byte record.
///
/// The `'r` lifetime variable refers to the lifetime of the `ByteRecord` that
/// is being iterated over.
#[derive(Clone)]
pub struct ByteRecordIter<'r> {
    /// The record we are iterating over.
    r: &'r ByteRecord,
    /// The starting index of the previous field. (For reverse iteration.)
    ///
    /// This is the byte offset at which the next field yielded from the back
    /// ends.
    last_start: usize,
    /// The ending index of the previous field. (For forward iteration.)
    ///
    /// This is the byte offset at which the next field yielded from the
    /// front starts.
    last_end: usize,
    /// The index of forward iteration.
    i_forward: usize,
    /// The index of reverse iteration.
    ///
    /// Iteration is finished once this equals `i_forward`.
    i_reverse: usize,
}
792 
793 impl<'r> IntoIterator for &'r ByteRecord {
794     type IntoIter = ByteRecordIter<'r>;
795     type Item = &'r [u8];
796 
797     #[inline]
into_iter(self) -> ByteRecordIter<'r>798     fn into_iter(self) -> ByteRecordIter<'r> {
799         ByteRecordIter {
800             r: self,
801             last_start: self.as_slice().len(),
802             last_end: 0,
803             i_forward: 0,
804             i_reverse: self.len(),
805         }
806     }
807 }
808 
809 impl<'r> ExactSizeIterator for ByteRecordIter<'r> {}
810 
811 impl<'r> Iterator for ByteRecordIter<'r> {
812     type Item = &'r [u8];
813 
814     #[inline]
next(&mut self) -> Option<&'r [u8]>815     fn next(&mut self) -> Option<&'r [u8]> {
816         if self.i_forward == self.i_reverse {
817             None
818         } else {
819             let start = self.last_end;
820             let end = self.r.0.bounds.ends()[self.i_forward];
821             self.i_forward += 1;
822             self.last_end = end;
823             Some(&self.r.0.fields[start..end])
824         }
825     }
826 
827     #[inline]
size_hint(&self) -> (usize, Option<usize>)828     fn size_hint(&self) -> (usize, Option<usize>) {
829         let x = self.i_reverse - self.i_forward;
830         (x, Some(x))
831     }
832 
833     #[inline]
count(self) -> usize834     fn count(self) -> usize {
835         self.len()
836     }
837 }
838 
839 impl<'r> DoubleEndedIterator for ByteRecordIter<'r> {
840     #[inline]
next_back(&mut self) -> Option<&'r [u8]>841     fn next_back(&mut self) -> Option<&'r [u8]> {
842         if self.i_forward == self.i_reverse {
843             None
844         } else {
845             self.i_reverse -= 1;
846             let start = self
847                 .i_reverse
848                 .checked_sub(1)
849                 .map(|i| self.r.0.bounds.ends()[i])
850                 .unwrap_or(0);
851             let end = self.last_start;
852             self.last_start = start;
853             Some(&self.r.0.fields[start..end])
854         }
855     }
856 }
857 
/// Returns true for bytes in the class `[\t\n\v\f\r ]`.
///
/// `u8::is_ascii_whitespace` does not treat vertical tab (0x0B) as
/// whitespace, so it is checked explicitly here. `ByteRecord::trim` documents
/// `\v` as part of the class it removes.
fn is_trimmable_byte(byte: u8) -> bool {
    byte.is_ascii_whitespace() || byte == b'\x0b'
}

/// Returns `bytes` with leading and trailing bytes of the class
/// `[\t\n\v\f\r ]` removed. The result is a sub-slice of the input.
fn trim_ascii(bytes: &[u8]) -> &[u8] {
    trim_ascii_start(trim_ascii_end(bytes))
}

/// Returns `bytes` with leading bytes of the class `[\t\n\v\f\r ]` removed.
fn trim_ascii_start(mut bytes: &[u8]) -> &[u8] {
    while let [first, rest @ ..] = bytes {
        if !is_trimmable_byte(*first) {
            break;
        }
        bytes = rest;
    }
    bytes
}

/// Returns `bytes` with trailing bytes of the class `[\t\n\v\f\r ]` removed.
fn trim_ascii_end(mut bytes: &[u8]) -> &[u8] {
    while let [rest @ .., last] = bytes {
        if !is_trimmable_byte(*last) {
            break;
        }
        bytes = rest;
    }
    bytes
}
883 
#[cfg(test)]
mod tests {
    use crate::string_record::StringRecord;

    use super::ByteRecord;

    /// Views a string as its underlying bytes.
    fn bytes(text: &str) -> &[u8] {
        text.as_bytes()
    }

    /// Builds a record by pushing each of `fields`, in order, onto a freshly
    /// created `ByteRecord`.
    fn pushed(fields: &[&[u8]]) -> ByteRecord {
        let mut record = ByteRecord::new();
        for &field in fields {
            record.push_field(field);
        }
        record
    }

    /// Asserts that `record` holds exactly the fields in `expected`, and that
    /// the two indices just past the end yield `None`.
    fn assert_fields(record: &ByteRecord, expected: &[&str]) {
        let count = expected.len();
        assert_eq!(record.len(), count);
        for (idx, &want) in expected.iter().enumerate() {
            assert_eq!(record.get(idx), Some(bytes(want)));
        }
        for idx in count..count + 2 {
            assert_eq!(record.get(idx), None);
        }
    }

    /// Converts `fields` into a record, trims it, and asserts that the
    /// leading fields match `expected`.
    fn assert_trimmed<T: AsRef<[u8]>>(fields: Vec<T>, expected: &[&str]) {
        let mut record = ByteRecord::from(fields);
        record.trim();
        for (idx, &want) in expected.iter().enumerate() {
            assert_eq!(record.get(idx), Some(bytes(want)));
        }
    }

    /// Pushes `fields` into a record, converts it to a `StringRecord` (which
    /// must fail), and returns `(field, valid_up_to)` from the UTF-8 error.
    fn utf8_failure(fields: &[&[u8]]) -> (usize, usize) {
        let err = StringRecord::from_byte_record(pushed(fields)).unwrap_err();
        let utf8 = err.utf8_error();
        (utf8.field(), utf8.valid_up_to())
    }

    #[test]
    fn record_1() {
        assert_fields(&pushed(&[b"foo"]), &["foo"]);
    }

    #[test]
    fn record_2() {
        assert_fields(&pushed(&[b"foo", b"quux"]), &["foo", "quux"]);
    }

    #[test]
    fn empty_record() {
        assert_fields(&ByteRecord::new(), &[]);
    }

    #[test]
    fn trim_whitespace_only() {
        assert_trimmed(vec![b" \t\n\r\x0c"], &[""]);
    }

    #[test]
    fn trim_front() {
        assert_trimmed(vec![b" abc"], &["abc"]);
        assert_trimmed(vec![bytes(" abc"), bytes("  xyz")], &["abc", "xyz"]);
    }

    #[test]
    fn trim_back() {
        assert_trimmed(vec![b"abc "], &["abc"]);
        assert_trimmed(vec![bytes("abc "), bytes("xyz  ")], &["abc", "xyz"]);
    }

    #[test]
    fn trim_both() {
        assert_trimmed(vec![b" abc "], &["abc"]);
        assert_trimmed(
            vec![bytes(" abc "), bytes("  xyz  ")],
            &["abc", "xyz"],
        );
    }

    #[test]
    fn trim_does_not_panic_on_empty_records_1() {
        assert_trimmed(vec![b""], &[""]);
    }

    #[test]
    fn trim_does_not_panic_on_empty_records_2() {
        assert_trimmed(vec![b"", b""], &["", ""]);
    }

    #[test]
    fn trim_does_not_panic_on_empty_records_3() {
        let mut record = ByteRecord::new();
        record.trim();
        assert!(record.as_slice().is_empty());
    }

    #[test]
    fn empty_field_1() {
        assert_fields(&pushed(&[b""]), &[""]);
    }

    #[test]
    fn empty_field_2() {
        assert_fields(&pushed(&[b"", b""]), &["", ""]);
    }

    #[test]
    fn empty_surround_1() {
        assert_fields(&pushed(&[b"foo", b"", b"quux"]), &["foo", "", "quux"]);
    }

    #[test]
    fn empty_surround_2() {
        assert_fields(
            &pushed(&[b"foo", b"", b"quux", b""]),
            &["foo", "", "quux", ""],
        );
    }

    #[test]
    fn utf8_error_1() {
        assert_eq!(utf8_failure(&[b"foo", b"b\xFFar"]), (1, 1));
    }

    #[test]
    fn utf8_error_2() {
        assert_eq!(utf8_failure(&[b"\xFF"]), (0, 0));
    }

    #[test]
    fn utf8_error_3() {
        assert_eq!(utf8_failure(&[b"a\xFF"]), (0, 1));
    }

    #[test]
    fn utf8_error_4() {
        assert_eq!(utf8_failure(&[b"a", b"b", b"c", b"d", b"xyz\xFF"]), (4, 3));
    }

    #[test]
    fn utf8_error_5() {
        assert_eq!(utf8_failure(&[b"a", b"b", b"c", b"d", b"\xFFxyz"]), (4, 0));
    }

    // A tricky case: neither field is valid UTF-8 on its own, yet the bytes
    // of both fields joined together are. The conversion must still fail.
    #[test]
    fn utf8_error_6() {
        assert_eq!(utf8_failure(&[b"a\xc9", b"\x91b"]), (0, 1));
    }

    // Clearing a `ByteRecord` must always make the conversion to UTF-8
    // succeed, which is what permits reusing the allocation.
    #[test]
    fn utf8_clear_ok() {
        let invalid = pushed(&[b"\xFF"]);
        assert!(StringRecord::from_byte_record(invalid).is_err());

        let mut cleared = pushed(&[b"\xFF"]);
        cleared.clear();
        assert!(StringRecord::from_byte_record(cleared).is_ok());
    }

    #[test]
    fn iter() {
        let fields = vec!["foo", "bar", "baz", "quux", "wat"];
        let record = ByteRecord::from(fields.as_slice());

        let mut decoded: Vec<&str> = Vec::new();
        for raw in record.iter() {
            decoded.push(std::str::from_utf8(raw).unwrap());
        }
        assert_eq!(fields, decoded);
    }

    #[test]
    fn iter_reverse() {
        let fields = vec!["foo", "bar", "baz", "quux", "wat"];
        let record = ByteRecord::from(fields.as_slice());

        let decoded: Vec<&str> = record
            .iter()
            .rev()
            .map(|raw| std::str::from_utf8(raw).unwrap())
            .collect();
        let reversed: Vec<&str> = fields.iter().rev().copied().collect();
        assert_eq!(reversed, decoded);
    }

    #[test]
    fn iter_forward_and_reverse() {
        let fields = vec!["foo", "bar", "baz", "quux", "wat"];
        let record = ByteRecord::from(fields);
        let mut cursor = record.iter();

        // Alternate between both ends until the iterator is drained.
        assert_eq!(cursor.next_back(), Some(bytes("wat")));
        assert_eq!(cursor.next(), Some(bytes("foo")));
        assert_eq!(cursor.next(), Some(bytes("bar")));
        assert_eq!(cursor.next_back(), Some(bytes("quux")));
        assert_eq!(cursor.next(), Some(bytes("baz")));
        assert!(cursor.next_back().is_none());
        assert!(cursor.next().is_none());
    }

    // Records with the same concatenated bytes but different field
    // boundaries must not compare equal.
    //
    // Regression test for #138.
    #[test]
    fn eq_field_boundaries() {
        let left = ByteRecord::from(vec!["12", "34"]);
        let right = ByteRecord::from(vec!["123", "4"]);

        assert_ne!(left, right);
    }

    // Records with a different number of fields must not compare equal.
    //
    // Regression test for #138.
    #[test]
    fn eq_record_len() {
        let longer = ByteRecord::from(vec!["12", "34", "56"]);
        let shorter = ByteRecord::from(vec!["12", "34"]);
        assert_ne!(longer, shorter);
    }
}
1192