Skip to content

Commit 0f64d3f

Browse files
dae authored and BurntSushi committed
api: automatically escape fields that contain the comment character
Previously, if data was written with QuoteStyle::Necessary and the first field of a row happened to start with the comment character, the row would be ignored as a comment when later reading it back in. This change adds a `comment` property to Writer, and automatically quotes fields that have the provided comment character anywhere in them, so they round-trip correctly. Closes BurntSushi#283
1 parent 574ae1f commit 0f64d3f

File tree

2 files changed

+75
-0
lines changed

2 files changed

+75
-0
lines changed

csv-core/src/writer.rs

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@ impl WriterBuilder {
2626
quote: b'"',
2727
escape: b'\\',
2828
double_quote: true,
29+
comment: None,
2930
};
3031
WriterBuilder { wtr: wtr }
3132
}
@@ -56,6 +57,13 @@ impl WriterBuilder {
5657
}
5758
_ => unreachable!(),
5859
}
60+
// If the first field of a row starts with a comment character,
61+
// it needs to be quoted, or the row will not be readable later.
62+
// As requires_quotes is calculated in advance, we force quotes
63+
// when a comment character is encountered anywhere in the field.
64+
if let Some(comment) = self.wtr.comment {
65+
wtr.requires_quotes[comment as usize] = true;
66+
}
5967
wtr
6068
}
6169

@@ -119,6 +127,17 @@ impl WriterBuilder {
119127
self.wtr.double_quote = yes;
120128
self
121129
}
130+
131+
/// Set the comment character that will be used when later reading the file.
132+
///
133+
/// If `quote_style` is set to `QuoteStyle::Necessary`, a field will
134+
/// be quoted if the comment character appears anywhere in it, not only at the start of a row.
135+
///
136+
/// The default value is `None`, in which case no byte is additionally marked as requiring quotes.
137+
pub fn comment(&mut self, comment: Option<u8>) -> &mut WriterBuilder {
138+
self.wtr.comment = comment;
139+
self
140+
}
122141
}
123142

124143
impl Default for WriterBuilder {
@@ -166,6 +185,7 @@ pub struct Writer {
166185
quote: u8,
167186
escape: u8,
168187
double_quote: bool,
188+
comment: Option<u8>,
169189
}
170190

171191
impl Clone for Writer {
@@ -183,6 +203,7 @@ impl Clone for Writer {
183203
quote: self.quote,
184204
escape: self.escape,
185205
double_quote: self.double_quote,
206+
comment: self.comment,
186207
}
187208
}
188209
}
@@ -1044,4 +1065,21 @@ mod tests {
10441065
inp = &inp[1..];
10451066
assert_quote!(inp, out, 1, 2, InputEmpty, r#""""#);
10461067
}
1068+
1069+
// With `#` configured as the comment byte, a field beginning with `#` must be
// written quoted: the opening quote is emitted together with the field data,
// and the closing quote is emitted by `finish`.
#[test]
1070+
fn comment_char_is_automatically_quoted() {
1071+
let mut wtr = WriterBuilder::new().comment(Some(b'#')).build();
1072+
let out = &mut [0; 1024];
1073+
1074+
assert_field!(
1075+
wtr,
1076+
b("# abc"),
1077+
&mut out[..],
1078+
5,
1079+
6,
1080+
InputEmpty,
1081+
"\"# abc"
1082+
);
1083+
assert_write!(wtr, finish, &mut out[..], 1, InputEmpty, "\"");
1084+
}
10471085
}

src/writer.rs

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -470,6 +470,34 @@ impl WriterBuilder {
470470
self
471471
}
472472

473+
/// Set the comment character that will be used when later reading the file.
474+
///
475+
/// If `quote_style` is set to `QuoteStyle::Necessary`, a field will
476+
/// be quoted if the comment character is detected anywhere in the field.
477+
///
478+
/// The default value is `None`.
479+
///
480+
/// # Example
481+
///
482+
/// ```
483+
/// use std::error::Error;
484+
/// use csv::WriterBuilder;
485+
///
486+
/// # fn main() { example().unwrap(); }
487+
/// fn example() -> Result<(), Box<dyn Error>> {
488+
/// let mut wtr =
489+
/// WriterBuilder::new().comment(Some(b'#')).from_writer(Vec::new());
490+
/// wtr.write_record(&["# comment", "another"]).unwrap();
491+
/// let buf = wtr.into_inner().unwrap();
492+
/// assert_eq!(String::from_utf8(buf).unwrap(), "\"# comment\",another\n");
493+
/// Ok(())
494+
/// }
495+
/// ```
496+
pub fn comment(&mut self, comment: Option<u8>) -> &mut WriterBuilder {
497+
self.builder.comment(comment);
498+
self
499+
}
500+
473501
/// Set the capacity (in bytes) of the internal buffer used in the CSV
474502
/// writer. This defaults to a reasonable setting.
475503
pub fn buffer_capacity(&mut self, capacity: usize) -> &mut WriterBuilder {
@@ -1414,4 +1442,13 @@ mod tests {
14141442
wtr.serialize((true, 1.3, "hi")).unwrap();
14151443
assert_eq!(wtr_as_string(wtr), "true,1.3,hi\n");
14161444
}
1445+
1446+
// With `#` configured as the comment byte, the field containing `#` is written
// quoted while the field without it is written bare.
#[test]
1447+
fn comment_char_is_automatically_quoted() {
1448+
let mut wtr =
1449+
WriterBuilder::new().comment(Some(b'#')).from_writer(Vec::new());
1450+
wtr.write_record(&["# comment", "another"]).unwrap();
1451+
let buf = wtr.into_inner().unwrap();
1452+
assert_eq!(String::from_utf8(buf).unwrap(), "\"# comment\",another\n");
1453+
}
14171454
}

0 commit comments

Comments
 (0)