Решение на CSV Filter от Деян Горанов

Обратно към всички решения

Към профила на Деян Горанов

Резултати

  • 15 точки от тестове
  • 0 бонус точки
  • 15 точки общо
  • 15 успешни тест(а)
  • 0 неуспешни тест(а)

Код

use std::{
collections::HashMap,
io::{self, BufRead, Write},
};
use utils::{CharSplittable, HasDup};
#[cfg(test)]
mod tests {
use super::*;
use io::BufReader;
#[test]
fn test_skip_next() {
    // Each case: (input, character to skip, expected remainder).
    // The Cyrillic cases make sure multi-byte characters are handled.
    let cases = [
        ("(foo", '(', Some("foo")),
        ("(foo", ')', None),
        ("", ')', None),
        ("фуѝбар ", 'ѝ', None),
        ("ѝфубар ", 'ѝ', Some("фубар ")),
    ];
    for &(input, target, expected) in cases.iter() {
        assert_eq!(skip_next(input, target), expected);
    }
}
#[test]
fn test_take_until() {
    // Each case: (input, stop character, expected (before, from-stop-onwards)).
    let cases = [
        (" foo/bar ", '/', (" foo", "/bar ")),
        ("foobar", '/', ("foobar", "")),
        ("/foobar", '/', ("", "/foobar")),
        ("foobar/", '/', ("foobar", "/")),
        ("/", '/', ("", "/")),
        ("//", '/', ("", "//")),
        ("", '/', ("", "")),
        (" фуѝбар ", 'ѝ', (" фу", "ѝбар ")),
    ];
    for &(input, target, expected) in cases.iter() {
        assert_eq!(take_until(input, target), expected);
    }
}
#[test]
fn test_take_and_skip() {
    // Each case: (input, separator, expected (before, after) or None when
    // the separator does not occur).
    let cases = [
        (" foo/bar ", '/', Some((" foo", "bar "))),
        ("foobar", '/', None),
        ("/foobar", '/', Some(("", "foobar"))),
        ("foobar/", '/', Some(("foobar", ""))),
        ("/", '/', Some(("", ""))),
        ("//", '/', Some(("", "/"))),
        ("", '/', None),
        (" фуѝбар ", 'ѝ', Some((" фу", "бар "))),
    ];
    for &(input, target, expected) in cases.iter() {
        assert_eq!(take_and_skip(input, target), expected);
    }
}
#[test]
fn test_char_split() {
    // Each case: (input, expected pieces when splitting on ',').
    // Empty pieces must be kept, including trailing ones.
    let cases = [
        (
            "aa,b,c,гошо, ,,",
            &["aa", "b", "c", "гошо", " ", "", ""][..],
        ),
        (",aa", &["", "aa"][..]),
        ("", &[""][..]),
    ];
    for &(input, expected) in cases.iter() {
        let pieces: Vec<&str> = input.char_split(',').collect();
        assert_eq!(pieces, expected);
    }
}
#[test]
fn test_has_dup() {
    // Slices whose elements are all distinct.
    let all_distinct = ["kon", "tra", "i", "drugi", "", "бг", "вг"];
    let empty_and_word = ["", "kon"];
    assert_eq!(all_distinct.has_dup(), false);
    assert_eq!(empty_and_word.has_dup(), false);

    // Slices containing a repeated element (the empty string counts too).
    let repeated_word = ["kon", "kon"];
    let repeated_empty = ["", ""];
    assert_eq!(repeated_word.has_dup(), true);
    assert_eq!(repeated_empty.has_dup(), true);
}
// Builds a `Csv` from the given text and asserts that the result of
// `Csv::new` matches the given pattern. The trailing comma is required.
macro_rules! assert_csv {
    ($expr:expr, $pat:pat,) => {{
        let outcome = Csv::new(BufReader::new($expr.as_bytes()));
        assert!(matches!(outcome, $pat));
    }};
}
#[test]
fn test_csv_new_error() {
    // Headers that must be rejected: an empty stream, and duplicate
    // column names once surrounding whitespace is trimmed.
    let rejected = ["", " kon , tra , kon ", " кон k, tra , кон k "];
    for header in rejected.iter() {
        assert_csv!(header, Err(CsvError::InvalidHeader(_)),);
    }

    // Headers whose column names are all distinct.
    let accepted = [" kon , tra , konk ", " kon , tra , kon k "];
    for header in accepted.iter() {
        assert_csv!(header, Ok(_),);
    }
}
#[test]
fn test_csv_parse_line_errors() {
    let header = " name, age, birth date ";
    let mut csv = Csv::new(BufReader::new(header.as_bytes())).unwrap();

    // Rows that must be rejected with `InvalidRow`.
    let malformed = [
        // closing quote of the first value is missing
        r#""Basic Name,"13","2020-01-01""#,
        // no comma between the first two values
        r#""Basic Name""13","2020-01-01""#,
        // last value is never closed
        r#""Basic Name","13","2020-01-01"#,
        // first value is never opened
        r#"Basic Name","13","2020-01-01""#,
        // too few values for the three columns
        r#""Basic Name","2020-01-01""#,
        // too many values for the three columns
        r#""Basic Name","13","2020-01-01","andmore""#,
    ];
    for line in malformed.iter() {
        assert!(matches!(
            csv.parse_line(line),
            Err(CsvError::InvalidRow(_))
        ));
    }

    // Whitespace around quoted values and commas is allowed.
    let padded = r#" "Basic Name" , "13" , "2020-01-01" "#;
    assert!(csv.parse_line(padded).is_ok());
}
#[test]
fn test_with_example1() {
    // Prepare the data:
    let input = r#"
name, age, birth date
"Douglas Adams", "42", "1952-03-11"
"Gen Z. Person", "20", "2000-01-01"
"Ada Lovelace", "36", "1815-12-10"
"#
    .trim();

    // Construct the CSV:
    let mut csv = Csv::new(BufReader::new(input.as_bytes())).unwrap();

    // Install the condition -- only rows with an age above 30 remain:
    csv.apply_selection(|row| {
        let raw_age = match row.get("age") {
            Some(value) => value,
            None => return Err(CsvError::InvalidColumn(String::from("age"))),
        };
        match raw_age.parse::<u32>() {
            Ok(age) => Ok(age > 30),
            Err(_) => Err(CsvError::ParseError(String::from(raw_age))),
        }
    });

    // Iterate over the result: the two selected rows, in file order...
    for expected_name in ["Douglas Adams", "Ada Lovelace"].iter() {
        let row = csv
            .next()
            .expect("expected another selected row")
            .expect("expected the row to parse and pass the selection");
        assert_eq!(row.get("name").map(String::as_str), Some(*expected_name));
    }
    // ...and then the iterator is exhausted.
    assert!(csv.next().is_none());
}
}
// ##### Utils #####
pub use utils::skip_next;
pub use utils::take_until;
pub use utils::take_and_skip;
mod utils {
    use std::{
        collections::HashSet,
        hash::Hash,
    };

    /// Returns the rest of `input` after its first character if that
    /// character is `target`; otherwise (or for empty input) returns `None`.
    pub fn skip_next(input: &str, target: char) -> Option<&str> {
        let mut chars = input.chars();
        match chars.next() {
            Some(first) if first == target => Some(chars.as_str()),
            _ => None,
        }
    }

    /// Splits `input` just before the first occurrence of `target`.
    ///
    /// The second half starts with `target`. When `target` does not occur,
    /// the result is `(input, "")`.
    pub fn take_until(input: &str, target: char) -> (&str, &str) {
        match input.find(target) {
            Some(idx) => input.split_at(idx),
            None => (input, ""),
        }
    }

    /// Splits `input` around the first occurrence of `target`, dropping the
    /// `target` itself. Returns `None` when `target` does not occur.
    pub fn take_and_skip(input: &str, target: char) -> Option<(&str, &str)> {
        let (head, tail) = take_until(input, target);
        let rest = skip_next(tail, target)?;
        Some((head, rest))
    }

    /// A hand-rolled alternative to [`str::split`] for a single `char`
    /// separator, built on top of the helper functions above
    /// (with possibly rather poor performance).
    pub struct CharSplit<'a> {
        /// Text that is still to be split; `None` once everything has been
        /// yielded.
        haystack: Option<&'a str>,
        needle: char,
    }

    impl<'a> CharSplit<'a> {
        fn new(input: &'a str, target: char) -> Self {
            Self {
                needle: target,
                haystack: Some(input),
            }
        }
    }

    impl<'a> Iterator for CharSplit<'a> {
        type Item = &'a str;

        fn next(&mut self) -> Option<Self::Item> {
            // Taking the remainder leaves `None` behind, which is exactly
            // the state we want after the final piece has been yielded.
            let remaining = self.haystack.take()?;
            match take_and_skip(remaining, self.needle) {
                Some((piece, rest)) => {
                    self.haystack = Some(rest);
                    Some(piece)
                }
                // No separator left: the whole remainder is the last piece.
                None => Some(remaining),
            }
        }
    }

    /// Extension trait adding [`CharSplit`]-based splitting to string types.
    pub trait CharSplittable {
        fn char_split<'a>(&'a self, target: char) -> CharSplit<'a>;
    }

    impl CharSplittable for str {
        fn char_split<'a>(&'a self, target: char) -> CharSplit<'a> {
            CharSplit::new(self, target)
        }
    }

    /// Extension trait for checking whether a collection holds duplicates.
    pub trait HasDup {
        fn has_dup(&self) -> bool;
    }

    impl<T> HasDup for [T]
    where
        T: Eq + Hash,
    {
        /// Returns `true` as soon as an element equal to an earlier one is
        /// found, `false` if all elements are distinct.
        fn has_dup(&self) -> bool {
            let mut seen = HashSet::new();
            for item in self {
                if !seen.insert(item) {
                    return true;
                }
            }
            false
        }
    }
}
// ##### CSV errors #####
/// Errors produced while reading, parsing, filtering or writing CSV data.
#[derive(Debug)]
pub enum CsvError {
    /// An error reported by the underlying reader or writer.
    IO(io::Error),
    /// A value could not be converted; carries a caller-supplied description
    /// (the crate's own tests store the offending text here).
    ParseError(String),
    /// The header line is missing or contains duplicate column names;
    /// carries a human-readable explanation.
    InvalidHeader(String),
    /// A data row is malformed; carries a human-readable explanation.
    InvalidRow(String),
    /// A column was requested that the row does not contain; carries the
    /// column name.
    InvalidColumn(String),
}

impl From<io::Error> for CsvError {
    /// Wraps an I/O error so that `?` can be used on I/O results.
    fn from(e: io::Error) -> Self {
        CsvError::IO(e)
    }
}

impl std::fmt::Display for CsvError {
    /// Formats a short, user-facing description of the error.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            CsvError::IO(e) => write!(f, "I/O error: {}", e),
            CsvError::ParseError(msg) => write!(f, "parse error: {}", msg),
            CsvError::InvalidHeader(msg) => write!(f, "invalid header: {}", msg),
            CsvError::InvalidRow(msg) => write!(f, "invalid row: {}", msg),
            CsvError::InvalidColumn(name) => write!(f, "invalid column: {}", name),
        }
    }
}

// Implementing `Error` lets callers box this type as `dyn Error` and walk
// the cause chain down to the wrapped I/O error.
impl std::error::Error for CsvError {
    fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
        match self {
            CsvError::IO(e) => Some(e),
            CsvError::ParseError(_)
            | CsvError::InvalidHeader(_)
            | CsvError::InvalidRow(_)
            | CsvError::InvalidColumn(_) => None,
        }
    }
}
// ##### CSV #####
/// A parsed data row: maps each column name to the row's value for it.
type Row = HashMap<String, String>;
/// A CSV source read line by line from `R`, with an optional row filter.
///
/// Iterating yields one `Result<Row, CsvError>` per data row that passes
/// the installed selection (all rows when none is installed).
pub struct Csv<R: BufRead> {
/// Column names taken from the header line, with surrounding whitespace
/// trimmed, in header order.
pub columns: Vec<String>,
/// The underlying line source; the header line has already been consumed
/// from it by the time a `Csv` exists.
reader: R,
/// Optional predicate installed via `apply_selection`; rows for which it
/// returns `Ok(false)` are skipped during iteration.
selection: Option<Box<dyn Fn(&Row) -> Result<bool, CsvError>>>,
}
impl<R: BufRead> Csv<R> {
pub fn new(mut reader: R) -> Result<Self, CsvError> {
let mut line = String::new();
if reader.read_line(&mut line)? == 0 {
return Err(CsvError::InvalidHeader(
"Empty stream - header missing".into()
));
}
let columns =
line.char_split(',')
.map(|x| x.trim().into())
.collect::<Vec<String>>();
if columns.has_dup() {
return Err(CsvError::InvalidHeader(
"Headers have duplicates".into()
));
}
Ok(Csv{ columns, reader, selection: None })
}
pub fn parse_line(&mut self, line: &str) -> Result<Row, CsvError> {
// hack, because the reader sometimes returns two lines?!
let line = line.trim().split('\n').last().unwrap_or(line);

Ако махна този ред, test_with_example1 гърми, защото се връща CsvError::InvalidRow. Слагайки dbg!(&line) тук, се вижда, че в момента на гърмене имаме

line == r#"   "Gen Z. Person", "20", "2000-01-01"\n   "Ada Lovelace", "36", "1815-12-10""#

(unescape-нах output-а).

Gen Z. Person е много млад и затова selection му връща false, но някак си неговият ред не бива консумиран от reader stream-а и си остава там. Така следващото извикване на read_line връща и реда на Gen Z. Person, и следващия ред.

Като видях това, си помислих, че заради някакъв laziness на итератор редът се чете, но не се консумира, но не виждам къде може да е това. Защо това се случва и как мога да го оправя?

Пълния изход от test_with_example1, като на ред 313 сложа dbg!(&line):

---- tests::test_with_example1 stdout ----
[src/lib.rs:396] &line = "            \"Douglas Adams\", \"42\", \"1952-03-11\"\n"
[src/lib.rs:396] &line = "            \"Gen Z. Person\", \"20\", \"2000-01-01\"\n"
[src/lib.rs:396] &line = "            \"Gen Z. Person\", \"20\", \"2000-01-01\"\n            \"Ada Lovelace\", \"36\", \"1815-12-10\""
thread 'tests::test_with_example1' panicked at 'assertion failed: matches!(csv . next(), Some(Ok(row)) if row . get("name") . unwrap() .
         as_str() == "Ada Lovelace")', src/lib.rs:164:9

Като се промени теста да ползва .unwrap() се вижда че проблемът е че се връща стойност Err(CsvError::InvalidRow("Expected a comma...")) от csv.next().

// Splitting on '"' yields alternating pieces: even indices hold the text
// outside quotes (edges and separators), odd indices hold quoted values.
let (delimiters, values): (Vec<_>, Vec<_>) = line
    .char_split('"')
    .enumerate()
    .partition(|(i, _)| i % 2 == 0);

// With balanced quotes there is exactly one more outside-quotes piece than
// there are values. Check this first: with an unclosed quote the last
// "delimiter" is really a separator, so the edge check below would report
// a misleading problem.
if delimiters.len() != values.len() + 1 {
    return Err(CsvError::InvalidRow("Quote not closed".into()));
}

/// Fails if the text before the first or after the last quoted value
/// contains anything other than whitespace.
fn check_edge(position: &str, val: Option<&(usize, &str)>) -> Result<(), CsvError> {
    if let Some((_, x)) = val {
        if !x.trim().is_empty() {
            return Err(CsvError::InvalidRow(format!(
                "Unquoted value at the {} of the row. \
                 It should be between double quotes \"like this\".",
                position
            )));
        }
    }
    Ok(())
}
check_edge("start", delimiters.first())?;
check_edge("end", delimiters.last())?;

// Every piece strictly between the first and the last one separates two
// values and must be a single comma (whitespace aside). There are
// `len - 2` of them; `saturating_sub` covers rows with fewer pieces.
let interior_count = delimiters.len().saturating_sub(2);
if delimiters
    .iter()
    .skip(1)
    .take(interior_count)
    .any(|(_, x)| x.trim() != ",")
{
    return Err(CsvError::InvalidRow(
        "Expected a comma, found unquoted value. \
         All values are quoted, and there should be commas between them \
         \"like\", \"this\"."
            .into(),
    ));
}

if values.len() != self.columns.len() {
    return Err(CsvError::InvalidRow(format!(
        "Count mismatch. \
         There are {} values, but {} columns",
        values.len(),
        self.columns.len(),
    )));
}

// Pair each column name with the value at the same position.
let row = self
    .columns
    .iter()
    .cloned()
    .zip(values.into_iter().map(|(_, x)| String::from(x)))
    .collect();
Ok(row)
}
/// Installs `callback` as the row filter, replacing any previous one.
///
/// During iteration a row is yielded only when the callback returns
/// `Ok(true)`; an `Err` from the callback is yielded as that row's result.
pub fn apply_selection<F>(&mut self, callback: F)
where
    F: Fn(&Row) -> Result<bool, CsvError> + 'static,
{
    let boxed: Box<dyn Fn(&Row) -> Result<bool, CsvError>> = Box::new(callback);
    self.selection = Some(boxed);
}
/// Consumes the CSV and writes the header plus every selected row to
/// `writer`.
///
/// The header is the column names joined with `", "`. Each row is written
/// on its own line with every value wrapped in double quotes, in header
/// order.
///
/// # Errors
///
/// Stops at the first failure and returns it: a row error produced by
/// iteration, `CsvError::InvalidColumn` if a row lacks one of the columns,
/// or `CsvError::IO` if writing fails.
pub fn write_to<W: Write>(self, mut writer: W) -> Result<(), CsvError> {
    // Iterating below consumes `self`, so keep our own copy of the names.
    let columns = self.columns.clone();
    writeln!(writer, "{}", columns.join(", "))?;

    for parsed in self {
        let mut row = parsed?;
        let mut cells = Vec::new();
        for col_name in columns.iter() {
            match row.remove(col_name) {
                Some(val) => cells.push(format!("\"{}\"", val)),
                None => return Err(CsvError::InvalidColumn(col_name.clone())),
            }
        }
        writeln!(writer, "{}", cells.join(", "))?;
    }
    Ok(())
}
/// Reads the next line from the reader into `line` and parses it.
///
/// `line` is only a reusable buffer: whatever it held before the call is
/// discarded. Returns `None` at end of input, otherwise the parse result
/// (or the I/O error, converted to `CsvError`).
fn next_row(&mut self, line: &mut String) -> Option<Result<Row, CsvError>> {
    // `read_line` appends to the buffer instead of overwriting it. Without
    // this, a buffer reused across calls would still hold the previous row
    // and the parser would be handed two rows glued together.
    line.clear();
    match self.reader.read_line(line) {
        Ok(0) => None,
        Ok(_) => Some(self.parse_line(line.as_str())),
        Err(e) => Some(Err(e.into())),
    }
}
}
impl<R: BufRead> Iterator for Csv<R> {
    type Item = Result<Row, CsvError>;

    /// Yields the next row accepted by the selection (every row when no
    /// selection is installed).
    ///
    /// Rows the selection rejects are skipped. A row that fails to parse,
    /// or for which the selection returns an error, is yielded as `Err`.
    /// Returns `None` once the reader has no more lines.
    fn next(&mut self) -> Option<Self::Item> {
        // One buffer is handed to `next_row` for every read in this call.
        let mut buffer = String::new();
        loop {
            // `?` ends iteration when the input is exhausted.
            let row = match self.next_row(&mut buffer)? {
                Ok(row) => row,
                Err(err) => return Some(Err(err)),
            };

            let verdict = match self.selection.as_ref() {
                Some(select) => select(&row),
                None => Ok(true),
            };

            match verdict {
                Ok(true) => return Some(Ok(row)),
                // Rejected by the selection: move on to the next line.
                Ok(false) => continue,
                Err(err) => return Some(Err(err)),
            }
        }
    }
}

Лог от изпълнението

Compiling solution v0.1.0 (/tmp/d20210111-1538662-1vc3bn4/solution)
    Finished test [unoptimized + debuginfo] target(s) in 6.07s
     Running target/debug/deps/solution_test-8916805fc40a2dab

running 15 tests
test solution_test::test_csv_basic ... ok
test solution_test::test_csv_duplicate_columns ... ok
test solution_test::test_csv_empty ... ok
test solution_test::test_csv_iterating_with_a_selection ... ok
test solution_test::test_csv_iterating_with_no_selection ... ok
test solution_test::test_csv_parse_line ... ok
test solution_test::test_csv_parse_line_with_commas ... ok
test solution_test::test_csv_selection_and_writing ... ok
test solution_test::test_csv_single_column_no_data ... ok
test solution_test::test_csv_writing_without_a_selection ... ok
test solution_test::test_csv_writing_without_any_rows ... ok
test solution_test::test_parsing_helpers_for_unicode ... ok
test solution_test::test_skip_next ... ok
test solution_test::test_take_and_skip ... ok
test solution_test::test_take_until ... ok

test result: ok. 15 passed; 0 failed; 0 ignored; 0 measured; 0 filtered out

История (4 версии и 1 коментар)

Деян качи първо решение на 11.01.2021 15:21 (преди над 4 години)

Деян качи решение на 11.01.2021 16:39 (преди над 4 години)

use std::{
collections::HashMap,
io::{self, BufRead, Write},
};
use utils::{CharSplittable, HasDup};
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_skip_next() {
assert_eq!(skip_next("(foo", '('), Some("foo"));
assert_eq!(skip_next("(foo", ')'), None);
assert_eq!(skip_next("", ')'), None);
assert_eq!(skip_next("фуѝбар ", 'ѝ'), None);
assert_eq!(skip_next("ѝфубар ", 'ѝ'), Some("фубар "));
}
#[test]
fn test_take_until() {
assert_eq!(take_until(" foo/bar ", '/'), (" foo", "/bar "));
assert_eq!(take_until("foobar", '/'), ("foobar", ""));
assert_eq!(take_until("/foobar", '/'), ("", "/foobar"));
assert_eq!(take_until("foobar/", '/'), ("foobar", "/"));
assert_eq!(take_until("/", '/'), ("", "/"));
assert_eq!(take_until("//", '/'), ("", "//"));
assert_eq!(take_until("", '/'), ("", ""));
assert_eq!(take_until(" фуѝбар ", 'ѝ'), (" фу", "ѝбар "));
}
#[test]
fn test_take_and_skip() {
assert_eq!(take_and_skip(" foo/bar ", '/'), Some((" foo", "bar ")));
assert_eq!(take_and_skip("foobar", '/'), None);
assert_eq!(take_and_skip("/foobar", '/'), Some(("", "foobar")));
assert_eq!(take_and_skip("foobar/", '/'), Some(("foobar", "")));
assert_eq!(take_and_skip("/", '/'), Some(("", "")));
assert_eq!(take_and_skip("//", '/'), Some(("", "/")));
assert_eq!(take_and_skip("", '/'), None);
assert_eq!(take_and_skip(" фуѝбар ", 'ѝ'), Some((" фу", "бар ")));
}
#[test]
fn test_char_split() {
assert_eq!(
"aa,b,c,гошо, ,,".char_split(',').collect::<Vec<&str>>(),
vec!["aa", "b", "c", "гошо", " ", "", "" ],
);
assert_eq!(
",aa".char_split(',').collect::<Vec<&str>>(),
vec!["", "aa"]
);
assert_eq!(
"".char_split(',').collect::<Vec<&str>>(),
vec![""]
);
}
#[test]
fn test_has_dup() {
assert!(!["kon", "tra", "i", "drugi", "", "бг", "вг"].has_dup());
assert!(!["", "kon"].has_dup());
assert!(["kon", "kon"].has_dup());
assert!(["", ""].has_dup());
}
+ macro_rules! assert_csv {
+ ($expr:expr, $pat:pat,) => {
+ assert!(matches!(
+ Csv::new(BufReader::new($expr.as_bytes())),
+ $pat
+ ));
+ }
+ }
+
+ #[test]
+ fn test_csv_new_error() {
+ assert_csv!(
+ "",
+ Err(CsvError::InvalidHeader(_)),
+ );
+ assert_csv!(
+ " kon , tra , kon ",
+ Err(CsvError::InvalidHeader(_)),
+ );
+ assert_csv!(
+ " kon , tra , konk ",
+ Ok(_),
+ );
+ assert_csv!(
+ " kon , tra , kon k ",
+ Ok(_),
+ );
+ assert_csv!(
+ " кон k, tra , кон k ",
+ Err(CsvError::InvalidHeader(_)),
+ );
+ }
+
+ #[test]
+ fn test_csv_parse_line_errors() {
+ let mut csv =
+ Csv::new(BufReader::new(" name, age, birth date ".as_bytes()))
+ .unwrap();
+
+ let row = csv.parse_line(r#""Basic Name","13","2020-01-01""#).unwrap();
+
+ assert!(matches!(
+ csv.parse_line(r#""Basic Name,"13","2020-01-01""#),
+ Err(CsvError::InvalidRow(_))
+ ));
+ assert!(matches!(
+ csv.parse_line(r#" "Basic Name" , "13" , "2020-01-01" "#),
+ Ok(_)
+ ));
+ assert!(matches!(
+ csv.parse_line(r#""Basic Name""13","2020-01-01""#),
+ Err(CsvError::InvalidRow(_))
+ ));
+ assert!(matches!(
+ csv.parse_line(r#""Basic Name","13","2020-01-01"#),
+ Err(CsvError::InvalidRow(_))
+ ));
+ assert!(matches!(
+ csv.parse_line(r#"Basic Name","13","2020-01-01""#),
+ Err(CsvError::InvalidRow(_))
+ ));
+ assert!(matches!(
+ csv.parse_line(r#""Basic Name","2020-01-01""#),
+ Err(CsvError::InvalidRow(_))
+ ));
+ assert!(matches!(
+ csv.parse_line(r#""Basic Name","13","2020-01-01","andmore""#),
+ Err(CsvError::InvalidRow(_))
+ ));
+ }
+
}
// ##### Utils #####
pub use utils::skip_next;
pub use utils::take_until;
pub use utils::take_and_skip;
mod utils {
use std::{
collections::HashSet,
hash::Hash,
};
pub fn skip_next(input: &str, target: char) -> Option<&str> {
input.strip_prefix(target)
}
pub fn take_until(input: &str, target: char) -> (&str, &str) {
input.find(target)
.map(|i| input.split_at(i))
.unwrap_or((input, ""))
}
pub fn take_and_skip(input: &str, target: char) -> Option<(&str, &str)> {
let (f, s) = take_until(input, target);
skip_next(s, target)
.map(|s_skipped| (f, s_skipped))
}
/// Затваряме си очите за [`std::str::split`] и си правим наша версия
/// с (може би) доста лош performance.
/// Но пък уползотворяваме по-горе-дефинираните функции.
pub struct CharSplit<'a> {
/// Ако сеното свърши, значи няма какво повече да го делим.
haystack: Option<&'a str>,
needle: char,
}
impl<'a> CharSplit<'a> {
fn new(input: &'a str, target: char) -> Self {
Self { haystack: Some(input), needle: target }
}
}
impl<'a> Iterator for CharSplit<'a> {
type Item = &'a str;
fn next (&mut self) -> Option<Self::Item> {
if let Some((item, rest)) =
take_and_skip(self.haystack?, self.needle)
{
self.haystack = Some(rest);
Some(item)
} else {
let res = self.haystack;
self.haystack = None;
res
}
}
}
pub trait CharSplittable {
fn char_split<'a>(&'a self, target: char) -> CharSplit<'a>;
}
impl CharSplittable for str {
fn char_split<'a>(&'a self, target: char) -> CharSplit<'a> {
CharSplit::new(self, target)
}
}
pub trait HasDup {
fn has_dup(&self) -> bool;
}
impl<T> HasDup for [T]
where T: Eq + Hash
{
fn has_dup(&self) -> bool {
let mut set = HashSet::new();
self.iter().any(|x| !set.insert(x))
}
}
}
// ##### CSV errors #####
#[derive(Debug)]
pub enum CsvError {
IO(io::Error),
ParseError(String),
InvalidHeader(String),
InvalidRow(String),
InvalidColumn(String),
}
impl From<io::Error> for CsvError {
fn from(e: io::Error) -> Self {
CsvError::IO(e)
}
}
// ##### CSV #####
type Row = HashMap<String, String>;
pub struct Csv<R: BufRead> {
pub columns: Vec<String>,
reader: R,
selection: Option<Box<dyn Fn(&Row) -> Result<bool, CsvError>>>,
}
impl<R: BufRead> Csv<R> {
pub fn new(mut reader: R) -> Result<Self, CsvError> {
let mut line = String::new();
if reader.read_line(&mut line)? == 0 {
return Err(CsvError::InvalidHeader(
"Empty stream - header missing".into()
));
}
let columns =
line.char_split(',')
.map(|x| x.trim().into())
.collect::<Vec<String>>();
if columns.has_dup() {
return Err(CsvError::InvalidHeader(
"Headers have duplicates".into()
));
}
Ok(Csv{ columns, reader, selection: None })
}
pub fn parse_line(&mut self, line: &str) -> Result<Row, CsvError> {
let (delimiters, values): (Vec<_>, Vec<_>) =
line.char_split('"')
.enumerate()
.partition(|(i, _)| i % 2 == 0);
fn check_edge(position: &str, val: Option<&(usize, &str)>)
-> Result<(), CsvError>
{
if let Some((_, x)) = val {
if !x.trim().is_empty() {
return Err(CsvError::InvalidRow(
format!("\
Unquoted value at the {} of the row. \
It should be between double quotes \"like this\".\
", position)
));
}
}
Ok(())
};
check_edge("start", delimiters.first())?;
check_edge("end", delimiters.last())?;
// elements without the first and the last
if delimiters[1 .. delimiters.len() - 2].into_iter()
.any(|(_, x)| x.trim() != ",")
{
return Err(CsvError::InvalidRow("\
Expected a comma, found unquoted value. \
All values are quoted, and there should be commas between them \
\"like\", \"this\".\
".into()
));
}
if delimiters.len() != values.len() + 1 {
return Err(CsvError::InvalidRow(
"Quote not closed".into()
));
}
if values.len() != self.columns.len() {
return Err(CsvError::InvalidRow(
format!(
"Count mismatch. \
There are {} values, but {} columns",
values.len(),
self.columns.len(),
)
));
}
let row =
self.columns.iter()
.cloned()
.zip(
values.into_iter()
.map(|(_, x)| String::from(x))
)
.collect();
Ok(row)
}
pub fn apply_selection<F>(&mut self, callback: F)
where F: Fn(&Row) -> Result<bool, CsvError> + 'static
{
self.selection = Some(Box::new(callback));
}
pub fn write_to<W: Write>(self, mut writer: W) -> Result<(), CsvError> {
let columns = self.columns.clone();
writeln!(writer, "{}", columns.join(", "))?;
self.map(|res| res.and_then(|mut row| {
let row_string =
columns.iter()
.map(|col_name|
row.remove(col_name)
.map_or_else(
|| Err(CsvError::InvalidColumn(col_name.clone())),
|val| Ok(format!("\"{}\"", val))
)
)
.collect::<Result<Vec<_>, _>>()
?.join(", ");
writeln!(writer, "{}", row_string)
.map_err(|e| CsvError::from(e))
}))
.collect::<Result<Vec<()>, CsvError>>()
.map(|_| ())
}
fn next_row(&mut self, line: &mut String)-> Option<Result<Row, CsvError>> {
match self.reader.read_line(line) {
Ok(0) =>
None,
res =>
Some(
res
.map_err(|e| e.into())
.and_then(|_| self.parse_line(line.as_str()))
)
}
}
}
impl<R: BufRead> Iterator for Csv<R> {
type Item = Result<Row, CsvError>;
fn next(&mut self) -> Option<Self::Item> {
let mut line = String::new();
// TODO: move self.selection unwrapping out of the loop
loop {
// here `Some(_)` means 'end the loop, we have a value to return'
// and None means 'continue the loop'
let res: Option<Result<Row, CsvError>> =
self.next_row(&mut line)?
.and_then(|row| {
let is_selected =
self.selection.as_ref()
.map(|select| select(&row))
.unwrap_or(Ok(true))?;
// в nightly `Ok(is_selected.then_some(row))`
if is_selected {
Ok(Some(row))
} else {
Ok(None)
}
})
.transpose();
if let Some(_) = res {
return res
}
}
}
}

Деян качи решение на 11.01.2021 16:55 (преди над 4 години)

use std::{
collections::HashMap,
io::{self, BufRead, Write},
};
use utils::{CharSplittable, HasDup};
#[cfg(test)]
mod tests {
use super::*;
+ use io::BufReader;
#[test]
fn test_skip_next() {
assert_eq!(skip_next("(foo", '('), Some("foo"));
assert_eq!(skip_next("(foo", ')'), None);
assert_eq!(skip_next("", ')'), None);
assert_eq!(skip_next("фуѝбар ", 'ѝ'), None);
assert_eq!(skip_next("ѝфубар ", 'ѝ'), Some("фубар "));
}
#[test]
fn test_take_until() {
assert_eq!(take_until(" foo/bar ", '/'), (" foo", "/bar "));
assert_eq!(take_until("foobar", '/'), ("foobar", ""));
assert_eq!(take_until("/foobar", '/'), ("", "/foobar"));
assert_eq!(take_until("foobar/", '/'), ("foobar", "/"));
assert_eq!(take_until("/", '/'), ("", "/"));
assert_eq!(take_until("//", '/'), ("", "//"));
assert_eq!(take_until("", '/'), ("", ""));
assert_eq!(take_until(" фуѝбар ", 'ѝ'), (" фу", "ѝбар "));
}
#[test]
fn test_take_and_skip() {
assert_eq!(take_and_skip(" foo/bar ", '/'), Some((" foo", "bar ")));
assert_eq!(take_and_skip("foobar", '/'), None);
assert_eq!(take_and_skip("/foobar", '/'), Some(("", "foobar")));
assert_eq!(take_and_skip("foobar/", '/'), Some(("foobar", "")));
assert_eq!(take_and_skip("/", '/'), Some(("", "")));
assert_eq!(take_and_skip("//", '/'), Some(("", "/")));
assert_eq!(take_and_skip("", '/'), None);
assert_eq!(take_and_skip(" фуѝбар ", 'ѝ'), Some((" фу", "бар ")));
}
#[test]
fn test_char_split() {
assert_eq!(
"aa,b,c,гошо, ,,".char_split(',').collect::<Vec<&str>>(),
vec!["aa", "b", "c", "гошо", " ", "", "" ],
);
assert_eq!(
",aa".char_split(',').collect::<Vec<&str>>(),
vec!["", "aa"]
);
assert_eq!(
"".char_split(',').collect::<Vec<&str>>(),
vec![""]
);
}
#[test]
fn test_has_dup() {
assert!(!["kon", "tra", "i", "drugi", "", "бг", "вг"].has_dup());
assert!(!["", "kon"].has_dup());
assert!(["kon", "kon"].has_dup());
assert!(["", ""].has_dup());
}
+
macro_rules! assert_csv {
($expr:expr, $pat:pat,) => {
assert!(matches!(
Csv::new(BufReader::new($expr.as_bytes())),
$pat
));
}
}
#[test]
fn test_csv_new_error() {
assert_csv!(
"",
Err(CsvError::InvalidHeader(_)),
);
assert_csv!(
" kon , tra , kon ",
Err(CsvError::InvalidHeader(_)),
);
assert_csv!(
" kon , tra , konk ",
Ok(_),
);
assert_csv!(
" kon , tra , kon k ",
Ok(_),
);
assert_csv!(
" кон k, tra , кон k ",
Err(CsvError::InvalidHeader(_)),
);
}
#[test]
fn test_csv_parse_line_errors() {
let mut csv =
Csv::new(BufReader::new(" name, age, birth date ".as_bytes()))
.unwrap();
- let row = csv.parse_line(r#""Basic Name","13","2020-01-01""#).unwrap();
-
assert!(matches!(
csv.parse_line(r#""Basic Name,"13","2020-01-01""#),
Err(CsvError::InvalidRow(_))
));
assert!(matches!(
csv.parse_line(r#" "Basic Name" , "13" , "2020-01-01" "#),
Ok(_)
));
assert!(matches!(
csv.parse_line(r#""Basic Name""13","2020-01-01""#),
Err(CsvError::InvalidRow(_))
));
assert!(matches!(
csv.parse_line(r#""Basic Name","13","2020-01-01"#),
Err(CsvError::InvalidRow(_))
));
assert!(matches!(
csv.parse_line(r#"Basic Name","13","2020-01-01""#),
Err(CsvError::InvalidRow(_))
));
assert!(matches!(
csv.parse_line(r#""Basic Name","2020-01-01""#),
Err(CsvError::InvalidRow(_))
));
assert!(matches!(
csv.parse_line(r#""Basic Name","13","2020-01-01","andmore""#),
Err(CsvError::InvalidRow(_))
));
}
+ #[test]
+ fn test_with_example1() {
+ // Подготвяме данните:
+ let reader = BufReader::new(r#"
+ name, age, birth date
+ "Douglas Adams", "42", "1952-03-11"
+ "Gen Z. Person", "20", "2000-01-01"
+ "Ada Lovelace", "36", "1815-12-10"
+ "#.trim().as_bytes());
+
+ // Конструираме си CSV-то:
+ let mut csv = Csv::new(reader).unwrap();
+
+ // Инсталираме условието -- само редове с възраст над 30 ще останат:
+ csv.apply_selection(|row| {
+ let age = row.get("age").ok_or_else(|| CsvError::InvalidColumn(String::from("age")))?;
+ let age = age.parse::<u32>().map_err(|_| CsvError::ParseError(String::from(age)))?;
+
+ Ok(age > 30)
+ });
+
+ // Итерираме през резултата:
+ assert!(matches!(
+ csv.next(),
+ Some(Ok(row)) if row.get("name").unwrap().as_str() == "Douglas Adams"
+ ));
+ assert!(matches!(
+ csv.next(),
+ Some(Ok(row)) if row.get("name").unwrap().as_str() == "Ada Lovelace"
+ ));
+ assert!(matches!(
+ csv.next(),
+ None
+ ));
+ }
}
// ##### Utils #####
pub use utils::skip_next;
pub use utils::take_until;
pub use utils::take_and_skip;
mod utils {
use std::{
collections::HashSet,
hash::Hash,
};
pub fn skip_next(input: &str, target: char) -> Option<&str> {
input.strip_prefix(target)
}
pub fn take_until(input: &str, target: char) -> (&str, &str) {
input.find(target)
.map(|i| input.split_at(i))
.unwrap_or((input, ""))
}
pub fn take_and_skip(input: &str, target: char) -> Option<(&str, &str)> {
let (f, s) = take_until(input, target);
skip_next(s, target)
.map(|s_skipped| (f, s_skipped))
}
/// Затваряме си очите за [`std::str::split`] и си правим наша версия
/// с (може би) доста лош performance.
/// Но пък уползотворяваме по-горе-дефинираните функции.
pub struct CharSplit<'a> {
/// Ако сеното свърши, значи няма какво повече да го делим.
haystack: Option<&'a str>,
needle: char,
}
impl<'a> CharSplit<'a> {
fn new(input: &'a str, target: char) -> Self {
Self { haystack: Some(input), needle: target }
}
}
impl<'a> Iterator for CharSplit<'a> {
type Item = &'a str;
fn next (&mut self) -> Option<Self::Item> {
if let Some((item, rest)) =
take_and_skip(self.haystack?, self.needle)
{
self.haystack = Some(rest);
Some(item)
} else {
let res = self.haystack;
self.haystack = None;
res
}
}
}
pub trait CharSplittable {
fn char_split<'a>(&'a self, target: char) -> CharSplit<'a>;
}
impl CharSplittable for str {
fn char_split<'a>(&'a self, target: char) -> CharSplit<'a> {
CharSplit::new(self, target)
}
}
pub trait HasDup {
fn has_dup(&self) -> bool;
}
impl<T> HasDup for [T]
where T: Eq + Hash
{
fn has_dup(&self) -> bool {
let mut set = HashSet::new();
self.iter().any(|x| !set.insert(x))
}
}
}
// ##### CSV errors #####
#[derive(Debug)]
pub enum CsvError {
IO(io::Error),
ParseError(String),
InvalidHeader(String),
InvalidRow(String),
InvalidColumn(String),
}
impl From<io::Error> for CsvError {
fn from(e: io::Error) -> Self {
CsvError::IO(e)
}
}
// ##### CSV #####
type Row = HashMap<String, String>;
pub struct Csv<R: BufRead> {
pub columns: Vec<String>,
reader: R,
selection: Option<Box<dyn Fn(&Row) -> Result<bool, CsvError>>>,
}
impl<R: BufRead> Csv<R> {
pub fn new(mut reader: R) -> Result<Self, CsvError> {
let mut line = String::new();
if reader.read_line(&mut line)? == 0 {
return Err(CsvError::InvalidHeader(
"Empty stream - header missing".into()
));
}
let columns =
line.char_split(',')
.map(|x| x.trim().into())
.collect::<Vec<String>>();
if columns.has_dup() {
return Err(CsvError::InvalidHeader(
"Headers have duplicates".into()
));
}
Ok(Csv{ columns, reader, selection: None })
}
pub fn parse_line(&mut self, line: &str) -> Result<Row, CsvError> {
+ // hack, because the reader sometimes returns two lines?!
+ let line = line.trim().split('\n').last().unwrap_or(line);
+
let (delimiters, values): (Vec<_>, Vec<_>) =
line.char_split('"')
.enumerate()
.partition(|(i, _)| i % 2 == 0);
fn check_edge(position: &str, val: Option<&(usize, &str)>)
-> Result<(), CsvError>
{
if let Some((_, x)) = val {
if !x.trim().is_empty() {
return Err(CsvError::InvalidRow(
format!("\
Unquoted value at the {} of the row. \
It should be between double quotes \"like this\".\
", position)
));
}
}
Ok(())
};
check_edge("start", delimiters.first())?;
check_edge("end", delimiters.last())?;
// elements without the first and the last
- if delimiters[1 .. delimiters.len() - 2].into_iter()
- .any(|(_, x)| x.trim() != ",")
+ if delimiters.len() > 2 &&
+ delimiters[1 .. delimiters.len() - 2].into_iter()
+ .any(|(_, x)| x.trim() != ",")
{
return Err(CsvError::InvalidRow("\
Expected a comma, found unquoted value. \
All values are quoted, and there should be commas between them \
\"like\", \"this\".\
".into()
));
}
if delimiters.len() != values.len() + 1 {
return Err(CsvError::InvalidRow(
"Quote not closed".into()
));
}
if values.len() != self.columns.len() {
return Err(CsvError::InvalidRow(
format!(
"Count mismatch. \
There are {} values, but {} columns",
values.len(),
self.columns.len(),
)
));
}
let row =
self.columns.iter()
.cloned()
.zip(
values.into_iter()
.map(|(_, x)| String::from(x))
)
.collect();
Ok(row)
}
pub fn apply_selection<F>(&mut self, callback: F)
where F: Fn(&Row) -> Result<bool, CsvError> + 'static
{
self.selection = Some(Box::new(callback));
}
pub fn write_to<W: Write>(self, mut writer: W) -> Result<(), CsvError> {
let columns = self.columns.clone();
writeln!(writer, "{}", columns.join(", "))?;
self.map(|res| res.and_then(|mut row| {
let row_string =
columns.iter()
.map(|col_name|
row.remove(col_name)
.map_or_else(
|| Err(CsvError::InvalidColumn(col_name.clone())),
|val| Ok(format!("\"{}\"", val))
)
)
.collect::<Result<Vec<_>, _>>()
?.join(", ");
writeln!(writer, "{}", row_string)
.map_err(|e| CsvError::from(e))
}))
.collect::<Result<Vec<()>, CsvError>>()
.map(|_| ())
}
fn next_row(&mut self, line: &mut String)-> Option<Result<Row, CsvError>> {
match self.reader.read_line(line) {
Ok(0) =>
None,
res =>
Some(
res
.map_err(|e| e.into())
.and_then(|_| self.parse_line(line.as_str()))
)
}
}
}
impl<R: BufRead> Iterator for Csv<R> {
type Item = Result<Row, CsvError>;
fn next(&mut self) -> Option<Self::Item> {
let mut line = String::new();
// TODO: move self.selection unwrapping out of the loop
loop {
// here `Some(_)` means 'end the loop, we have a value to return'
// and None means 'continue the loop'
let res: Option<Result<Row, CsvError>> =
self.next_row(&mut line)?
- .and_then(|row| {
+ .and_then(|mut row| {
let is_selected =
self.selection.as_ref()
.map(|select| select(&row))
.unwrap_or(Ok(true))?;
// в nightly `Ok(is_selected.then_some(row))`
if is_selected {
Ok(Some(row))
} else {
+ row.drain();
Ok(None)
}
})
.transpose();
if let Some(_) = res {
return res
}
}
}
}

Деян качи решение на 11.01.2021 16:58 (преди над 4 години)

use std::{
collections::HashMap,
io::{self, BufRead, Write},
};
use utils::{CharSplittable, HasDup};
// Unit tests for the string helpers, header validation, row parsing and the
// end-to-end selection example.
#[cfg(test)]
mod tests {
use super::*;
use io::BufReader;
// `skip_next` only strips the target when it is the very first character.
#[test]
fn test_skip_next() {
assert_eq!(skip_next("(foo", '('), Some("foo"));
assert_eq!(skip_next("(foo", ')'), None);
assert_eq!(skip_next("", ')'), None);
assert_eq!(skip_next("фуѝбар ", 'ѝ'), None);
assert_eq!(skip_next("ѝфубар ", 'ѝ'), Some("фубар "));
}
// `take_until` splits before the first occurrence and keeps the target in
// the second half; with no occurrence the second half is empty.
#[test]
fn test_take_until() {
assert_eq!(take_until(" foo/bar ", '/'), (" foo", "/bar "));
assert_eq!(take_until("foobar", '/'), ("foobar", ""));
assert_eq!(take_until("/foobar", '/'), ("", "/foobar"));
assert_eq!(take_until("foobar/", '/'), ("foobar", "/"));
assert_eq!(take_until("/", '/'), ("", "/"));
assert_eq!(take_until("//", '/'), ("", "//"));
assert_eq!(take_until("", '/'), ("", ""));
assert_eq!(take_until(" фуѝбар ", 'ѝ'), (" фу", "ѝбар "));
}
// `take_and_skip` is `take_until` with the target itself dropped; it yields
// `None` when the target never occurs.
#[test]
fn test_take_and_skip() {
assert_eq!(take_and_skip(" foo/bar ", '/'), Some((" foo", "bar ")));
assert_eq!(take_and_skip("foobar", '/'), None);
assert_eq!(take_and_skip("/foobar", '/'), Some(("", "foobar")));
assert_eq!(take_and_skip("foobar/", '/'), Some(("foobar", "")));
assert_eq!(take_and_skip("/", '/'), Some(("", "")));
assert_eq!(take_and_skip("//", '/'), Some(("", "/")));
assert_eq!(take_and_skip("", '/'), None);
assert_eq!(take_and_skip(" фуѝбар ", 'ѝ'), Some((" фу", "бар ")));
}
// Splitting keeps empty pieces, including trailing ones, and an empty input
// yields a single empty piece.
#[test]
fn test_char_split() {
assert_eq!(
"aa,b,c,гошо, ,,".char_split(',').collect::<Vec<&str>>(),
vec!["aa", "b", "c", "гошо", " ", "", "" ],
);
assert_eq!(
",aa".char_split(',').collect::<Vec<&str>>(),
vec!["", "aa"]
);
assert_eq!(
"".char_split(',').collect::<Vec<&str>>(),
vec![""]
);
}
// Duplicate detection treats the empty string like any other element.
#[test]
fn test_has_dup() {
assert!(!["kon", "tra", "i", "drugi", "", "бг", "вг"].has_dup());
assert!(!["", "kon"].has_dup());
assert!(["kon", "kon"].has_dup());
assert!(["", ""].has_dup());
}
// Builds a `Csv` from the given string and asserts that the result of
// `Csv::new` matches the given pattern.
macro_rules! assert_csv {
($expr:expr, $pat:pat,) => {
assert!(matches!(
Csv::new(BufReader::new($expr.as_bytes())),
$pat
));
}
}
#[test]
fn test_csv_new_error() {
// Empty input: there is no header line at all.
assert_csv!(
"",
Err(CsvError::InvalidHeader(_)),
);
// Column names are compared after trimming, so "kon" appears twice.
assert_csv!(
" kon , tra , kon ",
Err(CsvError::InvalidHeader(_)),
);
assert_csv!(
" kon , tra , konk ",
Ok(_),
);
// Inner whitespace is part of the name: "kon k" differs from "kon".
assert_csv!(
" kon , tra , kon k ",
Ok(_),
);
assert_csv!(
" кон k, tra , кон k ",
Err(CsvError::InvalidHeader(_)),
);
}
#[test]
fn test_csv_parse_line_errors() {
let mut csv =
Csv::new(BufReader::new(" name, age, birth date ".as_bytes()))
.unwrap();
// Closing quote of the first value is missing.
assert!(matches!(
csv.parse_line(r#""Basic Name,"13","2020-01-01""#),
Err(CsvError::InvalidRow(_))
));
// Whitespace around the quoted values is accepted.
assert!(matches!(
csv.parse_line(r#" "Basic Name" , "13" , "2020-01-01" "#),
Ok(_)
));
// No comma between the first two values.
assert!(matches!(
csv.parse_line(r#""Basic Name""13","2020-01-01""#),
Err(CsvError::InvalidRow(_))
));
// Closing quote of the last value is missing.
assert!(matches!(
csv.parse_line(r#""Basic Name","13","2020-01-01"#),
Err(CsvError::InvalidRow(_))
));
// Opening quote of the first value is missing.
assert!(matches!(
csv.parse_line(r#"Basic Name","13","2020-01-01""#),
Err(CsvError::InvalidRow(_))
));
// Two values for three columns.
assert!(matches!(
csv.parse_line(r#""Basic Name","2020-01-01""#),
Err(CsvError::InvalidRow(_))
));
// Four values for three columns.
assert!(matches!(
csv.parse_line(r#""Basic Name","13","2020-01-01","andmore""#),
Err(CsvError::InvalidRow(_))
));
}
#[test]
fn test_with_example1() {
// Prepare the data:
let reader = BufReader::new(r#"
name, age, birth date
"Douglas Adams", "42", "1952-03-11"
"Gen Z. Person", "20", "2000-01-01"
"Ada Lovelace", "36", "1815-12-10"
"#.trim().as_bytes());
// Construct the CSV:
let mut csv = Csv::new(reader).unwrap();
// Install the condition -- only rows with age above 30 remain:
csv.apply_selection(|row| {
let age = row.get("age").ok_or_else(|| CsvError::InvalidColumn(String::from("age")))?;
let age = age.parse::<u32>().map_err(|_| CsvError::ParseError(String::from(age)))?;
Ok(age > 30)
});
// Iterate over the result:
assert!(matches!(
csv.next(),
Some(Ok(row)) if row.get("name").unwrap().as_str() == "Douglas Adams"
));
assert!(matches!(
csv.next(),
Some(Ok(row)) if row.get("name").unwrap().as_str() == "Ada Lovelace"
));
assert!(matches!(
csv.next(),
None
));
}
}
// ##### Utils #####
pub use utils::skip_next;
pub use utils::take_until;
pub use utils::take_and_skip;
mod utils {
    use std::{
        collections::HashSet,
        hash::Hash,
    };

    /// Drops `target` from the front of `input`.
    ///
    /// Returns the remainder when `input` starts with `target`, and `None`
    /// otherwise (including for an empty `input`).
    pub fn skip_next(input: &str, target: char) -> Option<&str> {
        let mut chars = input.chars();
        match chars.next() {
            Some(first) if first == target => Some(chars.as_str()),
            _ => None,
        }
    }

    /// Splits `input` right before the first occurrence of `target`.
    ///
    /// The second half still begins with `target`. When `target` does not
    /// occur, the whole input is returned as the first half and the second
    /// half is empty.
    pub fn take_until(input: &str, target: char) -> (&str, &str) {
        match input.find(target) {
            Some(pos) => input.split_at(pos),
            None => (input, ""),
        }
    }

    /// Like [`take_until`], but also removes the `target` that ended the
    /// first half. Returns `None` when `target` does not occur in `input`.
    pub fn take_and_skip(input: &str, target: char) -> Option<(&str, &str)> {
        let (head, tail) = take_until(input, target);
        let rest = skip_next(tail, target)?;
        Some((head, rest))
    }

    /// A hand-rolled counterpart of [`std::str::split`] for a single `char`
    /// separator, built on top of the helpers above (possibly with worse
    /// performance than the standard version).
    pub struct CharSplit<'a> {
        /// Text still to be split; `None` once the iterator is exhausted.
        haystack: Option<&'a str>,
        needle: char,
    }

    impl<'a> CharSplit<'a> {
        fn new(input: &'a str, target: char) -> Self {
            CharSplit {
                haystack: Some(input),
                needle: target,
            }
        }
    }

    impl<'a> Iterator for CharSplit<'a> {
        type Item = &'a str;

        fn next(&mut self) -> Option<Self::Item> {
            // Taking the haystack leaves `None` behind, so the iterator is
            // finished unless a remainder is stored back below.
            let remaining = self.haystack.take()?;
            match take_and_skip(remaining, self.needle) {
                Some((piece, rest)) => {
                    self.haystack = Some(rest);
                    Some(piece)
                }
                // No separator left: the remainder is the final piece.
                None => Some(remaining),
            }
        }
    }

    /// Adds `char_split` to string slices.
    pub trait CharSplittable {
        fn char_split<'a>(&'a self, target: char) -> CharSplit<'a>;
    }

    impl CharSplittable for str {
        fn char_split<'a>(&'a self, target: char) -> CharSplit<'a> {
            CharSplit::new(self, target)
        }
    }

    /// Duplicate detection for collections.
    pub trait HasDup {
        /// Returns `true` when at least two elements compare equal.
        fn has_dup(&self) -> bool;
    }

    impl<T> HasDup for [T]
    where
        T: Eq + Hash,
    {
        fn has_dup(&self) -> bool {
            let mut seen = HashSet::new();
            for item in self {
                // `insert` reports `false` for an element already in the set.
                if !seen.insert(item) {
                    return true;
                }
            }
            false
        }
    }
}
// ##### CSV errors #####
/// Everything that can go wrong while reading, filtering or writing a CSV.
#[derive(Debug)]
pub enum CsvError {
    /// Failure reported by the underlying reader or writer.
    IO(io::Error),
    /// A value could not be parsed; carries the offending text.
    ParseError(String),
    /// The header line is missing or contains duplicate column names.
    InvalidHeader(String),
    /// A data line is malformed; carries a description of the problem.
    InvalidRow(String),
    /// A requested column is not present in the row; carries its name.
    InvalidColumn(String),
}

/// Lets `?` turn I/O failures into `CsvError::IO`.
impl From<io::Error> for CsvError {
    fn from(source: io::Error) -> Self {
        Self::IO(source)
    }
}
// ##### CSV #####
/// One parsed data line: column name mapped to the value in that column.
type Row = HashMap<String, String>;
/// A CSV source read line by line from `reader`, with an optional row filter.
pub struct Csv<R: BufRead> {
/// Trimmed column names, in the order they appear in the header line.
pub columns: Vec<String>,
/// Source of the remaining (not yet consumed) lines.
reader: R,
/// Predicate installed through `apply_selection`; `None` until one is set.
selection: Option<Box<dyn Fn(&Row) -> Result<bool, CsvError>>>,
}
impl<R: BufRead> Csv<R> {
pub fn new(mut reader: R) -> Result<Self, CsvError> {
let mut line = String::new();
if reader.read_line(&mut line)? == 0 {
return Err(CsvError::InvalidHeader(
"Empty stream - header missing".into()
));
}
let columns =
line.char_split(',')
.map(|x| x.trim().into())
.collect::<Vec<String>>();
if columns.has_dup() {
return Err(CsvError::InvalidHeader(
"Headers have duplicates".into()
));
}
Ok(Csv{ columns, reader, selection: None })
}
pub fn parse_line(&mut self, line: &str) -> Result<Row, CsvError> {
// hack, because the reader sometimes returns two lines?!
let line = line.trim().split('\n').last().unwrap_or(line);

Ако махна този ред, test_with_example1 гърми, защото се връща CsvError::InvalidRow. Слагайки dbg!(&line) тук се вижда че в момента на гърмене имаме

line == r#"   "Gen Z. Person", "20", "2000-01-01"\n   "Ada Lovelace", "36", "1815-12-10""#

(unescape-нах output-а).

Gen Z. Person е много млад и затова selection му връща false, но някак си неговият ред не бива консумиран от reader stream-а и си остава там. Така следващото извикване на read_line връща и реда на Gen Z, и следващия ред.

Като видях това, си помислих, че заради някакъв laziness на итератор редът се чете, но не се консумира, но не виждам къде може да е това. Защо се случва това и как мога да го оправя?

Пълния изход от test_with_example1, като на ред 313 сложа dbg!(&line):

---- tests::test_with_example1 stdout ----
[src/lib.rs:396] &line = "            \"Douglas Adams\", \"42\", \"1952-03-11\"\n"
[src/lib.rs:396] &line = "            \"Gen Z. Person\", \"20\", \"2000-01-01\"\n"
[src/lib.rs:396] &line = "            \"Gen Z. Person\", \"20\", \"2000-01-01\"\n            \"Ada Lovelace\", \"36\", \"1815-12-10\""
thread 'tests::test_with_example1' panicked at 'assertion failed: matches!(csv . next(), Some(Ok(row)) if row . get("name") . unwrap() .
         as_str() == "Ada Lovelace")', src/lib.rs:164:9

Като се промени теста да ползва .unwrap() се вижда че проблемът е че се връща стойност Err(CsvError::InvalidRow("Expected a comma...")) от csv.next().

// Splitting on '"' alternates between text outside the quotes (even indices:
// separators and edges) and text inside the quotes (odd indices: values).
let (delimiters, values): (Vec<_>, Vec<_>) =
    line.char_split('"')
        .enumerate()
        .partition(|(i, _)| i % 2 == 0);

/// Rejects non-whitespace text found outside the quotes at one edge of the
/// row; `position` ("start" / "end") is only used in the error message.
fn check_edge(position: &str, val: Option<&(usize, &str)>)
    -> Result<(), CsvError>
{
    if let Some((_, x)) = val {
        if !x.trim().is_empty() {
            return Err(CsvError::InvalidRow(
                format!("\
                    Unquoted value at the {} of the row. \
                    It should be between double quotes \"like this\".\
                    ", position)
            ));
        }
    }
    Ok(())
}

check_edge("start", delimiters.first())?;
check_edge("end", delimiters.last())?;

// Every separator strictly between the first and the last piece must be a
// lone comma. The upper bound is `len - 1` so that the final interior
// separator is checked as well.
if delimiters.len() > 2 &&
    delimiters[1 .. delimiters.len() - 1].iter()
        .any(|(_, x)| x.trim() != ",")
{
    return Err(CsvError::InvalidRow("\
        Expected a comma, found unquoted value. \
        All values are quoted, and there should be commas between them \
        \"like\", \"this\".\
        ".into()
    ));
}

// An even number of quotes always leaves one more outside piece than values.
if delimiters.len() != values.len() + 1 {
    return Err(CsvError::InvalidRow(
        "Quote not closed".into()
    ));
}

if values.len() != self.columns.len() {
    return Err(CsvError::InvalidRow(
        format!(
            "Count mismatch. \
            There are {} values, but {} columns",
            values.len(),
            self.columns.len(),
        )
    ));
}

// Pair each column name with the value found at the same position.
let row =
    self.columns.iter()
        .cloned()
        .zip(
            values.into_iter()
                .map(|(_, x)| String::from(x))
        )
        .collect();
Ok(row)
}
/// Installs `callback` as the row filter, replacing any previous one.
///
/// During iteration the callback is invoked with each parsed row: `Ok(true)`
/// keeps the row, `Ok(false)` skips it, and an `Err` is handed to the caller
/// of the iterator.
pub fn apply_selection<F>(&mut self, callback: F)
where F: Fn(&Row) -> Result<bool, CsvError> + 'static
{
    let boxed: Box<dyn Fn(&Row) -> Result<bool, CsvError>> = Box::new(callback);
    self.selection = Some(boxed);
}
/// Consumes the `Csv` and writes the header followed by every selected row
/// to `writer`.
///
/// The header is written as the column names joined with `", "`; each row is
/// written as its values wrapped in double quotes, in column order, joined
/// with `", "`. Processing stops at the first error.
///
/// # Errors
///
/// * `CsvError::IO` when writing fails.
/// * `CsvError::InvalidColumn` when a row has no value for some column.
/// * Any error produced while iterating over the rows.
pub fn write_to<W: Write>(self, mut writer: W) -> Result<(), CsvError> {
    // Iterating below consumes `self`, so the names are copied out first.
    let header = self.columns.clone();
    writeln!(writer, "{}", header.join(", "))?;

    for record in self {
        let mut row = record?;
        let mut cells = Vec::new();
        for name in &header {
            match row.remove(name) {
                Some(value) => cells.push(format!("\"{}\"", value)),
                None => return Err(CsvError::InvalidColumn(name.clone())),
            }
        }
        writeln!(writer, "{}", cells.join(", "))?;
    }

    Ok(())
}
/// Reads the next physical line from the underlying reader and parses it.
///
/// `line` is a caller-owned scratch buffer. It is emptied before every read
/// because `BufRead::read_line` *appends* to its argument; without the
/// `clear()` a buffer reused across calls would accumulate earlier lines and
/// `parse_line` would receive several rows glued together.
///
/// Returns `None` once the reader reports end of input (zero bytes read),
/// `Some(Err(_))` for an I/O or parse failure, and `Some(Ok(row))` otherwise.
fn next_row(&mut self, line: &mut String) -> Option<Result<Row, CsvError>> {
    line.clear();
    match self.reader.read_line(line) {
        Ok(0) => None,
        Ok(_) => Some(self.parse_line(line.as_str())),
        Err(e) => Some(Err(e.into())),
    }
}
}
/// Iterating a `Csv` yields parsed rows, skipping the ones rejected by the
/// installed selection callback (if any).
impl<R: BufRead> Iterator for Csv<R> {
type Item = Result<Row, CsvError>;
/// Returns the next selected row, the first error encountered, or `None`
/// when `next_row` reports the end of the input.
fn next(&mut self) -> Option<Self::Item> {
// One buffer is shared by every pass through the loop below. This function
// does not clear it between passes; since `BufRead::read_line` appends,
// `next_row` must see an empty buffer or it will parse leftover text from
// a row that was skipped on an earlier pass.
let mut line = String::new();
// TODO: move self.selection unwrapping out of the loop
loop {
// here `Some(_)` means 'end the loop, we have a value to return'
// and None means 'continue the loop'
let res: Option<Result<Row, CsvError>> =
self.next_row(&mut line)?
- .and_then(|mut row| {
+ .and_then(|row| {
// No selection installed means every row is selected; an error returned
// by the callback is propagated to the caller.
let is_selected =
self.selection.as_ref()
.map(|select| select(&row))
.unwrap_or(Ok(true))?;
// on nightly: `Ok(is_selected.then_some(row))`
if is_selected {
Ok(Some(row))
} else {
- row.drain();
Ok(None)
}
})
.transpose();
if let Some(_) = res {
return res
}
}
}
}