Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
56 changes: 32 additions & 24 deletions src/clojure_csv/core.clj
Original file line number Diff line number Diff line change
Expand Up @@ -114,9 +114,9 @@ and quotes. The main functions are parse-csv and write-csv."}
"Given a reader, returns true if it is currently pointing at a character that
is the same as escape-char and is immediately followed by quote-char (an
escaped quote). The reader position will not be changed when the function
returns."
[^Reader reader ^long quote-char]
[^Reader reader ^long quote-char ^long escape-char]
(.mark reader 2)
(let [result (and (== quote-char (.read reader))
(let [result (and (== escape-char (.read reader))
(== quote-char (.read reader)))]
(.reset reader)
result))
Expand All @@ -125,7 +125,7 @@ and quotes. The main functions are parse-csv and write-csv."}
"Given a reader that is queued up to the beginning of a quoted field,
reads the field and returns it as a string. The reader will be left at the
first character past the end of the field."
[^Reader reader ^long delimiter ^long quote-char strict]
[^Reader reader delimiter quote-char escape-char strict]
(let [field-str (StringBuilder.)]
(.skip reader 1) ;; Discard the quote that starts the field.
(loop [c (reader-peek reader)]
Expand All @@ -137,7 +137,7 @@ and quotes. The main functions are parse-csv and write-csv."}
(.toString field-str))
;; If we see an escape char immediately followed by a quote char, only add
;; the quote char to the output, skip both characters, and continue.
(escaped-quote-at-reader-pos? reader quote-char)
(escaped-quote-at-reader-pos? reader quote-char escape-char)
(do (.appendCodePoint field-str quote-char)
(.skip reader 2)
(recur (reader-peek reader)))
Expand All @@ -154,47 +154,48 @@ and quotes. The main functions are parse-csv and write-csv."}
"Takes a Reader as input and returns the first row of the CSV file,
parsed into cells (an array of strings). The reader passed in will be
positioned for the start of the next line."
[^Reader csv-reader delimiter quote-char strict end-of-line]
[^Reader csv-reader {:keys [delimiter quote-char escape-char strict end-of-line]}]
;; We build the last-field variable, and then add it to fields when we
;; encounter some event (delimiter/eol/eof) that signals the end of
;; the field. This lets us correctly handle input with empty fields, like
;; ",,,".
(let [delimiter (int delimiter)
quote-char (int quote-char)]
quote-char (int quote-char)
escape-char (int escape-char)]
(loop [fields (transient []) ;; Will return this as the vector of fields.
last-field ""
look-ahead (reader-peek csv-reader)]
(cond (== -1 look-ahead)
(persistent! (conj! fields last-field))

(== look-ahead (int delimiter))
(do (.skip csv-reader 1)
(recur (conj! fields last-field) "" (reader-peek csv-reader)))

(eol-at-reader-pos? csv-reader end-of-line)
(do (skip-past-eol csv-reader end-of-line)
(persistent! (conj! fields last-field)))

(== look-ahead (int quote-char))
(recur fields
(read-quoted-field csv-reader delimiter quote-char strict)
(read-quoted-field csv-reader delimiter quote-char escape-char strict)
(reader-peek csv-reader))

(= "" last-field) ;; Must be at beginning or just after comma.
(recur fields
(read-unquoted-field csv-reader delimiter quote-char
strict end-of-line)
(reader-peek csv-reader))

:else
(throw (Exception. (str "Unexpected character found: " look-ahead)))))))

(defn- parse-csv-with-options
([csv-reader {:keys [delimiter quote-char strict end-of-line]}]
(parse-csv-with-options csv-reader delimiter quote-char
strict end-of-line))
([csv-reader delimiter quote-char strict end-of-line]
(lazy-seq
(when (not (== -1 (reader-peek csv-reader)))
(let [row (parse-csv-line csv-reader delimiter quote-char
strict end-of-line)]
(cons row (parse-csv-with-options csv-reader delimiter quote-char
strict end-of-line)))))))
[csv-reader opts]
(lazy-seq
(when (not (== -1 (reader-peek csv-reader)))
(cons (parse-csv-line csv-reader opts)
(parse-csv-with-options csv-reader opts)))))

(defn parse-csv
"Takes a CSV as a string or Reader and returns a seq of the parsed CSV rows,
Expand All @@ -209,14 +210,17 @@ and quotes. The main functions are parse-csv and write-csv."}
\\n and \\r\\n are both accepted. Default value: nil
:quote-char - A character that is used to begin and end a quoted cell.
Default value: \\\"
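:escape-char - A character that, when it appears directly before :quote-char
inside a quoted cell, marks that quote as a literal character
rather than the end of the cell.
Default value: the value of :quote-char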
:strict - If this variable is true, the parser will throw an
exception on parse errors that are recoverable but
not to spec or otherwise nonsensical. Default value: false"
([csv & {:as opts}]
(let [csv-reader (if (string? csv) (StringReader. csv) csv)]
(parse-csv-with-options csv-reader (merge {:strict false
:delimiter \,
:quote-char \"}
:quote-char \"
:escape-char (or (:quote-char opts) \")}
opts)))))

;;
Expand All @@ -236,27 +240,28 @@ and quotes. The main functions are parse-csv and write-csv."}
"Given a character, returns the escaped version, whether that is the same
as the original character or a replacement. The return is a string or a
character, but it all gets passed into str anyway."
[chr delimiter quote-char]
(if (= quote-char chr) (str quote-char quote-char) chr))
[chr delimiter quote-char escape-char]
(if (= quote-char chr) (str escape-char quote-char) chr))

(defn- quote-and-escape
"Given a string (cell), returns a new string that has any necessary quoting
and escaping."
[cell delimiter quote-char force-quote]
[cell delimiter quote-char escape-char force-quote]
(if (or force-quote (needs-quote? cell delimiter quote-char))
(str quote-char
(apply str (map #(escape % delimiter quote-char)
(apply str (map #(escape % delimiter quote-char escape-char)
cell))
quote-char)
cell))

(defn- quote-and-escape-row
"Given a row (vector of strings), quotes and escapes any cells where that
is necessary and then joins all the text into a string for that entire row."
[row delimiter quote-char force-quote]
[row delimiter quote-char escape-char force-quote]
(string/join delimiter (map #(quote-and-escape %
delimiter
quote-char
escape-char
force-quote)
row)))

Expand All @@ -271,15 +276,18 @@ and quotes. The main functions are parse-csv and write-csv."}
for writing CSV files. Default value: \\n
:quote-char - A character that is used to begin and end a quoted cell.
Default value: \\\"
:escape-char - A character that is written directly before every :quote-char
occurring inside a quoted cell.
Default value: the value of :quote-char
:force-quote - Forces every cell to be quoted (useful for Excel interop)
Default value: false"
[table & {:keys [delimiter quote-char end-of-line force-quote]
[table & {:keys [delimiter quote-char escape-char end-of-line force-quote]
:or {delimiter \, quote-char \" end-of-line "\n"
force-quote false}}]
(loop [csv-string (StringBuilder.)
quoted-table (map #(quote-and-escape-row %
(str delimiter)
quote-char
(or escape-char quote-char)
force-quote)
table)]
(if (empty? quoted-table)
Expand Down
18 changes: 18 additions & 0 deletions test/clojure_csv/test/core.clj
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,24 @@
(is (= "a,b\"c,d\n"
(write-csv [["a", "b\"c", "d"]] :quote-char \|))))

(deftest write-alternate-escape-char
  ;; The same one-row table is written three ways: with the default options,
  ;; with the escape char explicitly set to the quote char, and with a
  ;; backslash as the escape char.
  (let [table [["quoted:" "escaped\"quotes\""]]
        doubled "quoted:,\"escaped\"\"quotes\"\"\"\n"
        backslashed "quoted:,\"escaped\\\"quotes\\\"\"\n"]
    ;; Default: embedded quotes are escaped by doubling them.
    (is (= doubled (write-csv table)))
    ;; Naming the quote char as the escape char gives the same output.
    (is (= doubled (write-csv table :escape-char \")))
    ;; A backslash escape char is written before each embedded quote.
    (is (= backslashed (write-csv table :escape-char \\)))))

(deftest read-alternate-escape-char
  ;; The same logical row is parsed from a doubled-quote encoding (default
  ;; and explicit escape char) and from a backslash-escaped encoding.
  (let [expected [["quoted:" "escaped\"quotes\""]]
        doubled-input "quoted:,\"escaped\"\"quotes\"\"\"\n"
        backslash-input "quoted:,\"escaped\\\"quotes\\\"\"\n"]
    (is (= expected (parse-csv doubled-input)))
    (is (= expected (parse-csv doubled-input :escape-char \")))
    (is (= expected (parse-csv backslash-input :escape-char \\))))
  ;; Cells that both begin and end with a backslash-escaped quote.
  (is (= [["\"foo\"" "\"bar\""]]
         (parse-csv "\"\\\"foo\\\"\",\"\\\"bar\\\"\"\n" :escape-char \\))))

(deftest strictness
(is (thrown? Exception (dorun (parse-csv "a,b,c,\"d" :strict true))))
(is (thrown? Exception (dorun (parse-csv "a,b,c,d\"e" :strict true))))
Expand Down