From 37dea6c5bdd2927cf17f188770993d6987f1efbb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Drvo=C5=A1t=C4=9Bp?= Date: Sun, 7 Dec 2025 15:08:02 +0100 Subject: [PATCH] * Don't return a parsed value from `parsecustom!` since that is type unstable * Don't attempt to grow the result buffer if we have enough capacity * v0.2.1 --- Project.toml | 2 +- src/populate_result_buffer.jl | 10 ++++++---- src/result_buffer.jl | 1 + 3 files changed, 8 insertions(+), 5 deletions(-) diff --git a/Project.toml b/Project.toml index f5ae221..812b0b0 100644 --- a/Project.toml +++ b/Project.toml @@ -1,7 +1,7 @@ name = "ChunkedCSV" uuid = "c0d0730e-6432-44b2-a51e-6ec55e1c8b99" authors = ["Tomáš Drvoštěp "] -version = "0.2.0" +version = "0.2.1" [deps] ChunkedBase = "a380dd43-0ebf-4429-88d6-6f06ea920732" diff --git a/src/populate_result_buffer.jl b/src/populate_result_buffer.jl index 2a80fdb..8f5c9d9 100644 --- a/src/populate_result_buffer.jl +++ b/src/populate_result_buffer.jl @@ -35,6 +35,7 @@ end @inline function parsecustom!(::Type{customtypes}, row_bytes, pos, len, col_idx, cols, options, _type) where {customtypes} if @generated block = Expr(:block) + pushfirst!(block.args, :(error(lazy"Unreachable: type not matched"))) for i = 1:fieldcount(customtypes) T = fieldtype(customtypes, i) pushfirst!(block.args, quote @@ -42,7 +43,7 @@ end res = Parsers.xparse($T, row_bytes, pos, len, options)::Parsers.Result{$T} (val, tlen, code) = res.val, res.tlen, res.code unsafe_push!(cols[col_idx]::BufferedVector{$T}, val) - return val, tlen, code + return tlen, code end end) end @@ -55,7 +56,7 @@ end res = Parsers.xparse(_type, row_bytes, pos, len, options)::Parsers.Result{_type} (val, tlen, code) = res.val, res.tlen, res.code unsafe_push!(cols[col_idx]::BufferedVector{_type}, val) - return val, tlen, code + return tlen, code end end @@ -74,7 +75,8 @@ function ChunkedBase.populate_result_buffer!( errored_idx = 1 options = parsing_ctx.options - Base.ensureroom(result_buf, ceil(Int, length(newlines_segment) * 1.01)) + # If we need to grow the buffer, we add a bit of extra room to avoid having to reallocate too often + capacity(result_buf) < (length(newlines_segment)-1) && Base.ensureroom(result_buf, ceil(Int, length(newlines_segment) * 1.01)) ignorerepeated = options.ignorerepeated::Bool ignoreemptyrows = options.ignoreemptylines::Bool @@ -163,7 +165,7 @@ function ChunkedBase.populate_result_buffer!( (val, tlen, code) = res.val, res.tlen, res.code unsafe_push!(cols[col_idx]::BufferedVector{Parsers.PosLen31}, Parsers.PosLen31(prev_newline+val.pos, val.len, val.missingvalue, val.escapedvalue)) else - (val, tlen, code) = parsecustom!(CT, row_bytes, pos, len, col_idx, cols, options, schema[col_idx]) + (tlen, code) = parsecustom!(CT, row_bytes, pos, len, col_idx, cols, options, schema[col_idx]) end if Parsers.sentinel(code) row_status |= RowStatus.MissingValues diff --git a/src/result_buffer.jl b/src/result_buffer.jl index 35e1872..c55f8c0 100644 --- a/src/result_buffer.jl +++ b/src/result_buffer.jl @@ -220,6 +220,7 @@ function _push_buffers!(::Type{T}, out, i, n) where {T} end Base.length(buf::TaskResultBuffer) = length(buf.row_statuses) +capacity(buf::TaskResultBuffer) = length(buf.row_statuses.elements) function Base.empty!(buf::TaskResultBuffer) foreach(empty!, buf.cols)