From 9ad8e3cc8eaec92169d334039fe0fd00807a72af Mon Sep 17 00:00:00 2001
From: Tony Lian <1040424979@qq.com>
Date: Sun, 10 Mar 2019 14:27:30 -0700
Subject: [PATCH 1/5] Add writing support

---
NOTE(review): the line breaks of this series were restored from the
hunk-header line counts and the diffstat totals. Indentation widths and
trailing whitespace could not be recovered from the collapsed copy; 4-space
indentation is assumed, and the `-end`/`+end` pair in Parser() presumably
differed only in whitespace.

Introduces ccall wrappers for the ReadStat writer API plus Writer(),
write_data_file() and the write_dta/sav/por/sas7bdat entry points. As
submitted in this patch the ReadStat.jl additions do not parse yet
(`for ...:` / `else:`, missing `end`s, an unclosed tuple in the
readstat_writer_set_file_label ccall) and reference `parser`, `variables`
and `q`, none of which is defined in the code shown; patches 2-4 correct
these.

 src/C_interface.jl | 28 +++++++++++++++++++++++
 src/ReadStat.jl    | 55 +++++++++++++++++++++++++++++++++++++++++++++-
 2 files changed, 82 insertions(+), 1 deletion(-)

diff --git a/src/C_interface.jl b/src/C_interface.jl
index aee677e..926ffd8 100644
--- a/src/C_interface.jl
+++ b/src/C_interface.jl
@@ -69,4 +69,32 @@ end
 
 function readstat_parse(filename::String, type::Val{:sas7bdat}, parser::Ptr{Nothing}, ds::ReadStatDataFrame)
     return ccall((:readstat_parse_sas7bdat, libreadstat), Int, (Ptr{Nothing}, Cstring, Any), parser, string(filename), ds)
+end
+
+function readstat_begin_row(writer)
+    return ccall((:readstat_begin_row), Int, (Ptr{Nothing},), writer)
+end
+
+function readstat_end_row(writer)
+    return ccall((:readstat_end_row), Int, (Ptr{Nothing},), writer)
+end
+
+function readstat_begin_writing(writer, filetype::Val{:dta}, io, row_count)
+    return ccall((:readstat_begin_writing_dta, libreadstat), Int, (Ptr{Nothing}, Ptr{Nothing}, Cint), writer, io, Cint(row_count))
+end
+
+function readstat_begin_writing(writer, filetype::Val{:sav}, io, row_count)
+    return ccall((:readstat_begin_writing_sav, libreadstat), Int, (Ptr{Nothing}, Ptr{Nothing}, Cint), writer, io, Cint(row_count))
+end
+
+function readstat_begin_writing(writer, filetype::Val{:por}, io, row_count)
+    return ccall((:readstat_begin_writing_por, libreadstat), Int, (Ptr{Nothing}, Ptr{Nothing}, Cint), writer, io, Cint(row_count))
+end
+
+function readstat_begin_writing(writer, filetype::Val{:sas7bdat}, io, row_count)
+    return ccall((:readstat_begin_writing_sas7bdat, libreadstat), Int, (Ptr{Nothing}, Ptr{Nothing}, Cint), writer, io, Cint(row_count))
+end
+
+function readstat_insert_double_value(writer, variable, item)
+    return ccall((:readstat_insert_double_value, libreadstat), Int, (Ptr{Nothing}, Ptr{Nothing}, Any), writer, variable, item)
 end
\ No newline at end of file
diff --git a/src/ReadStat.jl b/src/ReadStat.jl
index a56931f..250997f 100644
--- a/src/ReadStat.jl
+++ b/src/ReadStat.jl
@@ -248,7 +248,7 @@ function Parser()
     ccall((:readstat_set_value_handler, libreadstat), Int, (Ptr{Nothing}, Ptr{Nothing}), parser, val_fxn)
     ccall((:readstat_set_value_label_handler, libreadstat), Int, (Ptr{Nothing}, Ptr{Nothing}), parser, label_fxn)
     return parser
-end
+end
 
 function parse_data_file!(ds::ReadStatDataFrame, parser::Ptr{Nothing}, filename::AbstractString, filetype::Val)
     retval = readstat_parse(filename, filetype, parser, ds)
@@ -256,9 +256,62 @@ function parse_data_file!(ds::ReadStatDataFrame, parser::Ptr{Nothing}, filename:
     retval == 0 || error("Error parsing $filename: $retval")
 end
 
+
+function handle_write!()
+
+end
+
+function Writer(source; file_label="File Label")
+    writer = ccall((:readstat_writer_init, libreadstat), Ptr{Nothing}, ())
+    write_bytes = @cfunction(handle_write!, Cint, (Cint, Cint, Ptr{ReadStatDataFrame}))
+    ccall((:readstat_set_data_writer, libreadstat), Int, (Ptr{Nothing}, ), parser, write_bytes)
+    ccall((:readstat_writer_set_file_label, libreadstat, Cvoid, (Ptr{Nothing}, Cstring), writer, file_label)
+    return writer
+end
+
+function write_data_file(filename::AbstractString, filetype::Val, io::IO, source)
+    writer = Writer(source)
+    fields = fieldnames(eltype(source))
+    variables_array = []
+
+    for field in fields:
+        variable = ccall((:readstat_add_variable, libreadstat), Ptr{Nothing}, (Ptr{Nothing}, Cstring, Cint, Cint), writer, String(field), READSTAT_TYPE_DOUBLE, 0); # TODO: know width
+        readstat_variable_set_label(variable, String(field))
+        variables_array.push!(variable)
+
+    variables = NamedTuple{(fields...,)}((variables...,)) # generate a NamedTuple for variables
+
+
+    if Base.IteratorSize(source) == Base.HasLength(): # TODO: what about HasShape
+        row_count = length(q)
+    else: #fallback
+        row_count = 0
+        for _ in source
+            row_count += 1
+        end
+
+    readstat_begin_writing(writer, filetype, io, row_count)
+
+    for row in source
+        readstat_begin_row(writer)
+        for field in fields
+            readstat_insert_double_value(writer, variables[field], row[field]) # TODO: more than double
+        end
+        readstat_end_row(writer);
+    end
+
+    ccall((:readstat_end_writing, libreadstat), Int, (Ptr{Nothing}), writer)
+    ccall((:readstat_writer_free, libreadstat), Cvoid, (Ptr{Nothing}), writer)
+end
+
 read_dta(filename::AbstractString) = read_data_file(filename, Val(:dta))
 read_sav(filename::AbstractString) = read_data_file(filename, Val(:sav))
 read_por(filename::AbstractString) = read_data_file(filename, Val(:por))
 read_sas7bdat(filename::AbstractString) = read_data_file(filename, Val(:sas7bdat))
 
+write_dta(filename::AbstractString, io::IO, source) = write_data_file(filename, Val(:dta), io, source)
+write_sav(filename::AbstractString, io::IO, source) = write_data_file(filename, Val(:sav), io, source)
+write_por(filename::AbstractString, io::IO, source) = write_data_file(filename, Val(:por), io, source)
+write_sas7bdat(filename::AbstractString, io::IO, source) = write_data_file(filename, Val(:sas7bdat), io, source)
+
 end #module ReadStat

From 1bbaef1f2ab8d7608a461dfa53714087ff542736 Mon Sep 17 00:00:00 2001
From: Tony Lian <1040424979@qq.com>
Date: Tue, 19 Mar 2019 23:00:30 -0700
Subject: [PATCH 2/5] Add libreadstat into readstat_begin_row and
 readstat_end_row, pass pointer into readstat_begin_writing

---
NOTE(review): readstat_begin_writing now hands C a raw pointer obtained with
pointer_from_objref(io). That pointer does not root `io`; the caller has to
keep `io` referenced until readstat_end_writing has returned (GC.@preserve
would make this explicit) - TODO confirm.

 src/C_interface.jl | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/src/C_interface.jl b/src/C_interface.jl
index 926ffd8..c9bc705 100644
--- a/src/C_interface.jl
+++ b/src/C_interface.jl
@@ -72,27 +72,27 @@ function readstat_parse(filename::String, type::Val{:sas7bdat}, parser::Ptr{Noth
 end
 
 function readstat_begin_row(writer)
-    return ccall((:readstat_begin_row), Int, (Ptr{Nothing},), writer)
+    return ccall((:readstat_begin_row, libreadstat), Int, (Ptr{Nothing},), writer)
 end
 
 function readstat_end_row(writer)
-    return ccall((:readstat_end_row), Int, (Ptr{Nothing},), writer)
+    return ccall((:readstat_end_row, libreadstat), Int, (Ptr{Nothing},), writer)
 end
 
 function readstat_begin_writing(writer, filetype::Val{:dta}, io, row_count)
-    return ccall((:readstat_begin_writing_dta, libreadstat), Int, (Ptr{Nothing}, Ptr{Nothing}, Cint), writer, io, Cint(row_count))
+    return ccall((:readstat_begin_writing_dta, libreadstat), Int, (Ptr{Nothing}, Ptr{Nothing}, Cint), writer, pointer_from_objref(io), Cint(row_count))
 end
 
 function readstat_begin_writing(writer, filetype::Val{:sav}, io, row_count)
-    return ccall((:readstat_begin_writing_sav, libreadstat), Int, (Ptr{Nothing}, Ptr{Nothing}, Cint), writer, io, Cint(row_count))
+    return ccall((:readstat_begin_writing_sav, libreadstat), Int, (Ptr{Nothing}, Ptr{Nothing}, Cint), writer, pointer_from_objref(io), Cint(row_count))
 end
 
 function readstat_begin_writing(writer, filetype::Val{:por}, io, row_count)
-    return ccall((:readstat_begin_writing_por, libreadstat), Int, (Ptr{Nothing}, Ptr{Nothing}, Cint), writer, io, Cint(row_count))
+    return ccall((:readstat_begin_writing_por, libreadstat), Int, (Ptr{Nothing}, Ptr{Nothing}, Cint), writer, pointer_from_objref(io), Cint(row_count))
 end
 
 function readstat_begin_writing(writer, filetype::Val{:sas7bdat}, io, row_count)
-    return ccall((:readstat_begin_writing_sas7bdat, libreadstat), Int, (Ptr{Nothing}, Ptr{Nothing}, Cint), writer, io, Cint(row_count))
+    return ccall((:readstat_begin_writing_sas7bdat, libreadstat), Int, (Ptr{Nothing}, Ptr{Nothing}, Cint), writer, pointer_from_objref(io), Cint(row_count))
 end
 
 function readstat_insert_double_value(writer, variable, item)

From 150e1cde46e1a57b5cd0716d34e638f09b13d542 Mon Sep 17 00:00:00 2001
From: Tony Lian <1040424979@qq.com>
Date: Tue, 19 Mar 2019 23:00:51 -0700
Subject: [PATCH 3/5] Add handle_write and fix several syntax errors

---
NOTE(review): repairs the syntax of the patch 1 code and gives handle_write!
a body. `parser` in Writer() and `q` / `variables...` in write_data_file()
are still unresolved at this point; patch 4 replaces them.

 src/ReadStat.jl | 29 +++++++++++++++++------------
 1 file changed, 17 insertions(+), 12 deletions(-)

diff --git a/src/ReadStat.jl b/src/ReadStat.jl
index 250997f..38aa5b7 100644
--- a/src/ReadStat.jl
+++ b/src/ReadStat.jl
@@ -256,16 +256,17 @@ function parse_data_file!(ds::ReadStatDataFrame, parser::Ptr{Nothing}, filename:
     retval == 0 || error("Error parsing $filename: $retval")
 end
 
-
-function handle_write!()
-
-end
+function handle_write!(data::Ptr, len::Int, ctx::Ptr)
+    io = unsafe_pointer_to_objref(ctx) # restore io
+    actual_data = unsafe_wrap(Array{Any}, data, len) # we may want to specify the type later
+    write(io, actual_data)
+ end
 
 function Writer(source; file_label="File Label")
     writer = ccall((:readstat_writer_init, libreadstat), Ptr{Nothing}, ())
     write_bytes = @cfunction(handle_write!, Cint, (Cint, Cint, Ptr{ReadStatDataFrame}))
-    ccall((:readstat_set_data_writer, libreadstat), Int, (Ptr{Nothing}, ), parser, write_bytes)
-    ccall((:readstat_writer_set_file_label, libreadstat, Cvoid, (Ptr{Nothing}, Cstring), writer, file_label)
+    ccall((:readstat_set_data_writer, libreadstat), Int, (Ptr{Nothing}, Ptr{Nothing}), parser, write_bytes)
+    ccall((:readstat_writer_set_file_label, libreadstat), Cvoid, (Ptr{Nothing}, Cstring), writer, file_label)
     return writer
 end
 
@@ -274,21 +275,25 @@ function write_data_file(filename::AbstractString, filetype::Val, io::IO, source)
     fields = fieldnames(eltype(source))
     variables_array = []
 
-    for field in fields:
-        variable = ccall((:readstat_add_variable, libreadstat), Ptr{Nothing}, (Ptr{Nothing}, Cstring, Cint, Cint), writer, String(field), READSTAT_TYPE_DOUBLE, 0); # TODO: know width
+    for field in fields
+        variable = ccall((:readstat_add_variable, libreadstat),
+            Ptr{Nothing}, (Ptr{Nothing}, Cstring, Cint, Cint),
+            writer, String(field), READSTAT_TYPE_DOUBLE, Cint(0)) # TODO: know width
         readstat_variable_set_label(variable, String(field))
         variables_array.push!(variable)
+    end
 
     variables = NamedTuple{(fields...,)}((variables...,)) # generate a NamedTuple for variables
 
 
-    if Base.IteratorSize(source) == Base.HasLength(): # TODO: what about HasShape
+    if Base.IteratorSize(source) == Base.HasLength() # TODO: what about HasShape
         row_count = length(q)
-    else: #fallback
+    else #fallback
         row_count = 0
         for _ in source
             row_count += 1
         end
+    end
 
     readstat_begin_writing(writer, filetype, io, row_count)
 
@@ -300,8 +305,8 @@ function write_data_file(filename::AbstractString, filetype::Val, io::IO, source
         readstat_end_row(writer);
     end
 
-    ccall((:readstat_end_writing, libreadstat), Int, (Ptr{Nothing}), writer)
-    ccall((:readstat_writer_free, libreadstat), Cvoid, (Ptr{Nothing}), writer)
+    ccall((:readstat_end_writing, libreadstat), Int, (Ptr{Nothing},), writer)
+    ccall((:readstat_writer_free, libreadstat), Cvoid, (Ptr{Nothing},), writer)
 end
 
 read_dta(filename::AbstractString) = read_data_file(filename, Val(:dta))

From fcfc6fe418cf3e0a9fc38214aee9d420191334c4 Mon Sep 17 00:00:00 2001
From: Tony Lian <1040424979@qq.com>
Date: Sat, 23 Mar 2019 20:49:43 -0700
Subject: [PATCH 4/5] Update writing

---
NOTE(review): the write callback is registered as
(Ptr{UInt8}, Cint, Ptr{Nothing}) returning Cint - confirm against the
readstat_data_writer typedef in readstat.h (presumably `size_t len` and an
`ssize_t` result). The same check applies to the Cint row_count passed to
readstat_begin_writing_*, the Cint width passed to readstat_add_variable,
and the `Any` value slot of readstat_insert_double_value from patch 1.
The new filename method closes `io` only on the success path; an exception
raised while writing leaves the file open (`open(filename, "w") do io ...
end` would cover both paths).

 src/ReadStat.jl | 27 +++++++++++++++++----------
 1 file changed, 17 insertions(+), 10 deletions(-)

diff --git a/src/ReadStat.jl b/src/ReadStat.jl
index 38aa5b7..963e880 100644
--- a/src/ReadStat.jl
+++ b/src/ReadStat.jl
@@ -256,21 +256,29 @@ function parse_data_file!(ds::ReadStatDataFrame, parser::Ptr{Nothing}, filename:
     retval == 0 || error("Error parsing $filename: $retval")
 end
 
-function handle_write!(data::Ptr, len::Int, ctx::Ptr)
+function handle_write!(data::Ptr{UInt8}, len::Cint, ctx::Ptr)
     io = unsafe_pointer_to_objref(ctx) # restore io
-    actual_data = unsafe_wrap(Array{Any}, data, len) # we may want to specify the type later
+    actual_data = unsafe_wrap(Array{UInt8}, data, (len, )) # we may want to specify the type later
     write(io, actual_data)
+    return len
  end
 
 function Writer(source; file_label="File Label")
     writer = ccall((:readstat_writer_init, libreadstat), Ptr{Nothing}, ())
-    write_bytes = @cfunction(handle_write!, Cint, (Cint, Cint, Ptr{ReadStatDataFrame}))
-    ccall((:readstat_set_data_writer, libreadstat), Int, (Ptr{Nothing}, Ptr{Nothing}), parser, write_bytes)
+    write_bytes = @cfunction(handle_write!, Cint, (Ptr{UInt8}, Cint, Ptr{Nothing}))
+    ccall((:readstat_set_data_writer, libreadstat), Int, (Ptr{Nothing}, Ptr{Nothing}), writer, write_bytes)
     ccall((:readstat_writer_set_file_label, libreadstat), Cvoid, (Ptr{Nothing}, Cstring), writer, file_label)
     return writer
 end
 
-function write_data_file(filename::AbstractString, filetype::Val, io::IO, source)
+function write_data_file(filename::AbstractString, filetype::Val, source)
+    io = open(filename, "w")
+    write_data_file(filetype::Val, io, source)
+    close(io)
+end
+
+
+function write_data_file(filetype::Val, io::IO, source)
     writer = Writer(source)
     fields = fieldnames(eltype(source))
     variables_array = []
@@ -279,15 +287,14 @@ function write_data_file(filename::AbstractString, filetype::Val, io::IO, source
         variable = ccall((:readstat_add_variable, libreadstat),
             Ptr{Nothing}, (Ptr{Nothing}, Cstring, Cint, Cint),
             writer, String(field), READSTAT_TYPE_DOUBLE, Cint(0)) # TODO: know width
-        readstat_variable_set_label(variable, String(field))
-        variables_array.push!(variable)
+        # readstat_variable_set_label(variable, String(field)) TODO: label for a variable
+        push!(variables_array, variable)
     end
 
-    variables = NamedTuple{(fields...,)}((variables...,)) # generate a NamedTuple for variables
+    variables = NamedTuple{(fields...,)}((variables_array...,)) # generate a NamedTuple for variables
 
-
     if Base.IteratorSize(source) == Base.HasLength() # TODO: what about HasShape
-        row_count = length(q)
+        row_count = length(source)
     else #fallback
         row_count = 0
         for _ in source

From 9f5c76e65c26761cc6a352ad825552e1832787a8 Mon Sep 17 00:00:00 2001
From: Tony Lian <1040424979@qq.com>
Date: Sun, 24 Mar 2019 16:06:39 -0700
Subject: [PATCH 5/5] Change functions for writing and export write functions

---
NOTE(review): the write_* entry points change from (filename, io, source) to
(filename, source), matching the filename method added in patch 4, and are
added to the module's export list.

 src/ReadStat.jl | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/src/ReadStat.jl b/src/ReadStat.jl
index 963e880..ff4d540 100644
--- a/src/ReadStat.jl
+++ b/src/ReadStat.jl
@@ -18,7 +18,7 @@ include(depsjl_path)
 using DataValues: DataValueVector
 using Dates
 
-export ReadStatDataFrame, read_dta, read_sav, read_por, read_sas7bdat
+export ReadStatDataFrame, read_dta, read_sav, read_por, read_sas7bdat, write_dta, write_sav, write_por, write_sas7bdat
 
 ##############################################################################
 ##
@@ -321,9 +321,9 @@ read_sav(filename::AbstractString) = read_data_file(filename, Val(:sav))
 read_por(filename::AbstractString) = read_data_file(filename, Val(:por))
 read_sas7bdat(filename::AbstractString) = read_data_file(filename, Val(:sas7bdat))
 
-write_dta(filename::AbstractString, io::IO, source) = write_data_file(filename, Val(:dta), io, source)
-write_sav(filename::AbstractString, io::IO, source) = write_data_file(filename, Val(:sav), io, source)
-write_por(filename::AbstractString, io::IO, source) = write_data_file(filename, Val(:por), io, source)
-write_sas7bdat(filename::AbstractString, io::IO, source) = write_data_file(filename, Val(:sas7bdat), io, source)
+write_dta(filename::AbstractString, source) = write_data_file(filename, Val(:dta), source)
+write_sav(filename::AbstractString, source) = write_data_file(filename, Val(:sav), source)
+write_por(filename::AbstractString, source) = write_data_file(filename, Val(:por), source)
+write_sas7bdat(filename::AbstractString, source) = write_data_file(filename, Val(:sas7bdat), source)
 
 end #module ReadStat