diff --git a/DESCRIPTION b/DESCRIPTION index 6dd2882..fd6d5c4 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Package: readspss Type: Package Title: Importing and Exporting SPSS Files -Version: 0.18.1 +Version: 0.19 Authors@R: c( person("Jan Marvin", "Garbuszus", email = "jan.garbuszus@ruhr-uni-bochum.de", role = c("aut", "cre")), @@ -22,18 +22,17 @@ LazyData: TRUE Language: en-US Imports: Rcpp (>= 0.11.2) -Suggests: - covr, +Suggests: datasets, foreign, knitr, rmarkdown, roxygen2, testthat -LinkingTo: Rcpp, BH +LinkingTo: Rcpp ByteCompile: yes SystemRequirements: OpenSSL >= 1.0.2 VignetteBuilder: knitr Encoding: UTF-8 Roxygen: list(markdown = TRUE) -RoxygenNote: 7.3.2 +RoxygenNote: 7.3.3 diff --git a/src/RcppExports.cpp b/src/RcppExports.cpp index 13babc2..9c31ea9 100644 --- a/src/RcppExports.cpp +++ b/src/RcppExports.cpp @@ -49,7 +49,7 @@ BEGIN_RCPP END_RCPP } // readpor -List readpor(const char * filePath, const bool debug, std::string encStr, bool override); +Rcpp::List readpor(const char * filePath, const bool debug, std::string encStr, bool override); RcppExport SEXP _readspss_readpor(SEXP filePathSEXP, SEXP debugSEXP, SEXP encStrSEXP, SEXP overrideSEXP) { BEGIN_RCPP Rcpp::RObject rcpp_result_gen; @@ -63,7 +63,7 @@ BEGIN_RCPP END_RCPP } // readsav -List readsav(const char * filePath, const bool debug, std::string encStr, std::string const ownEnc); +Rcpp::List readsav(const char * filePath, const bool debug, std::string encStr, std::string const ownEnc); RcppExport SEXP _readspss_readsav(SEXP filePathSEXP, SEXP debugSEXP, SEXP encStrSEXP, SEXP ownEncSEXP) { BEGIN_RCPP Rcpp::RObject rcpp_result_gen; diff --git a/src/boost_split.cpp b/src/boost_split.cpp index 3810c8f..bc88e2a 100644 --- a/src/boost_split.cpp +++ b/src/boost_split.cpp @@ -1,5 +1,5 @@ /* - * Copyright (C) 2018 Jan Marvin Garbuszus + * Copyright (C) 2018-2025 Jan Marvin Garbuszus * * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by the @@ -15,11 +15,7 @@ * with this program. If not, see . */ -#include -#include - -#include -#include +#include "spss.h" //' split character vector at "=" //' @@ -32,8 +28,7 @@ Rcpp::CharacterVector boost_split(std::string val_s) { std::vector vec_r; - boost::split(vec_r, val_s, - boost::is_any_of("="), boost::token_compress_on); + vec_r = split(val_s, "=", true); return(Rcpp::wrap(vec_r)); } diff --git a/src/fast_factor.cpp b/src/fast_factor.cpp index cfcb003..ac35fbb 100644 --- a/src/fast_factor.cpp +++ b/src/fast_factor.cpp @@ -1,11 +1,10 @@ #include -using namespace Rcpp; template -IntegerVector fast_factor_template( const Vector& x, - const Vector& y) { - IntegerVector out = match(x, y); +Rcpp::IntegerVector fast_factor_template( const Rcpp::Vector& x, + const Rcpp::Vector& y) { + Rcpp::IntegerVector out = match(x, y); out.attr("levels") = y.attr("names"); out.attr("class") = "factor"; diff --git a/src/read_sav_encrypted.cpp b/src/read_sav_encrypted.cpp index 9cb87e7..45fa12f 100644 --- a/src/read_sav_encrypted.cpp +++ b/src/read_sav_encrypted.cpp @@ -1,5 +1,5 @@ /* - * Copyright (C) 2018 Jan Marvin Garbuszus + * Copyright (C) 2018-2025 Jan Marvin Garbuszus * Copyright (c) 2013 Ben Pfaff * * This program is free software; you can redistribute it and/or modify it @@ -22,12 +22,9 @@ #include #include -#include - #include #include -using namespace Rcpp; #include "spss.h" #include "read_sav_encrypted.h" @@ -52,12 +49,10 @@ int encryptfile (const char * filePath, std::string &outpath, std::string pass) std::string fileheader(36, '\0'); fileheader = readstring(fileheader, sav); - if (!boost::regex_search(fileheader, boost::regex("ENCRYPTEDSAV"))) { - stop("The file header indicates that it is not an SPSS sav file."); + if (fileheader.find("ENCRYPTEDSAV") == std::string::npos) { + Rcpp::stop("The file header indicates that it is not an SPSS sav file."); } - - /* Read first ciphertext block and use it to verify the password. Try the password as plaintext first, then try decoding it. */ @@ -125,7 +120,7 @@ Rcpp::List readencrypted(const char * filePath, const bool debug, // remove encrypted sav-file std::remove(outPath.c_str()); } else { - stop("stopping"); + Rcpp::stop("stopping"); } return df; diff --git a/src/read_sav_known_n.cpp b/src/read_sav_known_n.cpp index 29e85fa..51d989e 100644 --- a/src/read_sav_known_n.cpp +++ b/src/read_sav_known_n.cpp @@ -1,5 +1,5 @@ /* - * Copyright (C) 2018 Jan Marvin Garbuszus + * Copyright (C) 2018-2025 Jan Marvin Garbuszus * * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by the @@ -19,8 +19,6 @@ #include #include -#include - #include "spss.h" Rcpp::List read_sav_known_n (Rcpp::List& df, std::fstream& sav, @@ -165,8 +163,7 @@ Rcpp::List read_sav_known_n (Rcpp::List& df, std::fstream& sav, if (res_i == res_kk-1) { // trim additional whitespaces to the right - start = boost::regex_replace(start, - boost::regex(" +$"), "$1"); + rtrim(start); Rcpp::as(df[kk])[nn] = start; @@ -250,8 +247,7 @@ Rcpp::List read_sav_known_n (Rcpp::List& df, std::fstream& sav, if (res_i == res_kk-1) { // trim additional whitespaces to the right - start = boost::regex_replace(start, - boost::regex(" +$"), "$1"); + rtrim(start); Rcpp::as(df[kk])[nn] = start; @@ -294,8 +290,7 @@ Rcpp::List read_sav_known_n (Rcpp::List& df, std::fstream& sav, if (res_i == res_kk-1) { // trim additional whitespaces to the right - start = boost::regex_replace(start, - boost::regex(" +$"), "$1"); + rtrim(start); Rcpp::as(df[kk])[nn] = start; @@ -418,8 +413,7 @@ Rcpp::List read_sav_known_n (Rcpp::List& df, std::fstream& sav, val_s.erase(type, std::string::npos); // trim additional whitespaces - val_s = boost::regex_replace(val_s, - boost::regex("^ +| +$"), "$1"); + trim(val_s); // Rcpp::Rcout << val_s << std::endl; Rcpp::as(df[kk])[nn] = val_s; diff --git a/src/read_sav_uncompress.cpp b/src/read_sav_uncompress.cpp index 7b3fd2b..72a07f2 100644 --- a/src/read_sav_uncompress.cpp +++ b/src/read_sav_uncompress.cpp @@ -1,5 +1,5 @@ /* - * Copyright (C) 2018 Jan Marvin Garbuszus + * Copyright (C) 2018-2025 Jan Marvin Garbuszus * * zlib header information by Evan Miller * diff --git a/src/readpor.cpp b/src/readpor.cpp index 137b84d..9007243 100644 --- a/src/readpor.cpp +++ b/src/readpor.cpp @@ -1,5 +1,5 @@ /* - * Copyright (C) 2018 Jan Marvin Garbuszus + * Copyright (C) 2018-2025 Jan Marvin Garbuszus * * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by the @@ -21,10 +21,6 @@ #include #include -#include - -using namespace Rcpp; - #include "spss.h" //' Reads the binary SPSS file @@ -37,7 +33,7 @@ using namespace Rcpp; //' @keywords internal //' @noRd // [[Rcpp::export]] -List readpor(const char * filePath, const bool debug, std::string encStr, +Rcpp::List readpor(const char * filePath, const bool debug, std::string encStr, bool override) { @@ -64,7 +60,7 @@ List readpor(const char * filePath, const bool debug, std::string encStr, } } else { - stop ("No file was read."); + Rcpp::stop ("No file was read."); } por_file.close(); @@ -110,12 +106,12 @@ List readpor(const char * filePath, const bool debug, std::string encStr, std::string spss (200, '\0'); spss = readstring(spss, por); - if (!override){ - if (!boost::regex_search(spss, boost::regex("ASCII SPSS PORT FILE")) && - !boost::regex_search(spss, boost::regex("EBCDIC SPSS PORT FILE"))) { - stop("The file header indicates that it is not an SPSS por file. " - "Use 'override = TRUE' to ignore this check."); - } + if (!override) { + if (spss.find("ASCII SPSS PORT FILE") == std::string::npos && + spss.find("EBCDIC SPSS PORT FILE") == std::string::npos) { + Rcpp::stop("The file header indicates that it is not an SPSS por file. " + "Use 'override = TRUE' to ignore this check."); + } } // Controll characters @@ -129,28 +125,28 @@ List readpor(const char * filePath, const bool debug, std::string encStr, digits = readstring(digits, por); if (debug) - Rcout << "digits: " << digits << std::endl; + Rcpp::Rcout << "digits: " << digits << std::endl; // Capitals std::string capitals (26, '\0'); capitals = readstring(capitals, por); if (debug) - Rcout << "capitals: " << capitals << std::endl; + Rcpp::Rcout << "capitals: " << capitals << std::endl; // lowercase std::string lower (26, '\0'); lower = readstring(lower, por); if (debug) - Rcout << "lower: " << lower << std::endl; + Rcpp::Rcout << "lower: " << lower << std::endl; // random std::string random (61, '\0'); random = readstring(random, por); if (debug) - Rcout << "random: " << random << std::endl; + Rcpp::Rcout << "random: " << random << std::endl; // Reserved std::string reserved (69, '\0'); @@ -161,10 +157,10 @@ List readpor(const char * filePath, const bool debug, std::string encStr, tag = readstring(tag, por); if (debug) - Rcout << "tag: " << tag << std::endl; + Rcpp::Rcout << "tag: " << tag << std::endl; if (debug) - Rcout << "Pos: " << por.tellg() << std::endl; + Rcpp::Rcout << "Pos: " << por.tellg() << std::endl; // end of header ----------------------------------------------------------- @@ -193,7 +189,7 @@ List readpor(const char * filePath, const bool debug, std::string encStr, if (debug) - Rcout << vers << " " << filedate << " " << filetime << std::endl; + Rcpp::Rcout << vers << " " << filedate << " " << filetime << std::endl; std::string varrec (1, '\0'); @@ -213,7 +209,7 @@ List readpor(const char * filePath, const bool debug, std::string encStr, if (debug) - Rcout << prod << std::endl; + Rcpp::Rcout << prod << std::endl; // optional // 2 or 3 : author and extra record @@ -234,7 +230,7 @@ List readpor(const char * filePath, const bool debug, std::string encStr, file_info.push_back(author); if (debug) - Rcout << author << std::endl; + Rcpp::Rcout << author << std::endl; varrec = readstring(varrec, por); } @@ -253,7 +249,7 @@ List readpor(const char * filePath, const bool debug, std::string encStr, file_info.push_back(extra); if (debug) - Rcout << extra << std::endl; + Rcpp::Rcout << extra << std::endl; varrec = readstring(varrec, por); } @@ -284,7 +280,7 @@ List readpor(const char * filePath, const bool debug, std::string encStr, if (varrec.compare("5") == 0) { if (debug) - Rcout << "--- 5 ---" << std::endl; + Rcpp::Rcout << "--- 5 ---" << std::endl; std::string prec; prec = readtostring(por); @@ -298,7 +294,7 @@ List readpor(const char * filePath, const bool debug, std::string encStr, if (varrec.compare("6") == 0) { if (debug) - Rcout << "--- 6 ---" << std::endl; + Rcpp::Rcout << "--- 6 ---" << std::endl; // single string std::string len; @@ -317,7 +313,7 @@ List readpor(const char * filePath, const bool debug, std::string encStr, { if (debug) - Rcout << "--- 7 ---" << std::endl; + Rcpp::Rcout << "--- 7 ---" << std::endl; // 0 or 1-255 std::string vartyp; @@ -386,8 +382,8 @@ List readpor(const char * filePath, const bool debug, std::string encStr, if (debug) { - Rcout << varname << std::endl; - Rcout << varnamelen << std::endl; + Rcpp::Rcout << varname << std::endl; + Rcpp::Rcout << varnamelen << std::endl; } } @@ -396,7 +392,7 @@ List readpor(const char * filePath, const bool debug, std::string encStr, if (varrec.compare("8") == 0) { if (debug) - Rcout << "--- 8 ---" << std::endl; + Rcpp::Rcout << "--- 8 ---" << std::endl; int vartyp = 0; std::string misslen; @@ -491,7 +487,7 @@ List readpor(const char * filePath, const bool debug, std::string encStr, if (varrec.compare("B") == 0) { if (debug) - Rcout << "--- B ---" << std::endl; + Rcpp::Rcout << "--- B ---" << std::endl; std::string varname; ptrdiff_t pos = 0; @@ -526,7 +522,7 @@ List readpor(const char * filePath, const bool debug, std::string encStr, if (varrec.compare("C") == 0) { if (debug) - Rcout << "--- C ---" << std::endl; + Rcpp::Rcout << "--- C ---" << std::endl; std::string labellen; @@ -536,7 +532,7 @@ List readpor(const char * filePath, const bool debug, std::string encStr, label = readstring(label, por); if (debug) - Rcout << label << std::endl; + Rcpp::Rcout << label << std::endl; varlabels.push_back(label); @@ -548,7 +544,7 @@ List readpor(const char * filePath, const bool debug, std::string encStr, if (varrec.compare("D") == 0) { if (debug) - Rcout << "--- D ---" << std::endl; + Rcpp::Rcout << "--- D ---" << std::endl; std::string unk1; unk1 = readtostring(por); @@ -566,7 +562,7 @@ List readpor(const char * filePath, const bool debug, std::string encStr, labelsetnam = readstring(labelsetnam, por); if (debug) - Rcout << labelsetnam << std::endl; + Rcpp::Rcout << labelsetnam << std::endl; labelsetnams.push_back(labelsetnam); ++nlabelsetnams; @@ -577,7 +573,7 @@ List readpor(const char * filePath, const bool debug, std::string encStr, labelnum = readtostring(por); if (debug) - Rcout << labelnum << std::endl; + Rcpp::Rcout << labelnum << std::endl; int labnums = 0; labnums = b30int(labelnum); @@ -603,7 +599,7 @@ List readpor(const char * filePath, const bool debug, std::string encStr, labtxtlen = readtostring(por); if (debug) { - Rcout << "l & t: " << labval << " " << labtxtlen << std::endl; + Rcpp::Rcout << "l & t: " << labval << " " << labtxtlen << std::endl; } std::string labtxt ( b30int(labtxtlen), '\0'); @@ -642,8 +638,8 @@ List readpor(const char * filePath, const bool debug, std::string encStr, } if (debug) { - Rcout << labtxts << std::endl; - Rcout << labvals < #include -#include -#include -#include - -using namespace Rcpp; - #include "spss.h" #include "read_sav_known_n.h" #include "read_sav_unknown_n.h" @@ -42,7 +36,7 @@ using namespace Rcpp; //' @keywords internal //' @noRd // [[Rcpp::export]] -List readsav(const char * filePath, const bool debug, std::string encStr, +Rcpp::List readsav(const char * filePath, const bool debug, std::string encStr, std::string const ownEnc) { @@ -77,10 +71,12 @@ List readsav(const char * filePath, const bool debug, std::string encStr, std::string spss (8, '\0'); spss = readstring(spss, sav); - is_sav = boost::regex_match(spss, boost::regex("^\\$FL2@\\(#\\)$")); - is_zsav = boost::regex_match(spss, boost::regex("^\\$FL3@\\(#\\)$")); - ml_sav = boost::regex_match(spss.substr(0,4), boost::regex("^\\$FL2$")); - ml_zsav = boost::regex_match(spss.substr(0,4), boost::regex("^\\$FL3$")); + is_sav = (spss == "$FL2@(#)"); + is_zsav = (spss == "$FL3@(#)"); + + ml_sav = (spss.size() >= 4 && spss.compare(0, 4, "$FL2") == 0); + ml_zsav = (spss.size() >= 4 && spss.compare(0, 4, "$FL3") == 0); + // most likely: "$FL2" can be followed by "SPSS" is_spss = (is_sav == true) || (is_zsav == true) || (ml_sav == true) || (ml_zsav == true); @@ -92,10 +88,10 @@ List readsav(const char * filePath, const bool debug, std::string encStr, fileheader = readstring(fileheader, sav); fileheader = spss + fileheader; + if (fileheader.find("ENCRYPTEDSAV") != std::string::npos) + Rcpp::stop("The file header indicates that this file is encrypted. " + "A password is required to decode this file"); - if (boost::regex_search(fileheader, boost::regex("ENCRYPTEDSAV"))) - stop("The file header indicates that this file is encrypted. " - "A password is required to decode this file"); throw std::range_error("Can not read this file. Is it no SPSS sav file?"); } @@ -108,13 +104,12 @@ List readsav(const char * filePath, const bool debug, std::string encStr, datalabel = readstring(datalabel, sav); // trim additional whitespaces - datalabel = boost::regex_replace(datalabel, - boost::regex("^ +| +$"), "$1"); + trim(datalabel); if (doenc) datalabel = Riconv(datalabel, encStr); if (debug) - Rcout << "Datalabel:" << datalabel << std::endl; + Rcpp::Rcout << "Datalabel:" << datalabel << std::endl; // file format? should be 2 or 3 arch = readbin(arch, sav, swapit); @@ -164,9 +159,7 @@ List readsav(const char * filePath, const bool debug, std::string encStr, std::string filelabel (67, '\0'); filelabel = readstring(filelabel, sav); - - filelabel = boost::regex_replace(filelabel, - boost::regex("^ +| +$"), "$1"); + trim(filelabel); if (doenc) filelabel = Riconv(filelabel, encStr); @@ -272,8 +265,7 @@ List readsav(const char * filePath, const bool debug, std::string encStr, nvarname = readstring(nvarname, sav); // trim additional whitespaces - nvarname = boost::regex_replace(nvarname, - boost::regex("^ +| +$"), "$1"); + trim(nvarname); varnames.push_back(nvarname); @@ -290,8 +282,7 @@ List readsav(const char * filePath, const bool debug, std::string encStr, // trim additional whitespaces on the right - vallabel = boost::regex_replace(vallabel, - boost::regex("^ +| +$"), "$1"); + trim(vallabel); if (vtype > -1) // -1 is of no further useage vallabels.push_back(vallabel); @@ -309,7 +300,7 @@ List readsav(const char * filePath, const bool debug, std::string encStr, if (debug) { - Rcout << nvarname << " "; + Rcpp::Rcout << nvarname << " "; Rprintf("nmistype %d ", nmisstype); Rprintf("vflag %d\n", vlflag); } @@ -335,7 +326,7 @@ List readsav(const char * filePath, const bool debug, std::string encStr, std::string mV (8, '\0'); mV = readstring(mV, sav); - mV = boost::regex_replace(mV, boost::regex("^ +| +$"), "$1"); + trim(mV); missingV(0) = nmiss; missingV(i + 1) = mV; @@ -383,14 +374,15 @@ List readsav(const char * filePath, const bool debug, std::string encStr, // check for characters in the string lets hope SPSS does not allow // characters starting with a numeric or special character - noNum = boost::regex_search(cV, boost::regex("^[A-Za-z0-9]")) && - !boost::regex_search(cV, boost::regex("@$")); + bool startsWithAlnum = std::isalnum(static_cast(cV.front())); + bool endsWithAt = (cV.back() == '@'); + noNum = startsWithAlnum && !endsWithAt; // if its a double, do a memcpy, else trim whitespaces if (noNum) { if (doenc) cV = Riconv(cV, encStr); - cV = boost::regex_replace(cV, boost::regex("^ +| +$"), "$1"); + trim(cV); // return something so that we can later create a factor if (cV.compare(empty) != 0) @@ -411,7 +403,7 @@ List readsav(const char * filePath, const bool debug, std::string encStr, std::string lab (lablen, '\0'); lab = readstring(lab, sav); - lab = boost::regex_replace(lab, boost::regex("^ +| +$"), "$1"); + trim(lab); if (doenc) lab = Riconv(lab, encStr); @@ -463,15 +455,14 @@ List readsav(const char * filePath, const bool debug, std::string encStr, Rcpp::CharacterVector Document(nlines); std::string document (80, '\0'); - // Rcout << " --- Documentation --- " << std::endl; + // Rcpp::Rcout << " --- Documentation --- " << std::endl; for (int32_t i = 0; i < nlines; ++i) { std::string docline = readstring(document, sav); // if (doenc) docline = Riconv(docline, encStr); // trim additional whitespaces to the right - docline = boost::regex_replace(docline, - boost::regex(" +$"), "$1"); + rtrim(docline); Document(i) = docline; } @@ -649,7 +640,7 @@ List readsav(const char * filePath, const bool debug, std::string encStr, vn = readstringsize(vn, sav, len); // Rprintf("vn %d \n", len); - // Rcout << vn << std::endl; + // Rcpp::Rcout << vn << std::endl; // 8 is the minimal value int32_t varw = 0, nvars = 0; @@ -659,8 +650,8 @@ List readsav(const char * filePath, const bool debug, std::string encStr, // Rprintf("varw %d\n", varw); // set size - CharacterVector longv(nvars); - CharacterVector longl(nvars); + Rcpp::CharacterVector longv(nvars); + Rcpp::CharacterVector longl(nvars); for (int32_t i = 0; i < nvars; ++i) { @@ -670,14 +661,14 @@ List readsav(const char * filePath, const bool debug, std::string encStr, std::string val (len1, '\0'); val = readstringsize(val, sav, len1); - val = boost::regex_replace(val, boost::regex(" +$"), "$1"); + rtrim(val); len2 = readbin(len2, sav, swapit); std::string lab (len2, '\0'); lab = readstringsize(lab, sav, len2); - // Rcout << val << " : "<< lab << std::endl; + // Rcpp::Rcout << val << " : "<< lab << std::endl; longv(i) = val; longl(i) = lab; @@ -712,7 +703,7 @@ List readsav(const char * filePath, const bool debug, std::string encStr, mv = readbin(mv, sav, swapit); // set size - CharacterVector longmissing(mv); + Rcpp::CharacterVector longmissing(mv); len = readbin(len, sav, swapit); // should be 8 if (debug) @@ -722,8 +713,7 @@ List readsav(const char * filePath, const bool debug, std::string encStr, std::string val (len, '\0'); val = readstring(val, sav); - - val = boost::regex_replace(val, boost::regex(" +$"), "$1"); + rtrim(val); longmissing(mm) = val; } @@ -752,10 +742,10 @@ List readsav(const char * filePath, const bool debug, std::string encStr, // ignore this readstring(data, sav); - Rcout << data << std::endl; + Rcpp::Rcout << data << std::endl; - Rcout << "unknown subtype " << subtyp << " detected." << std::endl; - Rcout << "most likely no readson to worry. but if you want\n" << + Rcpp::Rcout << "unknown subtype " << subtyp << " detected." << std::endl; + Rcpp::Rcout << "most likely no readson to worry. but if you want\n" << "to help me out and can share a row of this datafile, \n" << "please mail me!" << std::endl; @@ -771,14 +761,14 @@ List readsav(const char * filePath, const bool debug, std::string encStr, if (debug) - Rcout << "-- end of header" << std::endl; + Rcpp::Rcout << "-- end of header" << std::endl; // encStr should not be empty otherwise // the iconv call would be useless if (doenc && encStr.compare(empty) != 0) { if (debug) - Rcout << "encoding" << std::endl; + Rcpp::Rcout << "encoding" << std::endl; longstring = Riconv(longstring, encStr); longvarname = Riconv(longvarname, encStr); @@ -788,10 +778,8 @@ List readsav(const char * filePath, const bool debug, std::string encStr, } // split. could fail for some locales if encoding is suppressed - boost::split(lstr, longstring, - boost::is_any_of("\t"), boost::token_compress_on); - boost::split(lvname, longvarname, - boost::is_any_of("\t"), boost::token_compress_on); + lstr = split(longstring, "\t", true); + lvname = split(longvarname, "\t", true); // Data Part -------------------------------------------------------------// @@ -803,27 +791,27 @@ List readsav(const char * filePath, const bool debug, std::string encStr, unk8 = readbin(unk8, sav, swapit); // 0 // c++ vector to Rcpp Vector - IntegerVector Vartype = wrap(vartype); - CharacterVector Varnames = wrap(varnames); + Rcpp::IntegerVector Vartype = Rcpp::wrap(vartype); + Rcpp::CharacterVector Varnames = Rcpp::wrap(varnames); // select only numerics or the beginning of strings. This enables // reading into fewer columns and reduces the overhead in the R code - CharacterVector vnam = Varnames[Vartype >= 0]; - IntegerVector vtyp = Vartype[Vartype >= 0]; + Rcpp::CharacterVector vnam = Varnames[Vartype >= 0]; + Rcpp::IntegerVector vtyp = Vartype[Vartype >= 0]; // if k is set to be the number of available numerics and string variables int32_t kv = vnam.size(); // wrangling around to get the length of the strings - NumericVector vtyp2 = wrap(vtyp); - NumericVector res = ceil(vtyp2 / 8); + Rcpp::NumericVector vtyp2 = wrap(vtyp); + Rcpp::NumericVector res = ceil(vtyp2 / 8); if (debug) { - Rcout << vnam << std::endl; - Rcout << vtyp << std::endl; - Rcout << res << std::endl; + Rcpp::Rcout << vnam << std::endl; + Rcpp::Rcout << vtyp << std::endl; + Rcpp::Rcout << res << std::endl; } if (debug) @@ -887,7 +875,7 @@ List readsav(const char * filePath, const bool debug, std::string encStr, Rcpp::Environment base("package:base"); Rcpp::Function iconv = base["iconv"]; - CharacterVector tmp = df[i]; + Rcpp::CharacterVector tmp = df[i]; tmp = iconv(tmp, Rcpp::Named("from", encStr), Rcpp::Named("to","")); SET_VECTOR_ELT(df, i, tmp); @@ -899,7 +887,7 @@ List readsav(const char * filePath, const bool debug, std::string encStr, // 3. Create a data.frame R_xlen_t nrows = Rf_length(df[0]); - df.attr("row.names") = IntegerVector::create(NA_INTEGER, nrows); + df.attr("row.names") = Rcpp::IntegerVector::create(NA_INTEGER, nrows); df.attr("names") = vnam; df.attr("class") = "data.frame"; diff --git a/src/spss.h b/src/spss.h index 9c9a7ab..d54f626 100644 --- a/src/spss.h +++ b/src/spss.h @@ -5,8 +5,52 @@ #include #include #include +#include #include "swap_endian.h" +inline void rtrim(std::string& s) { + s.erase(std::find_if(s.rbegin(), s.rend(), + [](unsigned char ch) { return !std::isspace(ch); }).base(), + s.end()); +} + +inline void trim(std::string& s) { + // Trim leading spaces + s.erase(s.begin(), std::find_if(s.begin(), s.end(), + [](unsigned char ch) { return !std::isspace(ch); })); + // Trim trailing spaces + rtrim(s); +} + +inline std::vector split( + const std::string& input, + const std::string& delimiters, + bool compress = true +) { + std::vector result; + std::string token; + std::unordered_set delims(delimiters.begin(), delimiters.end()); + + for (char c : input) { + if (delims.count(c)) { + if (!token.empty() || !compress) { + result.push_back(token); + token.clear(); + } + // if compress == true, skip consecutive delimiters + } else { + token += c; + } + } + if (!token.empty() || !compress) + result.push_back(token); + + if (result.empty()) + result.push_back(""); + + return result; +} + struct info_t { Rcpp::IntegerVector vtyp; Rcpp::IntegerVector cc; @@ -23,6 +67,10 @@ struct info_t { template T readbin( T t , std::istream& sav, bool swapit) { + if (sav.peek() == EOF) { + Rcpp::stop("Reached EOF"); + } + if (!sav.read ((char*)&t, sizeof(t))) Rcpp::stop("readbin: a binary read error occurred"); if (swapit==0) diff --git a/src/write_data.cpp b/src/write_data.cpp index 75c264c..7f62f53 100644 --- a/src/write_data.cpp +++ b/src/write_data.cpp @@ -1,5 +1,5 @@ /* - * Copyright (C) 2014-2019 Jan Marvin Garbuszus + * Copyright (C) 2014-2025 Jan Marvin Garbuszus * * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by the @@ -21,8 +21,6 @@ #include #include -using namespace Rcpp; - #include "spss.h" void write_data(Rcpp::DataFrame dat, int32_t cflag, @@ -109,7 +107,7 @@ void write_data(Rcpp::DataFrame dat, int32_t cflag, // Rcout << "--- string ---" << std::endl; - std::string val_s = as(as(dat[j])[i]); + std::string val_s = Rcpp::as(Rcpp::as(dat[j])[i]); int strlen = type; if (strlen == 255) strlen = 256; @@ -322,13 +320,13 @@ void write_data(Rcpp::DataFrame dat, int32_t cflag, default: { - CharacterVector cv_s = NA_STRING; - cv_s = as(dat[j])[i]; + Rcpp::CharacterVector cv_s = NA_STRING; + cv_s = Rcpp::as(dat[j])[i]; std::string val_s = ""; if (cv_s[0] != NA_STRING) - val_s = as(cv_s); + val_s = Rcpp::as(cv_s); int size = type; if (size == 255) diff --git a/src/write_sav_compress.cpp b/src/write_sav_compress.cpp index 5912622..9f9e0b8 100644 --- a/src/write_sav_compress.cpp +++ b/src/write_sav_compress.cpp @@ -1,5 +1,5 @@ /* - * Copyright (C) 2019 Jan Marvin Garbuszus + * Copyright (C) 2019-2025 Jan Marvin Garbuszus * * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by the diff --git a/src/writepor.cpp b/src/writepor.cpp index e3abc03..60f23f1 100644 --- a/src/writepor.cpp +++ b/src/writepor.cpp @@ -1,5 +1,5 @@ /* - * Copyright (C) 2014-2018 Jan Marvin Garbuszus + * Copyright (C) 2014-2025 Jan Marvin Garbuszus * * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by the @@ -22,8 +22,6 @@ #include #include -using namespace Rcpp; - #include "spss.h" //' writes the binary SPSS file @@ -100,14 +98,14 @@ void writepor(const char * filePath, Rcpp::DataFrame dat) for (int i = 0; i < k; ++i) { if (debug) - Rcout << "--- 7 ---" << std::endl; + Rcpp::Rcout << "--- 7 ---" << std::endl; file += "7"; //var int vartypi = vtyp(i); int isdate = vartyp(i); - std::string nvarname = as(nvarnames(i)); + std::string nvarname = Rcpp::as(nvarnames(i)); file += pnum1(vartypi); file += "/"; @@ -153,11 +151,11 @@ void writepor(const char * filePath, Rcpp::DataFrame dat) if (!Rf_isNull(label) && (Rf_length(label) == k )) { if (debug) - Rcout << "--- C ---" << std::endl; + Rcpp::Rcout << "--- C ---" << std::endl; file += "C"; //var - std::string lab = as(label(i)); + std::string lab = Rcpp::as(label(i)); file += writestr(lab,0); } @@ -167,7 +165,7 @@ void writepor(const char * filePath, Rcpp::DataFrame dat) if (!Rf_isNull(labtabs) && (Rf_length(labtabs) > 0)) { if (debug) - Rcout << "--- D ---" << std::endl; + Rcpp::Rcout << "--- D ---" << std::endl; file += "D"; @@ -176,7 +174,7 @@ void writepor(const char * filePath, Rcpp::DataFrame dat) Rcpp::IntegerVector labtab = labtabs[nolabtab]; Rcpp::CharacterVector labtn = labtab.attr("names"); - const std::string nlabs = as(labtabnams[nolabtab]); + const std::string nlabs = Rcpp::as(labtabnams[nolabtab]); file += pnum1(1); // nolab file += "/"; @@ -190,13 +188,13 @@ void writepor(const char * filePath, Rcpp::DataFrame dat) for (int j = 0; j < labtab.size(); ++j) { if (debug) { - Rcout << labtab(j) << std::endl; // val - Rcout << labtn(j) << std::endl; // lab + Rcpp::Rcout << labtab(j) << std::endl; // val + Rcpp::Rcout << labtn(j) << std::endl; // lab } file += pnum1(labtab(j)); file += "/"; - file += writestr(as(labtn(j)), 0); + file += writestr(Rcpp::as(labtn(j)), 0); } @@ -208,7 +206,7 @@ void writepor(const char * filePath, Rcpp::DataFrame dat) if (debug) - Rcout << "--- F ---" << std::endl; + Rcpp::Rcout << "--- F ---" << std::endl; for (int64_t i = 0; i < n; ++i) { @@ -243,13 +241,13 @@ void writepor(const char * filePath, Rcpp::DataFrame dat) default: { - CharacterVector cv_s = NA_STRING; - cv_s = as(dat[j])[i]; + Rcpp::CharacterVector cv_s = NA_STRING; + cv_s = Rcpp::as(dat[j])[i]; std::string val_s = ""; if (cv_s[0] != NA_STRING) - val_s = as(cv_s); + val_s = Rcpp::as(cv_s); file += writestr(val_s, 0); break; diff --git a/src/writesav.cpp b/src/writesav.cpp index a803940..f1d0d62 100644 --- a/src/writesav.cpp +++ b/src/writesav.cpp @@ -1,5 +1,5 @@ /* - * Copyright (C) 2014-2019 Jan Marvin Garbuszus + * Copyright (C) 2014-2025 Jan Marvin Garbuszus * * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by the @@ -21,8 +21,6 @@ #include #include -using namespace Rcpp; - #include "spss.h" #include "write_data.h" #include "write_sav_compress.h" @@ -265,7 +263,7 @@ void writesav(const char * filePath, Rcpp::DataFrame dat, uint8_t compress, uint8_t lablen = lab.size(); if (lablen > 120) { lablen = 120; - warning("Label longer than 120 characters found. Trimmed to 120."); + Rcpp::warning("Label longer than 120 characters found. Trimmed to 120."); } writebin(lablen, sav, swapit);