diff --git a/DESCRIPTION b/DESCRIPTION
index 6dd2882..fd6d5c4 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -1,7 +1,7 @@
Package: readspss
Type: Package
Title: Importing and Exporting SPSS Files
-Version: 0.18.1
+Version: 0.19
Authors@R: c(
person("Jan Marvin", "Garbuszus",
email = "jan.garbuszus@ruhr-uni-bochum.de", role = c("aut", "cre")),
@@ -22,18 +22,17 @@ LazyData: TRUE
Language: en-US
Imports:
Rcpp (>= 0.11.2)
-Suggests:
- covr,
+Suggests:
datasets,
foreign,
knitr,
rmarkdown,
roxygen2,
testthat
-LinkingTo: Rcpp, BH
+LinkingTo: Rcpp
ByteCompile: yes
SystemRequirements: OpenSSL >= 1.0.2
VignetteBuilder: knitr
Encoding: UTF-8
Roxygen: list(markdown = TRUE)
-RoxygenNote: 7.3.2
+RoxygenNote: 7.3.3
diff --git a/src/RcppExports.cpp b/src/RcppExports.cpp
index 13babc2..9c31ea9 100644
--- a/src/RcppExports.cpp
+++ b/src/RcppExports.cpp
@@ -49,7 +49,7 @@ BEGIN_RCPP
END_RCPP
}
// readpor
-List readpor(const char * filePath, const bool debug, std::string encStr, bool override);
+Rcpp::List readpor(const char * filePath, const bool debug, std::string encStr, bool override);
RcppExport SEXP _readspss_readpor(SEXP filePathSEXP, SEXP debugSEXP, SEXP encStrSEXP, SEXP overrideSEXP) {
BEGIN_RCPP
Rcpp::RObject rcpp_result_gen;
@@ -63,7 +63,7 @@ BEGIN_RCPP
END_RCPP
}
// readsav
-List readsav(const char * filePath, const bool debug, std::string encStr, std::string const ownEnc);
+Rcpp::List readsav(const char * filePath, const bool debug, std::string encStr, std::string const ownEnc);
RcppExport SEXP _readspss_readsav(SEXP filePathSEXP, SEXP debugSEXP, SEXP encStrSEXP, SEXP ownEncSEXP) {
BEGIN_RCPP
Rcpp::RObject rcpp_result_gen;
diff --git a/src/boost_split.cpp b/src/boost_split.cpp
index 3810c8f..bc88e2a 100644
--- a/src/boost_split.cpp
+++ b/src/boost_split.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (C) 2018 Jan Marvin Garbuszus
+ * Copyright (C) 2018-2025 Jan Marvin Garbuszus
*
* This program is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License as published by the
@@ -15,11 +15,7 @@
* with this program. If not, see <http://www.gnu.org/licenses/>.
*/
-#include
-#include
-
-#include
-#include
+#include "spss.h"
//' split character vector at "="
//'
@@ -32,8 +28,7 @@ Rcpp::CharacterVector boost_split(std::string val_s) {
std::vector vec_r;
- boost::split(vec_r, val_s,
- boost::is_any_of("="), boost::token_compress_on);
+ vec_r = split(val_s, "=", true);
return(Rcpp::wrap(vec_r));
}
diff --git a/src/fast_factor.cpp b/src/fast_factor.cpp
index cfcb003..ac35fbb 100644
--- a/src/fast_factor.cpp
+++ b/src/fast_factor.cpp
@@ -1,11 +1,10 @@
#include
-using namespace Rcpp;
template
-IntegerVector fast_factor_template( const Vector& x,
- const Vector& y) {
- IntegerVector out = match(x, y);
+Rcpp::IntegerVector fast_factor_template( const Rcpp::Vector& x,
+ const Rcpp::Vector& y) {
+ Rcpp::IntegerVector out = match(x, y);
out.attr("levels") = y.attr("names");
out.attr("class") = "factor";
diff --git a/src/read_sav_encrypted.cpp b/src/read_sav_encrypted.cpp
index 9cb87e7..45fa12f 100644
--- a/src/read_sav_encrypted.cpp
+++ b/src/read_sav_encrypted.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (C) 2018 Jan Marvin Garbuszus
+ * Copyright (C) 2018-2025 Jan Marvin Garbuszus
* Copyright (c) 2013 Ben Pfaff
*
* This program is free software; you can redistribute it and/or modify it
@@ -22,12 +22,9 @@
#include
#include
-#include
-
#include
#include
-using namespace Rcpp;
#include "spss.h"
#include "read_sav_encrypted.h"
@@ -52,12 +49,10 @@ int encryptfile (const char * filePath, std::string &outpath, std::string pass)
std::string fileheader(36, '\0');
fileheader = readstring(fileheader, sav);
- if (!boost::regex_search(fileheader, boost::regex("ENCRYPTEDSAV"))) {
- stop("The file header indicates that it is not an SPSS sav file.");
+ if (fileheader.find("ENCRYPTEDSAV") == std::string::npos) {
+ Rcpp::stop("The file header indicates that it is not an SPSS sav file.");
}
-
-
/* Read first ciphertext block and use it to verify the password. Try the
password as plaintext first, then try decoding it. */
@@ -125,7 +120,7 @@ Rcpp::List readencrypted(const char * filePath, const bool debug,
// remove encrypted sav-file
std::remove(outPath.c_str());
} else {
- stop("stopping");
+ Rcpp::stop("stopping");
}
return df;
diff --git a/src/read_sav_known_n.cpp b/src/read_sav_known_n.cpp
index 29e85fa..51d989e 100644
--- a/src/read_sav_known_n.cpp
+++ b/src/read_sav_known_n.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (C) 2018 Jan Marvin Garbuszus
+ * Copyright (C) 2018-2025 Jan Marvin Garbuszus
*
* This program is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License as published by the
@@ -19,8 +19,6 @@
#include
#include
-#include
-
#include "spss.h"
Rcpp::List read_sav_known_n (Rcpp::List& df, std::fstream& sav,
@@ -165,8 +163,7 @@ Rcpp::List read_sav_known_n (Rcpp::List& df, std::fstream& sav,
if (res_i == res_kk-1) {
// trim additional whitespaces to the right
- start = boost::regex_replace(start,
- boost::regex(" +$"), "$1");
+ rtrim(start);
Rcpp::as(df[kk])[nn] = start;
@@ -250,8 +247,7 @@ Rcpp::List read_sav_known_n (Rcpp::List& df, std::fstream& sav,
if (res_i == res_kk-1) {
// trim additional whitespaces to the right
- start = boost::regex_replace(start,
- boost::regex(" +$"), "$1");
+ rtrim(start);
Rcpp::as(df[kk])[nn] = start;
@@ -294,8 +290,7 @@ Rcpp::List read_sav_known_n (Rcpp::List& df, std::fstream& sav,
if (res_i == res_kk-1) {
// trim additional whitespaces to the right
- start = boost::regex_replace(start,
- boost::regex(" +$"), "$1");
+ rtrim(start);
Rcpp::as(df[kk])[nn] = start;
@@ -418,8 +413,7 @@ Rcpp::List read_sav_known_n (Rcpp::List& df, std::fstream& sav,
val_s.erase(type, std::string::npos);
// trim additional whitespaces
- val_s = boost::regex_replace(val_s,
- boost::regex("^ +| +$"), "$1");
+ trim(val_s);
// Rcpp::Rcout << val_s << std::endl;
Rcpp::as(df[kk])[nn] = val_s;
diff --git a/src/read_sav_uncompress.cpp b/src/read_sav_uncompress.cpp
index 7b3fd2b..72a07f2 100644
--- a/src/read_sav_uncompress.cpp
+++ b/src/read_sav_uncompress.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (C) 2018 Jan Marvin Garbuszus
+ * Copyright (C) 2018-2025 Jan Marvin Garbuszus
*
* zlib header information by Evan Miller
*
diff --git a/src/readpor.cpp b/src/readpor.cpp
index 137b84d..9007243 100644
--- a/src/readpor.cpp
+++ b/src/readpor.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (C) 2018 Jan Marvin Garbuszus
+ * Copyright (C) 2018-2025 Jan Marvin Garbuszus
*
* This program is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License as published by the
@@ -21,10 +21,6 @@
#include
#include
-#include
-
-using namespace Rcpp;
-
#include "spss.h"
//' Reads the binary SPSS file
@@ -37,7 +33,7 @@ using namespace Rcpp;
//' @keywords internal
//' @noRd
// [[Rcpp::export]]
-List readpor(const char * filePath, const bool debug, std::string encStr,
+Rcpp::List readpor(const char * filePath, const bool debug, std::string encStr,
bool override)
{
@@ -64,7 +60,7 @@ List readpor(const char * filePath, const bool debug, std::string encStr,
}
} else {
- stop ("No file was read.");
+ Rcpp::stop ("No file was read.");
}
por_file.close();
@@ -110,12 +106,12 @@ List readpor(const char * filePath, const bool debug, std::string encStr,
std::string spss (200, '\0');
spss = readstring(spss, por);
- if (!override){
- if (!boost::regex_search(spss, boost::regex("ASCII SPSS PORT FILE")) &&
- !boost::regex_search(spss, boost::regex("EBCDIC SPSS PORT FILE"))) {
- stop("The file header indicates that it is not an SPSS por file. "
- "Use 'override = TRUE' to ignore this check.");
- }
+ if (!override) {
+ if (spss.find("ASCII SPSS PORT FILE") == std::string::npos &&
+ spss.find("EBCDIC SPSS PORT FILE") == std::string::npos) {
+ Rcpp::stop("The file header indicates that it is not an SPSS por file. "
+ "Use 'override = TRUE' to ignore this check.");
+ }
}
// Controll characters
@@ -129,28 +125,28 @@ List readpor(const char * filePath, const bool debug, std::string encStr,
digits = readstring(digits, por);
if (debug)
- Rcout << "digits: " << digits << std::endl;
+ Rcpp::Rcout << "digits: " << digits << std::endl;
// Capitals
std::string capitals (26, '\0');
capitals = readstring(capitals, por);
if (debug)
- Rcout << "capitals: " << capitals << std::endl;
+ Rcpp::Rcout << "capitals: " << capitals << std::endl;
// lowercase
std::string lower (26, '\0');
lower = readstring(lower, por);
if (debug)
- Rcout << "lower: " << lower << std::endl;
+ Rcpp::Rcout << "lower: " << lower << std::endl;
// random
std::string random (61, '\0');
random = readstring(random, por);
if (debug)
- Rcout << "random: " << random << std::endl;
+ Rcpp::Rcout << "random: " << random << std::endl;
// Reserved
std::string reserved (69, '\0');
@@ -161,10 +157,10 @@ List readpor(const char * filePath, const bool debug, std::string encStr,
tag = readstring(tag, por);
if (debug)
- Rcout << "tag: " << tag << std::endl;
+ Rcpp::Rcout << "tag: " << tag << std::endl;
if (debug)
- Rcout << "Pos: " << por.tellg() << std::endl;
+ Rcpp::Rcout << "Pos: " << por.tellg() << std::endl;
// end of header -----------------------------------------------------------
@@ -193,7 +189,7 @@ List readpor(const char * filePath, const bool debug, std::string encStr,
if (debug)
- Rcout << vers << " " << filedate << " " << filetime << std::endl;
+ Rcpp::Rcout << vers << " " << filedate << " " << filetime << std::endl;
std::string varrec (1, '\0');
@@ -213,7 +209,7 @@ List readpor(const char * filePath, const bool debug, std::string encStr,
if (debug)
- Rcout << prod << std::endl;
+ Rcpp::Rcout << prod << std::endl;
// optional
// 2 or 3 : author and extra record
@@ -234,7 +230,7 @@ List readpor(const char * filePath, const bool debug, std::string encStr,
file_info.push_back(author);
if (debug)
- Rcout << author << std::endl;
+ Rcpp::Rcout << author << std::endl;
varrec = readstring(varrec, por);
}
@@ -253,7 +249,7 @@ List readpor(const char * filePath, const bool debug, std::string encStr,
file_info.push_back(extra);
if (debug)
- Rcout << extra << std::endl;
+ Rcpp::Rcout << extra << std::endl;
varrec = readstring(varrec, por);
}
@@ -284,7 +280,7 @@ List readpor(const char * filePath, const bool debug, std::string encStr,
if (varrec.compare("5") == 0) {
if (debug)
- Rcout << "--- 5 ---" << std::endl;
+ Rcpp::Rcout << "--- 5 ---" << std::endl;
std::string prec;
prec = readtostring(por);
@@ -298,7 +294,7 @@ List readpor(const char * filePath, const bool debug, std::string encStr,
if (varrec.compare("6") == 0) {
if (debug)
- Rcout << "--- 6 ---" << std::endl;
+ Rcpp::Rcout << "--- 6 ---" << std::endl;
// single string
std::string len;
@@ -317,7 +313,7 @@ List readpor(const char * filePath, const bool debug, std::string encStr,
{
if (debug)
- Rcout << "--- 7 ---" << std::endl;
+ Rcpp::Rcout << "--- 7 ---" << std::endl;
// 0 or 1-255
std::string vartyp;
@@ -386,8 +382,8 @@ List readpor(const char * filePath, const bool debug, std::string encStr,
if (debug) {
- Rcout << varname << std::endl;
- Rcout << varnamelen << std::endl;
+ Rcpp::Rcout << varname << std::endl;
+ Rcpp::Rcout << varnamelen << std::endl;
}
}
@@ -396,7 +392,7 @@ List readpor(const char * filePath, const bool debug, std::string encStr,
if (varrec.compare("8") == 0) {
if (debug)
- Rcout << "--- 8 ---" << std::endl;
+ Rcpp::Rcout << "--- 8 ---" << std::endl;
int vartyp = 0;
std::string misslen;
@@ -491,7 +487,7 @@ List readpor(const char * filePath, const bool debug, std::string encStr,
if (varrec.compare("B") == 0) {
if (debug)
- Rcout << "--- B ---" << std::endl;
+ Rcpp::Rcout << "--- B ---" << std::endl;
std::string varname;
ptrdiff_t pos = 0;
@@ -526,7 +522,7 @@ List readpor(const char * filePath, const bool debug, std::string encStr,
if (varrec.compare("C") == 0) {
if (debug)
- Rcout << "--- C ---" << std::endl;
+ Rcpp::Rcout << "--- C ---" << std::endl;
std::string labellen;
@@ -536,7 +532,7 @@ List readpor(const char * filePath, const bool debug, std::string encStr,
label = readstring(label, por);
if (debug)
- Rcout << label << std::endl;
+ Rcpp::Rcout << label << std::endl;
varlabels.push_back(label);
@@ -548,7 +544,7 @@ List readpor(const char * filePath, const bool debug, std::string encStr,
if (varrec.compare("D") == 0) {
if (debug)
- Rcout << "--- D ---" << std::endl;
+ Rcpp::Rcout << "--- D ---" << std::endl;
std::string unk1;
unk1 = readtostring(por);
@@ -566,7 +562,7 @@ List readpor(const char * filePath, const bool debug, std::string encStr,
labelsetnam = readstring(labelsetnam, por);
if (debug)
- Rcout << labelsetnam << std::endl;
+ Rcpp::Rcout << labelsetnam << std::endl;
labelsetnams.push_back(labelsetnam);
++nlabelsetnams;
@@ -577,7 +573,7 @@ List readpor(const char * filePath, const bool debug, std::string encStr,
labelnum = readtostring(por);
if (debug)
- Rcout << labelnum << std::endl;
+ Rcpp::Rcout << labelnum << std::endl;
int labnums = 0;
labnums = b30int(labelnum);
@@ -603,7 +599,7 @@ List readpor(const char * filePath, const bool debug, std::string encStr,
labtxtlen = readtostring(por);
if (debug) {
- Rcout << "l & t: " << labval << " " << labtxtlen << std::endl;
+ Rcpp::Rcout << "l & t: " << labval << " " << labtxtlen << std::endl;
}
std::string labtxt ( b30int(labtxtlen), '\0');
@@ -642,8 +638,8 @@ List readpor(const char * filePath, const bool debug, std::string encStr,
}
if (debug) {
- Rcout << labtxts << std::endl;
- Rcout << labvals <
#include
-#include
-#include
-#include
-
-using namespace Rcpp;
-
#include "spss.h"
#include "read_sav_known_n.h"
#include "read_sav_unknown_n.h"
@@ -42,7 +36,7 @@ using namespace Rcpp;
//' @keywords internal
//' @noRd
// [[Rcpp::export]]
-List readsav(const char * filePath, const bool debug, std::string encStr,
+Rcpp::List readsav(const char * filePath, const bool debug, std::string encStr,
std::string const ownEnc)
{
@@ -77,10 +71,12 @@ List readsav(const char * filePath, const bool debug, std::string encStr,
std::string spss (8, '\0');
spss = readstring(spss, sav);
- is_sav = boost::regex_match(spss, boost::regex("^\\$FL2@\\(#\\)$"));
- is_zsav = boost::regex_match(spss, boost::regex("^\\$FL3@\\(#\\)$"));
- ml_sav = boost::regex_match(spss.substr(0,4), boost::regex("^\\$FL2$"));
- ml_zsav = boost::regex_match(spss.substr(0,4), boost::regex("^\\$FL3$"));
+ is_sav = (spss == "$FL2@(#)");
+ is_zsav = (spss == "$FL3@(#)");
+
+ ml_sav = (spss.size() >= 4 && spss.compare(0, 4, "$FL2") == 0);
+ ml_zsav = (spss.size() >= 4 && spss.compare(0, 4, "$FL3") == 0);
+
// most likely: "$FL2" can be followed by "SPSS"
is_spss = (is_sav == true) || (is_zsav == true) ||
(ml_sav == true) || (ml_zsav == true);
@@ -92,10 +88,10 @@ List readsav(const char * filePath, const bool debug, std::string encStr,
fileheader = readstring(fileheader, sav);
fileheader = spss + fileheader;
+ if (fileheader.find("ENCRYPTEDSAV") != std::string::npos)
+ Rcpp::stop("The file header indicates that this file is encrypted. "
+ "A password is required to decode this file");
- if (boost::regex_search(fileheader, boost::regex("ENCRYPTEDSAV")))
- stop("The file header indicates that this file is encrypted. "
- "A password is required to decode this file");
throw std::range_error("Can not read this file. Is it no SPSS sav file?");
}
@@ -108,13 +104,12 @@ List readsav(const char * filePath, const bool debug, std::string encStr,
datalabel = readstring(datalabel, sav);
// trim additional whitespaces
- datalabel = boost::regex_replace(datalabel,
- boost::regex("^ +| +$"), "$1");
+ trim(datalabel);
if (doenc) datalabel = Riconv(datalabel, encStr);
if (debug)
- Rcout << "Datalabel:" << datalabel << std::endl;
+ Rcpp::Rcout << "Datalabel:" << datalabel << std::endl;
// file format? should be 2 or 3
arch = readbin(arch, sav, swapit);
@@ -164,9 +159,7 @@ List readsav(const char * filePath, const bool debug, std::string encStr,
std::string filelabel (67, '\0');
filelabel = readstring(filelabel, sav);
-
- filelabel = boost::regex_replace(filelabel,
- boost::regex("^ +| +$"), "$1");
+ trim(filelabel);
if (doenc) filelabel = Riconv(filelabel, encStr);
@@ -272,8 +265,7 @@ List readsav(const char * filePath, const bool debug, std::string encStr,
nvarname = readstring(nvarname, sav);
// trim additional whitespaces
- nvarname = boost::regex_replace(nvarname,
- boost::regex("^ +| +$"), "$1");
+ trim(nvarname);
varnames.push_back(nvarname);
@@ -290,8 +282,7 @@ List readsav(const char * filePath, const bool debug, std::string encStr,
// trim additional whitespaces on the right
- vallabel = boost::regex_replace(vallabel,
- boost::regex("^ +| +$"), "$1");
+ trim(vallabel);
if (vtype > -1) // -1 is of no further useage
vallabels.push_back(vallabel);
@@ -309,7 +300,7 @@ List readsav(const char * filePath, const bool debug, std::string encStr,
if (debug) {
- Rcout << nvarname << " ";
+ Rcpp::Rcout << nvarname << " ";
Rprintf("nmistype %d ", nmisstype);
Rprintf("vflag %d\n", vlflag);
}
@@ -335,7 +326,7 @@ List readsav(const char * filePath, const bool debug, std::string encStr,
std::string mV (8, '\0');
mV = readstring(mV, sav);
- mV = boost::regex_replace(mV, boost::regex("^ +| +$"), "$1");
+ trim(mV);
missingV(0) = nmiss;
missingV(i + 1) = mV;
@@ -383,14 +374,15 @@ List readsav(const char * filePath, const bool debug, std::string encStr,
// check for characters in the string lets hope SPSS does not allow
// characters starting with a numeric or special character
- noNum = boost::regex_search(cV, boost::regex("^[A-Za-z0-9]")) &&
- !boost::regex_search(cV, boost::regex("@$"));
+ bool startsWithAlnum = std::isalnum(static_cast(cV.front()));
+ bool endsWithAt = (cV.back() == '@');
+ noNum = startsWithAlnum && !endsWithAt;
// if its a double, do a memcpy, else trim whitespaces
if (noNum) {
if (doenc) cV = Riconv(cV, encStr);
- cV = boost::regex_replace(cV, boost::regex("^ +| +$"), "$1");
+ trim(cV);
// return something so that we can later create a factor
if (cV.compare(empty) != 0)
@@ -411,7 +403,7 @@ List readsav(const char * filePath, const bool debug, std::string encStr,
std::string lab (lablen, '\0');
lab = readstring(lab, sav);
- lab = boost::regex_replace(lab, boost::regex("^ +| +$"), "$1");
+ trim(lab);
if (doenc) lab = Riconv(lab, encStr);
@@ -463,15 +455,14 @@ List readsav(const char * filePath, const bool debug, std::string encStr,
Rcpp::CharacterVector Document(nlines);
std::string document (80, '\0');
- // Rcout << " --- Documentation --- " << std::endl;
+ // Rcpp::Rcout << " --- Documentation --- " << std::endl;
for (int32_t i = 0; i < nlines; ++i) {
std::string docline = readstring(document, sav);
// if (doenc) docline = Riconv(docline, encStr);
// trim additional whitespaces to the right
- docline = boost::regex_replace(docline,
- boost::regex(" +$"), "$1");
+ rtrim(docline);
Document(i) = docline;
}
@@ -649,7 +640,7 @@ List readsav(const char * filePath, const bool debug, std::string encStr,
vn = readstringsize(vn, sav, len);
// Rprintf("vn %d \n", len);
- // Rcout << vn << std::endl;
+ // Rcpp::Rcout << vn << std::endl;
// 8 is the minimal value
int32_t varw = 0, nvars = 0;
@@ -659,8 +650,8 @@ List readsav(const char * filePath, const bool debug, std::string encStr,
// Rprintf("varw %d\n", varw);
// set size
- CharacterVector longv(nvars);
- CharacterVector longl(nvars);
+ Rcpp::CharacterVector longv(nvars);
+ Rcpp::CharacterVector longl(nvars);
for (int32_t i = 0; i < nvars; ++i) {
@@ -670,14 +661,14 @@ List readsav(const char * filePath, const bool debug, std::string encStr,
std::string val (len1, '\0');
val = readstringsize(val, sav, len1);
- val = boost::regex_replace(val, boost::regex(" +$"), "$1");
+ rtrim(val);
len2 = readbin(len2, sav, swapit);
std::string lab (len2, '\0');
lab = readstringsize(lab, sav, len2);
- // Rcout << val << " : "<< lab << std::endl;
+ // Rcpp::Rcout << val << " : "<< lab << std::endl;
longv(i) = val;
longl(i) = lab;
@@ -712,7 +703,7 @@ List readsav(const char * filePath, const bool debug, std::string encStr,
mv = readbin(mv, sav, swapit);
// set size
- CharacterVector longmissing(mv);
+ Rcpp::CharacterVector longmissing(mv);
len = readbin(len, sav, swapit); // should be 8
if (debug)
@@ -722,8 +713,7 @@ List readsav(const char * filePath, const bool debug, std::string encStr,
std::string val (len, '\0');
val = readstring(val, sav);
-
- val = boost::regex_replace(val, boost::regex(" +$"), "$1");
+ rtrim(val);
longmissing(mm) = val;
}
@@ -752,10 +742,10 @@ List readsav(const char * filePath, const bool debug, std::string encStr,
// ignore this
readstring(data, sav);
- Rcout << data << std::endl;
+ Rcpp::Rcout << data << std::endl;
- Rcout << "unknown subtype " << subtyp << " detected." << std::endl;
- Rcout << "most likely no readson to worry. but if you want\n" <<
+ Rcpp::Rcout << "unknown subtype " << subtyp << " detected." << std::endl;
+ Rcpp::Rcout << "most likely no readson to worry. but if you want\n" <<
"to help me out and can share a row of this datafile, \n" <<
"please mail me!" << std::endl;
@@ -771,14 +761,14 @@ List readsav(const char * filePath, const bool debug, std::string encStr,
if (debug)
- Rcout << "-- end of header" << std::endl;
+ Rcpp::Rcout << "-- end of header" << std::endl;
// encStr should not be empty otherwise
// the iconv call would be useless
if (doenc && encStr.compare(empty) != 0) {
if (debug)
- Rcout << "encoding" << std::endl;
+ Rcpp::Rcout << "encoding" << std::endl;
longstring = Riconv(longstring, encStr);
longvarname = Riconv(longvarname, encStr);
@@ -788,10 +778,8 @@ List readsav(const char * filePath, const bool debug, std::string encStr,
}
// split. could fail for some locales if encoding is suppressed
- boost::split(lstr, longstring,
- boost::is_any_of("\t"), boost::token_compress_on);
- boost::split(lvname, longvarname,
- boost::is_any_of("\t"), boost::token_compress_on);
+ lstr = split(longstring, "\t", true);
+ lvname = split(longvarname, "\t", true);
// Data Part -------------------------------------------------------------//
@@ -803,27 +791,27 @@ List readsav(const char * filePath, const bool debug, std::string encStr,
unk8 = readbin(unk8, sav, swapit); // 0
// c++ vector to Rcpp Vector
- IntegerVector Vartype = wrap(vartype);
- CharacterVector Varnames = wrap(varnames);
+ Rcpp::IntegerVector Vartype = Rcpp::wrap(vartype);
+ Rcpp::CharacterVector Varnames = Rcpp::wrap(varnames);
// select only numerics or the beginning of strings. This enables
// reading into fewer columns and reduces the overhead in the R code
- CharacterVector vnam = Varnames[Vartype >= 0];
- IntegerVector vtyp = Vartype[Vartype >= 0];
+ Rcpp::CharacterVector vnam = Varnames[Vartype >= 0];
+ Rcpp::IntegerVector vtyp = Vartype[Vartype >= 0];
// if k is set to be the number of available numerics and string variables
int32_t kv = vnam.size();
// wrangling around to get the length of the strings
- NumericVector vtyp2 = wrap(vtyp);
- NumericVector res = ceil(vtyp2 / 8);
+ Rcpp::NumericVector vtyp2 = wrap(vtyp);
+ Rcpp::NumericVector res = ceil(vtyp2 / 8);
if (debug) {
- Rcout << vnam << std::endl;
- Rcout << vtyp << std::endl;
- Rcout << res << std::endl;
+ Rcpp::Rcout << vnam << std::endl;
+ Rcpp::Rcout << vtyp << std::endl;
+ Rcpp::Rcout << res << std::endl;
}
if (debug)
@@ -887,7 +875,7 @@ List readsav(const char * filePath, const bool debug, std::string encStr,
Rcpp::Environment base("package:base");
Rcpp::Function iconv = base["iconv"];
- CharacterVector tmp = df[i];
+ Rcpp::CharacterVector tmp = df[i];
tmp = iconv(tmp, Rcpp::Named("from", encStr), Rcpp::Named("to",""));
SET_VECTOR_ELT(df, i, tmp);
@@ -899,7 +887,7 @@ List readsav(const char * filePath, const bool debug, std::string encStr,
// 3. Create a data.frame
R_xlen_t nrows = Rf_length(df[0]);
- df.attr("row.names") = IntegerVector::create(NA_INTEGER, nrows);
+ df.attr("row.names") = Rcpp::IntegerVector::create(NA_INTEGER, nrows);
df.attr("names") = vnam;
df.attr("class") = "data.frame";
diff --git a/src/spss.h b/src/spss.h
index 9c9a7ab..d54f626 100644
--- a/src/spss.h
+++ b/src/spss.h
@@ -5,8 +5,52 @@
#include
#include
#include
+#include
#include "swap_endian.h"
+inline void rtrim(std::string& s) {
+  // Strip only trailing ' ' (U+0020) padding in place, as the former " +$"
+  // regex did; std::isspace would also eat tabs/newlines that are data.
+  s.erase(s.find_last_not_of(' ') + 1);  // npos + 1 == 0 -> clears all-blank s
+}
+
+inline void trim(std::string& s) {
+  // Strip only ' ' (U+0020) padding on both ends in place, as the former
+  // "^ +| +$" regex did; tabs/newlines in the data are left untouched.
+  const std::string::size_type last = s.find_last_not_of(' ');
+  s.erase(last == std::string::npos ? 0 : last + 1);  // trailing blanks
+  s.erase(0, s.find_first_not_of(' '));               // leading blanks
+}
+
+inline std::vector<std::string> split(
+    const std::string& input,
+    const std::string& delimiters,
+    bool compress = true
+) {
+  // Tokenise `input` at any character contained in `delimiters`.
+  // compress == true : empty tokens (runs of delimiters, leading/trailing
+  //                    delimiters) are dropped.
+  // compress == false: every delimiter closes a token, so empty ones are kept.
+  // The result always holds at least one element (possibly "").
+  std::vector<std::string> result;
+  std::string token;
+  for (const char c : input) {
+    if (delimiters.find(c) == std::string::npos) {
+      token += c;  // ordinary character: extend the current token
+    } else if (!token.empty() || !compress) {
+      result.push_back(token);
+      token.clear();
+    }
+  }
+  if (!token.empty() || !compress)
+    result.push_back(token);
+
+  if (result.empty())
+    result.push_back("");  // e.g. empty input with compress == true
+
+  return result;
+}
+
struct info_t {
Rcpp::IntegerVector vtyp;
Rcpp::IntegerVector cc;
@@ -23,6 +67,10 @@ struct info_t {
template
T readbin( T t , std::istream& sav, bool swapit)
{
+ if (sav.peek() == EOF) {
+ Rcpp::stop("Reached EOF");
+ }
+
if (!sav.read ((char*)&t, sizeof(t)))
Rcpp::stop("readbin: a binary read error occurred");
if (swapit==0)
diff --git a/src/write_data.cpp b/src/write_data.cpp
index 75c264c..7f62f53 100644
--- a/src/write_data.cpp
+++ b/src/write_data.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (C) 2014-2019 Jan Marvin Garbuszus
+ * Copyright (C) 2014-2025 Jan Marvin Garbuszus
*
* This program is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License as published by the
@@ -21,8 +21,6 @@
#include
#include
-using namespace Rcpp;
-
#include "spss.h"
void write_data(Rcpp::DataFrame dat, int32_t cflag,
@@ -109,7 +107,7 @@ void write_data(Rcpp::DataFrame dat, int32_t cflag,
// Rcout << "--- string ---" << std::endl;
- std::string val_s = as(as(dat[j])[i]);
+ std::string val_s = Rcpp::as(Rcpp::as(dat[j])[i]);
int strlen = type;
if (strlen == 255) strlen = 256;
@@ -322,13 +320,13 @@ void write_data(Rcpp::DataFrame dat, int32_t cflag,
default:
{
- CharacterVector cv_s = NA_STRING;
- cv_s = as(dat[j])[i];
+ Rcpp::CharacterVector cv_s = NA_STRING;
+ cv_s = Rcpp::as(dat[j])[i];
std::string val_s = "";
if (cv_s[0] != NA_STRING)
- val_s = as(cv_s);
+ val_s = Rcpp::as(cv_s);
int size = type;
if (size == 255)
diff --git a/src/write_sav_compress.cpp b/src/write_sav_compress.cpp
index 5912622..9f9e0b8 100644
--- a/src/write_sav_compress.cpp
+++ b/src/write_sav_compress.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (C) 2019 Jan Marvin Garbuszus
+ * Copyright (C) 2019-2025 Jan Marvin Garbuszus
*
* This program is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License as published by the
diff --git a/src/writepor.cpp b/src/writepor.cpp
index e3abc03..60f23f1 100644
--- a/src/writepor.cpp
+++ b/src/writepor.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (C) 2014-2018 Jan Marvin Garbuszus
+ * Copyright (C) 2014-2025 Jan Marvin Garbuszus
*
* This program is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License as published by the
@@ -22,8 +22,6 @@
#include
#include
-using namespace Rcpp;
-
#include "spss.h"
//' writes the binary SPSS file
@@ -100,14 +98,14 @@ void writepor(const char * filePath, Rcpp::DataFrame dat)
for (int i = 0; i < k; ++i) {
if (debug)
- Rcout << "--- 7 ---" << std::endl;
+ Rcpp::Rcout << "--- 7 ---" << std::endl;
file += "7"; //var
int vartypi = vtyp(i);
int isdate = vartyp(i);
- std::string nvarname = as(nvarnames(i));
+ std::string nvarname = Rcpp::as(nvarnames(i));
file += pnum1(vartypi);
file += "/";
@@ -153,11 +151,11 @@ void writepor(const char * filePath, Rcpp::DataFrame dat)
if (!Rf_isNull(label) && (Rf_length(label) == k )) {
if (debug)
- Rcout << "--- C ---" << std::endl;
+ Rcpp::Rcout << "--- C ---" << std::endl;
file += "C"; //var
- std::string lab = as(label(i));
+ std::string lab = Rcpp::as(label(i));
file += writestr(lab,0);
}
@@ -167,7 +165,7 @@ void writepor(const char * filePath, Rcpp::DataFrame dat)
if (!Rf_isNull(labtabs) && (Rf_length(labtabs) > 0)) {
if (debug)
- Rcout << "--- D ---" << std::endl;
+ Rcpp::Rcout << "--- D ---" << std::endl;
file += "D";
@@ -176,7 +174,7 @@ void writepor(const char * filePath, Rcpp::DataFrame dat)
Rcpp::IntegerVector labtab = labtabs[nolabtab];
Rcpp::CharacterVector labtn = labtab.attr("names");
- const std::string nlabs = as(labtabnams[nolabtab]);
+ const std::string nlabs = Rcpp::as(labtabnams[nolabtab]);
file += pnum1(1); // nolab
file += "/";
@@ -190,13 +188,13 @@ void writepor(const char * filePath, Rcpp::DataFrame dat)
for (int j = 0; j < labtab.size(); ++j) {
if (debug) {
- Rcout << labtab(j) << std::endl; // val
- Rcout << labtn(j) << std::endl; // lab
+ Rcpp::Rcout << labtab(j) << std::endl; // val
+ Rcpp::Rcout << labtn(j) << std::endl; // lab
}
file += pnum1(labtab(j));
file += "/";
- file += writestr(as(labtn(j)), 0);
+ file += writestr(Rcpp::as(labtn(j)), 0);
}
@@ -208,7 +206,7 @@ void writepor(const char * filePath, Rcpp::DataFrame dat)
if (debug)
- Rcout << "--- F ---" << std::endl;
+ Rcpp::Rcout << "--- F ---" << std::endl;
for (int64_t i = 0; i < n; ++i) {
@@ -243,13 +241,13 @@ void writepor(const char * filePath, Rcpp::DataFrame dat)
default:
{
- CharacterVector cv_s = NA_STRING;
- cv_s = as(dat[j])[i];
+ Rcpp::CharacterVector cv_s = NA_STRING;
+ cv_s = Rcpp::as(dat[j])[i];
std::string val_s = "";
if (cv_s[0] != NA_STRING)
- val_s = as(cv_s);
+ val_s = Rcpp::as(cv_s);
file += writestr(val_s, 0);
break;
diff --git a/src/writesav.cpp b/src/writesav.cpp
index a803940..f1d0d62 100644
--- a/src/writesav.cpp
+++ b/src/writesav.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (C) 2014-2019 Jan Marvin Garbuszus
+ * Copyright (C) 2014-2025 Jan Marvin Garbuszus
*
* This program is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License as published by the
@@ -21,8 +21,6 @@
#include
#include
-using namespace Rcpp;
-
#include "spss.h"
#include "write_data.h"
#include "write_sav_compress.h"
@@ -265,7 +263,7 @@ void writesav(const char * filePath, Rcpp::DataFrame dat, uint8_t compress,
uint8_t lablen = lab.size();
if (lablen > 120) {
lablen = 120;
- warning("Label longer than 120 characters found. Trimmed to 120.");
+ Rcpp::warning("Label longer than 120 characters found. Trimmed to 120.");
}
writebin(lablen, sav, swapit);