Patch notes (commentary only; patch tools skip text before the first "diff --git" line).

What the patch does:
  * adds a CMake build for a shared library target "dbf";
  * openFile() gains deferRecordLoading, and readRecordDeferred() loads the records later;
  * stat() can print the file, column and record sections selectively;
  * files whose type string contains "Visual FoxPro" get DBC_SIZE header bytes excluded
    from the column-definition block.

Review fixes folded into the added lines (hunk line counts are unchanged):
  * CMakeLists.txt appends -O3 to CMAKE_CXX_FLAGS instead of overwriting the variable;
  * readRecordDeferred() checks that the file opened and sets m_recordLoaded, so a second
    call returns early instead of reading the records again;
  * readColDef() subtracts m_dbcSize rather than (m_dbcSize + 1), matching the
    m_colDefLength computed in openFile() and the pre-patch length when m_dbcSize is 0;
  * the empty doc comments on the new DBaseFile members are filled in.

Caveats: the source of this patch had its line breaks collapsed, so indentation and the
position of blank context lines are reconstructed, and the "index" hashes are the
original ones. The element types of "std::vector&" / "std::vector>" in DBaseRecord.h were
lost upstream and are left as found; restore them from the repository before applying.

diff --git a/.gitignore b/.gitignore
index 9d79c41..82117d7 100644
--- a/.gitignore
+++ b/.gitignore
@@ -54,4 +54,6 @@ test/*
 *.o
 *.save
 *.dbf
-*.so
\ No newline at end of file
+*.so
+build
+.vscode
\ No newline at end of file
diff --git a/CMakeLists.txt b/CMakeLists.txt
new file mode 100644
index 0000000..a204c59
--- /dev/null
+++ b/CMakeLists.txt
@@ -0,0 +1,15 @@
+cmake_minimum_required(VERSION 3.28)
+project(dbf)
+set(CMAKE_CXX_STANDARD 17)
+
+set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O3")
+
+include_directories(include)
+
+add_library(dbf SHARED src/DBaseColDef.cpp
+    src/DBaseField.cpp
+    src/DBaseFieldProperty.cpp
+    src/DBaseFile.cpp
+    src/DBaseHeader.cpp
+    src/DBaseRecord.cpp
+)
\ No newline at end of file
diff --git a/include/DBaseFile.h b/include/DBaseFile.h
index 9b2cfec..a682a26 100644
--- a/include/DBaseFile.h
+++ b/include/DBaseFile.h
@@ -12,9 +12,9 @@ struct DBaseFile
 {
 
     /**< Open file and get contents */
-    bool openFile(const std::string fileName);
+    bool openFile(const std::string fileName, bool deferRecordLoading=false);
     /**< Open file and get contents */
-    void stat();
+    void stat(bool fileInformation=true, bool columnInformation=true, bool recordInformation=true);
 
     /**< \section Member variables */
     /**< Header structure */
@@ -24,6 +24,13 @@ struct DBaseFile
     /**< Data records in the file */
     std::vector<DBaseRecord> m_records;
 
+    /**< Read the records if openFile() was asked to defer them; opens the file first if the header is not loaded */
+    bool readRecordDeferred();
+
+    inline unsigned long long getFileSize() const {
+        return m_fileSize;
+    }
+
 private:
     /**< Read file header safely into std::string */
     void readHeader(std::ifstream& iFile);
@@ -50,6 +57,15 @@ struct DBaseFile
     unsigned int m_totalHeaderLength = 0;
     /**< Header contents (read raw from disk)*/
     std::string m_headerData = "";
+    /**< Header bytes excluded from the column definitions (DBC_SIZE for Visual FoxPro files, otherwise 0) */
+    unsigned int m_dbcSize = 0;
+    /**< File name given to openFile(); reused by readRecordDeferred() */
+    std::string m_fileName;
+    /**< Set by openFile() once the header has been parsed and validated */
+    bool m_headerLoaded = false;
+    /**< Set once the records have been read */
+    bool m_recordLoaded = false;
+
 };
 
 /**< \section Exceptions */
diff --git a/include/DBaseRecord.h b/include/DBaseRecord.h
index 4920ecc..c72dd06 100644
--- a/include/DBaseRecord.h
+++ b/include/DBaseRecord.h
@@ -15,6 +15,9 @@ struct DBaseRecord
     DBaseRecord(std::string& recordStr, std::vector& iFileColDef);
     ~DBaseRecord();
     void stat();
+    std::vector>& getRecordData() {
+        return m_recordData;
+    }
 
 private:
     std::vector> m_recordData;
diff --git a/src/DBaseColDef.cpp b/src/DBaseColDef.cpp
index 347ff7a..6d5297b 100644
--- a/src/DBaseColDef.cpp
+++ b/src/DBaseColDef.cpp
@@ -37,7 +37,11 @@ DBaseColDef::DBaseColDef(std::string& oneColumn) {
     }
 
     // Length of field in bytes
-    m_fieldLength = (int)oneColumn.at(16);
+    m_fieldLength = (uint8_t)oneColumn.at(16);
+    if (m_fieldType == DBaseFieldType::Character && m_fieldLength > 254) {
+        std::cout << m_fieldName << " field length (" << m_fieldLength << ") exceeds limit of character type(254), truncate to 254" << std::endl;
+        m_fieldLength = 254;
+    }
 
     // Number of decimal places
     m_fieldDecCount = (int)oneColumn.at(17);
diff --git a/src/DBaseFile.cpp b/src/DBaseFile.cpp
index 0588377..5a08593 100644
--- a/src/DBaseFile.cpp
+++ b/src/DBaseFile.cpp
@@ -18,16 +18,19 @@
 
 using namespace std;
 
+constexpr unsigned int DBC_SIZE = 263;
+
 /** \brief Constructs dBase structure from given file.
  * \param Name of the dBase file
  * \return True if succeded. Otherwise throws exception
  */
-bool DBaseFile::openFile(const std::string fileName) {
+bool DBaseFile::openFile(const std::string fileName, bool deferRecordLoading) {
     //open file and get file size
+    m_fileName = fileName;
     std::ifstream iFile;
     if(!iFile && iFile.is_open()) { throw fileNotFoundEx("File is already open in another process."); }
 
-    iFile.open(fileName, std::ifstream::ate | std::ifstream::binary);
+    iFile.open(m_fileName, std::ifstream::ate | std::ifstream::binary);
     m_fileSize = (unsigned long long)iFile.tellg();
     iFile.seekg(0, iFile.beg);
 
@@ -39,16 +42,43 @@ bool DBaseFile::openFile(const std::string fileName) {
     //Read file contents into heap memory
     readHeader(iFile);
 
+    // Visual FoxPro compatibility: exclude DBC_SIZE header bytes from the column definitions
+    if (m_header.m_fileType.find("Visual FoxPro") != std::string::npos) {
+        m_dbcSize = DBC_SIZE;
+    }
+
     //Check header
-    m_colDefLength = m_header.m_numBytesInHeader - m_totalHeaderLength -1;
+    m_colDefLength = m_header.m_numBytesInHeader - m_fileHeaderLength - m_dbcSize -1;
     validateBlockSize(m_colDefBlockSize, m_colDefLength);
 
     if(!(m_headerData.empty())) { m_header.parse(m_headerData);}
     validateBlockSize(m_colDefBlockSize, m_colDefLength);
+    m_headerLoaded = true;
 
     //Read rest of file
     readColDef(iFile, m_header);
-    readRecords(iFile, m_header);
+    if (!deferRecordLoading) {
+        readRecords(iFile, m_header);
+        m_recordLoaded = true;
+    }
+
+    iFile.close();
+
+    return true;
+}
+
+
+bool DBaseFile::readRecordDeferred() {
+    if (!m_headerLoaded) {
+        return openFile(m_fileName);
+    }
+    if (m_recordLoaded) {
+        return true;
+    }
+    std::ifstream iFile(m_fileName, std::ifstream::ate | std::ifstream::binary);
+    if(!iFile.is_open()) { throw fileNotFoundEx("File could not be opened for deferred record loading."); }
+    readRecords(iFile, m_header);
+    m_recordLoaded = true;
     iFile.close();
 
     return true;
@@ -65,7 +95,7 @@ void DBaseFile::readHeader(std::ifstream& iFile) {
 ///Read column definition
 void DBaseFile::readColDef(std::ifstream& iFile, DBaseHeader& iFileHeader) {
     //omit terminating byte at header end
-    unsigned int headerLengthWOTerminatingChar = iFileHeader.m_numBytesInHeader - 1;
+    unsigned int headerLengthWOTerminatingChar = iFileHeader.m_numBytesInHeader - 1 - m_dbcSize;
     iFile.seekg((m_fileHeaderLength), iFile.beg);
     std::string colDefBuf((headerLengthWOTerminatingChar - m_fileHeaderLength), ' ');
     iFile.read(&(colDefBuf.at(0)), (headerLengthWOTerminatingChar - m_fileHeaderLength));
@@ -114,21 +144,27 @@ inline void DBaseFile::validateBlockSize(unsigned int& blockSize, unsigned int&
 }
 
 ///Nice formatting for console output
-void DBaseFile::stat() {
+void DBaseFile::stat(bool fileInformation, bool columnInformation, bool recordInformation) {
     std::cout << std::endl;
+    if (fileInformation) {
     std::cout << "========== FILE INFORMATION ==========" << std::endl;
     std::cout << std::endl;
-    m_header.stat();
-    std::cout << std::endl;
-    std::cout << "========= COLUMN INFORMATION =========" << std::endl;
-    std::cout << std::endl;
-    for(DBaseColDef d : m_colDef) {
-        d.stat();
+        m_header.stat();
+        std::cout << std::endl;
     }
-    std::cout << std::endl;
-    std::cout << "========= RECORD INFORMATION =========" << std::endl;
-    for(DBaseRecord& r : m_records) {
-        r.stat();
+    if (columnInformation) {
+        std::cout << "========= COLUMN INFORMATION =========" << std::endl;
+        std::cout << std::endl;
+        for(DBaseColDef d : m_colDef) {
+            d.stat();
+        }
+        std::cout << std::endl;
+    }
+    if (recordInformation) {
+        std::cout << "========= RECORD INFORMATION =========" << std::endl;
+        for(DBaseRecord& r : m_records) {
+            r.stat();
+        }
+        std::cout << std::endl;
     }
-    std::cout << std::endl;
 }
diff --git a/src/DBaseHeader.cpp b/src/DBaseHeader.cpp
index cf4fca2..4d54331 100644
--- a/src/DBaseHeader.cpp
+++ b/src/DBaseHeader.cpp
@@ -12,11 +12,7 @@ void DBaseHeader::parse(std::string& headerData) {
     //TODO: read file into m_headerData
     for(unsigned int i = 0; i < headerData.size(); i++) {
         currentByte = headerData.at(i);
-
-        //Read file header bit by bit. Spec of DBF files available at:
-        //http://www.dbf2002.com/dbf-file-format.html
-        if(currentByte == 0x0D) {break;}
-
+        
         if(i < m_blockSize) {
             switch(i) {
                 case 0:{