From aa86438a165e507a32d721ebe543b1cde80f03be Mon Sep 17 00:00:00 2001 From: Paul Kenjora Date: Thu, 24 Apr 2025 16:16:33 -0700 Subject: [PATCH 1/3] Rows not in JSON are just empty rows, return them to align row counts correctly with other tools. --- xlsx/__init__.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/xlsx/__init__.py b/xlsx/__init__.py index a4ac436..1af115d 100644 --- a/xlsx/__init__.py +++ b/xlsx/__init__.py @@ -153,9 +153,16 @@ def rowsIter(self): sheetDoc = self.workbook.domzip["xl/worksheets/sheet%d.xml" % self.id] sheetData = sheetDoc.find("{http://schemas.openxmlformats.org/spreadsheetml/2006/main}sheetData") # @type sheetData Element + rowCount = 0 for rowNode in sheetData: rowNum = int(rowNode.get("r")) rowCells = [] + + rowCount += 1 + while rowCount < rowNum: + yield rowCount, [] + rowCount += 1 + for columnNode in rowNode: colType = columnNode.get("t") cellId = columnNode.get("r") From 4998f4ed81c0641784534e6330083be11ce4bce9 Mon Sep 17 00:00:00 2001 From: Paul Kenjora Date: Mon, 19 May 2025 15:53:29 -0700 Subject: [PATCH 2/3] Adding handler for null text value. --- xlsx/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xlsx/__init__.py b/xlsx/__init__.py index 1af115d..7452cd0 100644 --- a/xlsx/__init__.py +++ b/xlsx/__init__.py @@ -176,7 +176,7 @@ def rowsIter(self): stringIndex = columnNode[0].text data = self.workbook.sharedStrings[int(stringIndex)] #Built in date-formatted fields - elif cellS and re.match("^[\d\.]+$", columnNode[0].text): + elif cellS and columnNode[0].text and re.match("^[\d\.]+$", columnNode[0].text): if int(self.workbook.cellStyles[int(cellS)].get('numFmtId')) in range(14, 22+1): data = xldate_as_tuple( float(columnNode[0].text), From eb3adf8f4589b22570ec9a83c5f1c2986b4f8fc8 Mon Sep 17 00:00:00 2001 From: Paul Kenjora Date: Mon, 19 May 2025 19:18:01 -0700 Subject: [PATCH 3/3] Return dates as native python types. --- xlsx/__init__.py | 6 +++--- xlsx/xldate.py | 10 ++++++++++ 2 files changed, 13 insertions(+), 3 deletions(-) diff --git a/xlsx/__init__.py b/xlsx/__init__.py index 7452cd0..eaf5fa2 100644 --- a/xlsx/__init__.py +++ b/xlsx/__init__.py @@ -7,7 +7,7 @@ import re import zipfile -from xlsx.xldate import xldate_as_tuple +from xlsx.xldate import xldate_as_python from xlsx.formatting import is_date_format_string from xlsx.timemachine import UnicodeMixin @@ -178,12 +178,12 @@ def rowsIter(self): #Built in date-formatted fields elif cellS and columnNode[0].text and re.match("^[\d\.]+$", columnNode[0].text): if int(self.workbook.cellStyles[int(cellS)].get('numFmtId')) in range(14, 22+1): - data = xldate_as_tuple( + data = xldate_as_python( float(columnNode[0].text), datemode=0) elif (self.workbook.cellStyles[int(cellS)].get('numFmtId') in self.workbook.numFmts) \ and is_date_format_string(self.workbook.numFmts[self.workbook.cellStyles[int(cellS)].get('numFmtId')]): - data = xldate_as_tuple( + data = xldate_as_python( float(columnNode[0].text), datemode=0) else: diff --git a/xlsx/xldate.py b/xlsx/xldate.py index ed4efd0..23de093 100644 --- a/xlsx/xldate.py +++ b/xlsx/xldate.py @@ -18,6 +18,7 @@ # Noon on Gregorian 1900-03-01 (day 61 in the 1900-based system) is JDN 2415080.0 # Noon on Gregorian 1904-01-02 (day 1 in the 1904-based system) is JDN 2416482.0 +from datetime import date, datetime from xlsx.timemachine import int_floor_div as ifd _JDN_delta = (2415080 - 61, 2416482 - 1) @@ -53,6 +54,15 @@ class XLDateBadTuple(XLDateError): pass # @throws XLDateBadDatemode datemode arg is neither 0 nor 1 # @throws XLDateError Covers the 4 specific errors + +def xldate_as_python(xldate, datemode): + value = xldate_as_tuple(xldate, datemode) + if any(value[3:]): + return datetime(*value) + else: + return date(*value[:3]) + + def xldate_as_tuple(xldate, datemode): if datemode not in (0, 1): raise XLDateBadDatemode(datemode)