From b0401e8b69c116f7fe99b4a5889f4ca9eab9ae44 Mon Sep 17 00:00:00 2001 From: gqcolli Date: Fri, 7 Nov 2025 14:45:31 -0700 Subject: [PATCH 1/2] use numpy to export/import sparse tensors --- pyttb/export_data.py | 15 +++++---------- pyttb/import_data.py | 15 ++++++--------- 2 files changed, 11 insertions(+), 19 deletions(-) diff --git a/pyttb/export_data.py b/pyttb/export_data.py index 520e89a2..15f18add 100644 --- a/pyttb/export_data.py +++ b/pyttb/export_data.py @@ -100,7 +100,7 @@ def export_sparse_size(fp: TextIO, A: ttb.sptensor): print(f"{len(A.shape)}", file=fp) # # of dimensions on one line shape_str = " ".join([str(d) for d in A.shape]) print(f"{shape_str}", file=fp) # size of each dimensions on the next line - print(f"{A.nnz}", file=fp) # number of nonzeros + print(f"{A.nnz}", file=fp) # number of nonzeros (could eliminate) def export_sparse_array( @@ -109,12 +109,7 @@ def export_sparse_array( """Export sparse array data in coordinate format.""" if not fmt_data: fmt_data = "%.16e" - # TODO: looping through all values may take a long time, can this be more efficient? - for i in range(A.nnz): - # 0-based indexing in package, 1-based indexing in file - subs = A.subs[i, :] + index_base - subs.tofile(fp, sep=" ", format="%d") - print(end=" ", file=fp) - val = A.vals[i][0] - val.tofile(fp, sep=" ", format=fmt_data) - print(file=fp) + # 0-based indexing in package, 1-based indexing in file + subs = A.subs + index_base + vals = A.vals[:, 0].reshape(-1, 1) + np.savetxt(fp, np.hstack((subs, vals)), fmt="%d " * subs.shape[1] + fmt_data) diff --git a/pyttb/import_data.py b/pyttb/import_data.py index 7c73c26d..757c87f1 100644 --- a/pyttb/import_data.py +++ b/pyttb/import_data.py @@ -46,8 +46,8 @@ def import_data( if data_type == "sptensor": shape = import_shape(fp) - nz = import_nnz(fp) - subs, vals = import_sparse_array(fp, len(shape), nz, index_base) + _ = import_nnz(fp) # Could remove this line from export + subs, vals = import_sparse_array(fp, index_base) return ttb.sptensor(subs, vals, shape) if data_type == "matrix": @@ -96,15 +96,12 @@ def import_rank(fp: TextIO) -> int: def import_sparse_array( - fp: TextIO, n: int, nz: int, index_base: int = 1 + fp: TextIO, index_base: int = 1 ) -> tuple[np.ndarray, np.ndarray]: """Extract sparse data subs and vals from coordinate format data.""" - subs = np.zeros((nz, n), dtype="int64") - vals = np.zeros((nz, 1)) - for k in range(nz): - line = fp.readline().strip().split(" ") - subs[k, :] = [np.int64(i) - index_base for i in line[:-1]] - vals[k, 0] = line[-1] + data = np.loadtxt(fp) + subs = data[:, :-1].astype("int64") - index_base + vals = data[:, -1].reshape(-1, 1) return subs, vals From d974d5c449bdb908f05552ac7f6929eb1256a821 Mon Sep 17 00:00:00 2001 From: gqcolli Date: Fri, 14 Nov 2025 10:03:43 -0700 Subject: [PATCH 2/2] check imported tensor dimensions --- pyttb/export_data.py | 2 +- pyttb/import_data.py | 10 +++++++--- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/pyttb/export_data.py b/pyttb/export_data.py index 15f18add..09dd3d36 100644 --- a/pyttb/export_data.py +++ b/pyttb/export_data.py @@ -100,7 +100,7 @@ def export_sparse_size(fp: TextIO, A: ttb.sptensor): print(f"{len(A.shape)}", file=fp) # # of dimensions on one line shape_str = " ".join([str(d) for d in A.shape]) print(f"{shape_str}", file=fp) # size of each dimensions on the next line - print(f"{A.nnz}", file=fp) # number of nonzeros (could eliminate) + print(f"{A.nnz}", file=fp) # number of nonzeros def export_sparse_array( diff --git a/pyttb/import_data.py b/pyttb/import_data.py index 757c87f1..c31cc167 100644 --- a/pyttb/import_data.py +++ b/pyttb/import_data.py @@ -46,8 +46,8 @@ def import_data( if data_type == "sptensor": shape = import_shape(fp) - _ = import_nnz(fp) # Could remove this line from export - subs, vals = import_sparse_array(fp, index_base) + nz = import_nnz(fp) + subs, vals = import_sparse_array(fp, len(shape), nz, index_base) return ttb.sptensor(subs, vals, shape) if data_type == "matrix": @@ -96,12 +96,16 @@ def import_rank(fp: TextIO) -> int: def import_sparse_array( - fp: TextIO, index_base: int = 1 + fp: TextIO, n: int, nz: int, index_base: int = 1 ) -> tuple[np.ndarray, np.ndarray]: """Extract sparse data subs and vals from coordinate format data.""" data = np.loadtxt(fp) subs = data[:, :-1].astype("int64") - index_base vals = data[:, -1].reshape(-1, 1) + if subs.shape[0] != nz: + raise ValueError("Imported nonzeros are not of expected size") + if subs.shape[1] != n: + raise ValueError("Imported tensor is not of expected shape") return subs, vals