From 349507f215a168c5eb4d2d00a129f8ae234a6e07 Mon Sep 17 00:00:00 2001 From: Roman Ludwig <48687784+rmnldwg@users.noreply.github.com> Date: Wed, 10 Sep 2025 16:49:08 +0200 Subject: [PATCH 1/4] fix: convert dtypes during joining --- src/lyscripts/data/join.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/lyscripts/data/join.py b/src/lyscripts/data/join.py index d7eb62e..a48db9a 100644 --- a/src/lyscripts/data/join.py +++ b/src/lyscripts/data/join.py @@ -60,7 +60,9 @@ def cli_cmd(self) -> None: joined = None for data_config in self.inputs: - data = data_config.load() + # `.convert_dtypes()` ensures that e.g. boolean values are not suddenly + # converted to strings when a dataset with missing values is concatenated. + data = data_config.load().convert_dtypes() if joined is None: joined = data else: From e017b881403763850692059a04a161649e180865 Mon Sep 17 00:00:00 2001 From: Roman Ludwig <48687784+rmnldwg@users.noreply.github.com> Date: Wed, 10 Sep 2025 16:55:43 +0200 Subject: [PATCH 2/4] chore: update changelog --- CHANGELOG.md | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 4b0fcda..76e866e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,12 @@ All notable changes to this project will be documented in this file. +## [1.0.1] - 2025-09-10 + +### Bug Fixes + +- `.convert_dtypes()` during joining of tables. Not doing this caused pandas to interpret e.g. booleans with missing values as strings. + ## [1.0.0] - 2025-09-04 ### Bug Fixes @@ -909,6 +915,7 @@ returns `None` instead. 
Fixes [#11] ## [0.5.3] - 2022-08-22 +[1.0.1]: https://github.com/lycosystem/lyscripts/compare/1.0.0...1.0.1 [1.0.0]: https://github.com/lycosystem/lyscripts/compare/1.0.0rc3...1.0.0 [1.0.0rc3]: https://github.com/lycosystem/lyscripts/compare/1.0.0rc2...1.0.0rc3 [1.0.0rc2]: https://github.com/lycosystem/lyscripts/compare/1.0.0rc1...1.0.0rc2 From 8adbf4a4a6b351be7d422c1c031d2d1b123192bc Mon Sep 17 00:00:00 2001 From: Roman Ludwig <48687784+rmnldwg@users.noreply.github.com> Date: Wed, 10 Sep 2025 17:00:04 +0200 Subject: [PATCH 3/4] change: use lydata's `cast_dtypes()` --- src/lyscripts/data/join.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/lyscripts/data/join.py b/src/lyscripts/data/join.py index a48db9a..8b0547e 100644 --- a/src/lyscripts/data/join.py +++ b/src/lyscripts/data/join.py @@ -3,6 +3,7 @@ from pathlib import Path import pandas as pd +from lydata.validator import cast_dtypes from pydantic import Field from lyscripts.cli import assemble_main @@ -60,9 +61,10 @@ def cli_cmd(self) -> None: joined = None for data_config in self.inputs: - # `.convert_dtypes()` ensures that e.g. boolean values are not suddenly + data = data_config.load() + # `cast_dtypes()` ensures that e.g. boolean values are not suddenly # converted to strings when a dataset with missing values is concatenated. - data = data_config.load().convert_dtypes() + data = cast_dtypes(data) if joined is None: joined = data else: From 9848cb5def0ec56218b189ccb6b254879dc4c0cd Mon Sep 17 00:00:00 2001 From: Roman Ludwig <48687784+rmnldwg@users.noreply.github.com> Date: Wed, 10 Sep 2025 17:00:47 +0200 Subject: [PATCH 4/4] chore: update changelog --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 76e866e..c5e5ef2 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,7 +6,7 @@ All notable changes to this project will be documented in this file. ### Bug Fixes -- `.convert_dtypes()` during joining of tables. 
Not doing this caused pandas to interpret e.g. booleans with missing values as strings. +- Convert dtypes during joining using lydata's `cast_dtypes()`. ## [1.0.0] - 2025-09-04