diff --git a/pyproject.toml b/pyproject.toml
index 6bae7791d..f545dc077 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -43,7 +43,7 @@ dependencies = [
     "matplotlib",
     "numpy",
     "opm>=2023.04",
-    "pandas",
+    "pandas >= 2",
     "pydantic",
     "pyscal",
     "pyyaml",
diff --git a/src/subscript/fmuobs/parsers.py b/src/subscript/fmuobs/parsers.py
index 1321db43b..6d9adfa0b 100644
--- a/src/subscript/fmuobs/parsers.py
+++ b/src/subscript/fmuobs/parsers.py
@@ -5,7 +5,6 @@
 import re
 from pathlib import Path
 
-import numpy as np
 import pandas as pd
 
 from subscript import getLogger
@@ -431,20 +430,25 @@ def compute_date_from_days(
         to this starttime, and converted to DATE.
 
     Returns:
-        pd.DataFrame. DATE column is always of type datetime64
+        pd.DataFrame. DATE column is always datetime-like
+        (datetime64 unit depends on pandas)
     """
     assert isinstance(dframe, pd.DataFrame)
-    if starttime and "DAYS" in dframe:
-        if "DATE" not in dframe:
-            dframe["DATE"] = np.nan
-        start = pd.to_datetime(starttime)
-        date_needed_rows = ~dframe["DAYS"].isna() & dframe["DATE"].isna()
-        dframe["DATE"] = pd.to_datetime(dframe["DATE"])
-        dframe.loc[date_needed_rows, "DATE"] = start + pd.to_timedelta(
-            dframe.loc[date_needed_rows, "DAYS"], "d"
-        )
+    if "DATE" in dframe:
+        dframe["DATE"] = pd.to_datetime(dframe["DATE"])
+
+    if not starttime or "DAYS" not in dframe:
+        return dframe
+
+    start = pd.to_datetime(starttime)
+    computed_dates = start + pd.to_timedelta(dframe["DAYS"], unit="D")
+
+    if "DATE" in dframe:
+        dframe["DATE"] = dframe["DATE"].combine_first(computed_dates)
+    else:
+        dframe["DATE"] = computed_dates
+
     return dframe
diff --git a/src/subscript/fmuobs/writers.py b/src/subscript/fmuobs/writers.py
index 39bc15ccb..b1ebcacb9 100644
--- a/src/subscript/fmuobs/writers.py
+++ b/src/subscript/fmuobs/writers.py
@@ -292,14 +292,10 @@ def convert_dframe_date_to_str(dframe: pd.DataFrame) -> pd.DataFrame:
         pd.DataFrame: DATE as a string type
     """
     if "DATE" in dframe:
-        with pd.option_context("future.no_silent_downcasting", True):
-            dframe = dframe.copy()
-            dframe["DATE"] = (
-                dframe["DATE"]
-                .astype(str)
-                .replace(["NaT", "NaN", "nan"], np.nan)
-                .infer_objects(copy=False)
-            )
+        dframe = dframe.copy()
+        dframe["DATE"] = (
+            dframe["DATE"].astype(str).replace(["NaT", "NaN", "nan"], np.nan)
+        )
     return dframe
diff --git a/tests/test_check_swatinit.py b/tests/test_check_swatinit.py
index 8cef86cf4..3e5d6d6cd 100644
--- a/tests/test_check_swatinit.py
+++ b/tests/test_check_swatinit.py
@@ -558,7 +558,9 @@ def test_eqlnum2(tmp_path, mocker):
 def test_reorder_dframe_for_nonnans(inputrows, expected):
     """Test that rows with less NaNs will be prioritized through
     the reorder function"""
     pd.testing.assert_frame_equal(
-        reorder_dframe_for_nonnans(pd.DataFrame(inputrows)), pd.DataFrame(expected)
+        reorder_dframe_for_nonnans(pd.DataFrame(inputrows)),
+        pd.DataFrame(expected),
+        check_column_type=False,
     )
diff --git a/tests/test_csv2ofmvol.py b/tests/test_csv2ofmvol.py
index cfdb0dd9e..d9744641d 100644
--- a/tests/test_csv2ofmvol.py
+++ b/tests/test_csv2ofmvol.py
@@ -279,7 +279,9 @@ def test_df2vol(dframe, expected_lines):
     else:
         # (bogus columns in dframe must be ignored)
         pd.testing.assert_frame_equal(
-            dframe[backagain_df.columns].fillna(value=0.0), backagain_df
+            dframe[backagain_df.columns].fillna(value=0.0),
+            backagain_df,
+            check_index_type=False,
         )
diff --git a/tests/test_fmuobs.py b/tests/test_fmuobs.py
index 8a4125e72..c8ddf5bc5 100644
--- a/tests/test_fmuobs.py
+++ b/tests/test_fmuobs.py
@@ -198,6 +198,7 @@ def test_roundtrip_yaml(filename, readonly_testdata_dir):
         yaml_roundtrip_dframe.sort_index(axis="columns").sort_values("LABEL"),
         dframe.sort_index(axis="columns").sort_values("LABEL"),
         check_like=True,
+        check_dtype=False,
     )
diff --git a/tests/test_fmuobs_parsers.py b/tests/test_fmuobs_parsers.py
index fc96f1e63..b824dc5de 100644
--- a/tests/test_fmuobs_parsers.py
+++ b/tests/test_fmuobs_parsers.py
@@ -508,11 +508,13 @@ def test_ertobs2df_starttime(string, expected):
     pd.testing.assert_frame_equal(
         ertobs2df(string, starttime="2020-01-01").sort_index(axis=1),
         expected.sort_index(axis=1),
+        check_dtype=False,
     )
     # Test again with datetime object passed, not string:
     pd.testing.assert_frame_equal(
         ertobs2df(string, starttime=datetime.date(2020, 1, 1)).sort_index(axis=1),
         expected.sort_index(axis=1),
+        check_dtype=False,
     )
diff --git a/tests/test_fmuobs_writers.py b/tests/test_fmuobs_writers.py
index 1aa21bfc5..a61e99457 100644
--- a/tests/test_fmuobs_writers.py
+++ b/tests/test_fmuobs_writers.py
@@ -533,6 +533,7 @@ def test_convert_dframe_date_to_str(dframe, expected_dframe):
     pd.testing.assert_frame_equal(
         convert_dframe_date_to_str(dframe),
         expected_dframe,
+        check_dtype=False,
     )
diff --git a/tests/test_ofmvol2csv.py b/tests/test_ofmvol2csv.py
index 15f8c1364..3581a8943 100644
--- a/tests/test_ofmvol2csv.py
+++ b/tests/test_ofmvol2csv.py
@@ -270,7 +270,7 @@ def test_parse_well(inputlines, expected):
     inputlines = ofmvol2csv.cleanse_ofm_lines(inputlines)
     colnames = ofmvol2csv.extract_columnnames(inputlines)
     dframe = ofmvol2csv.parse_well(inputlines[1:], colnames)
-    pd.testing.assert_frame_equal(dframe, expected)
+    pd.testing.assert_frame_equal(dframe, expected, check_index_type=False)
 
 
 @pytest.mark.parametrize(
@@ -362,7 +362,11 @@ def test_process_volstr(inputlines, expected):
     expected["DATE"] = pd.to_datetime(expected["DATE"])
     expected = expected.set_index(["WELL", "DATE"])
     dframe = ofmvol2csv.process_volstr("\n".join(inputlines))
-    pd.testing.assert_frame_equal(dframe, expected)
+    pd.testing.assert_frame_equal(
+        dframe,
+        expected,
+        check_index_type=False,
+    )
 
 
 @pytest.mark.parametrize(
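
The main behavioural change in this patch is the rewrite of `compute_date_from_days()` in `src/subscript/fmuobs/parsers.py`. The snippet below is a minimal standalone sketch of that logic, not part of the patch itself: the `DATE`/`DAYS` handling mirrors the hunk above, while the function name `compute_date_from_days_sketch` and the example frame are invented for illustration and assume `pandas >= 2` as pinned in `pyproject.toml`.

```python
# Sketch of the refactored date-filling logic: DATE values that already exist
# are kept, missing ones are computed as starttime + DAYS. Example data is
# made up; only the column names and the logic follow the patch.
import pandas as pd


def compute_date_from_days_sketch(dframe, starttime=None):
    """Fill missing DATE values from DAYS relative to starttime."""
    if "DATE" in dframe:
        # Normalize whatever is already in DATE (strings, datetimes, NaN) to datetime
        dframe["DATE"] = pd.to_datetime(dframe["DATE"])

    if not starttime or "DAYS" not in dframe:
        return dframe

    start = pd.to_datetime(starttime)
    computed_dates = start + pd.to_timedelta(dframe["DAYS"], unit="D")

    if "DATE" in dframe:
        # Explicit dates win; only NaT rows are filled from DAYS
        dframe["DATE"] = dframe["DATE"].combine_first(computed_dates)
    else:
        dframe["DATE"] = computed_dates

    return dframe


frame = pd.DataFrame({"DAYS": [0, 10, 20], "DATE": [None, "2020-01-15", None]})
print(compute_date_from_days_sketch(frame, starttime="2020-01-01"))
# Rows 0 and 2 get 2020-01-01 and 2020-01-21 computed from DAYS;
# the explicit 2020-01-15 in row 1 is preserved by combine_first().
```

As the updated docstring notes, the unit of the resulting datetime64 column can vary with the pandas version, which is what the relaxed `check_dtype=False` and `check_index_type=False` assertions in the test changes accommodate.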