Skip to content

Commit b130bb7

Browse files
authored
HotFix: PG to PG is empty bug (#130)
There was an inconsistency between the types returned from Postgres and those being passed in: PG.fetch returned `DataFrame`, but PG.save expected `TypedDataFrame`. This resulted in a bug where data.is_empty did not exist. This PR aligns the types naively, but this will all be improved in a follow-up (#129), which assigns proper (non-empty) types to Postgres dataframes.
1 parent f0979a9 commit b130bb7

File tree

3 files changed

+17
-8
lines changed

3 files changed

+17
-8
lines changed

config.yaml

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,3 +48,11 @@ jobs:
4848
ref: PG
4949
table_name: cow.solvers
5050
if_exists: replace
51+
52+
- name: p2p-test
53+
source:
54+
ref: PG
55+
query_string: "SELECT 1 as number, '\\x1234'::bytea as my_bytes;"
56+
destination:
57+
ref: PG
58+
table_name: moo.p2p-test

src/sources/postgres.py

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99
from sqlalchemy import create_engine, text
1010
from sqlalchemy.exc import SQLAlchemyError
1111

12-
from src.interfaces import Source
12+
from src.interfaces import Source, TypedDataFrame
1313
from src.logger import log
1414

1515

@@ -41,7 +41,7 @@ def _convert_bytea_to_hex(df: DataFrame) -> DataFrame:
4141
return df
4242

4343

44-
class PostgresSource(Source[DataFrame]):
44+
class PostgresSource(Source[TypedDataFrame]):
4545
"""Represent PostgreSQL as a data source for retrieving data via SQL queries.
4646
4747
This class connects to a PostgreSQL database using SQLAlchemy and executes a query
@@ -100,7 +100,7 @@ def validate(self) -> bool:
100100
log.error("Invalid SQL query: %s", str(e))
101101
return False
102102

103-
async def fetch(self) -> DataFrame:
103+
async def fetch(self) -> TypedDataFrame:
104104
"""Execute the SQL query and retrieves the result as a DataFrame.
105105
106106
Returns
@@ -121,9 +121,10 @@ async def fetch(self) -> DataFrame:
121121
df = await loop.run_in_executor(
122122
None, lambda: pd.read_sql_query(self.query_string, con=self.engine)
123123
)
124-
return _convert_bytea_to_hex(df)
124+
# TODO include types.
125+
return TypedDataFrame(dataframe=_convert_bytea_to_hex(df), types={})
125126

126-
def is_empty(self, data: DataFrame) -> bool:
127+
def is_empty(self, data: TypedDataFrame) -> bool:
127128
"""Check if the provided DataFrame is empty.
128129
129130
Parameters
@@ -137,7 +138,7 @@ def is_empty(self, data: DataFrame) -> bool:
137138
True if the DataFrame is empty, False otherwise.
138139
139140
"""
140-
return data.empty
141+
return data.is_empty()
141142

142143
def _set_query_string(self, query_string: str) -> None:
143144
"""Set the SQL query string directly or from a file if it ends with '.sql'.

tests/unit/sources_test.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
from sqlalchemy.dialects.postgresql import BYTEA
1010

1111
from src.config import RuntimeConfig
12+
from src.interfaces import TypedDataFrame
1213
from src.sources.dune import _reformat_varbinary_columns, dune_result_to_df
1314
from src.sources.postgres import PostgresSource, _convert_bytea_to_hex
1415
from tests import config_root, fixtures_root
@@ -136,5 +137,4 @@ def test_is_empty(self):
136137
db_url="postgresql://postgres:postgres@localhost:5432/postgres",
137138
query_string="SELECT 1",
138139
)
139-
df = pd.DataFrame([])
140-
self.assertTrue(src.is_empty(df))
140+
self.assertTrue(src.is_empty(TypedDataFrame(pd.DataFrame([]), {})))

0 commit comments

Comments
 (0)