From 4ac8be796c43cbfd56ac13eefa5ce4218cde634c Mon Sep 17 00:00:00 2001 From: Joshua Westgard Date: Thu, 16 Sep 2021 17:53:49 -0400 Subject: [PATCH 1/2] add untransferred option to export command --- patsy/commands/export.py | 9 +++++++++ patsy/core/db_gateway.py | 11 ++++++++++- patsy/core/export.py | 10 ++++++++-- patsy/core/load.py | 17 ++++++++--------- patsy/database.py | 8 ++++---- 5 files changed, 39 insertions(+), 16 deletions(-) diff --git a/patsy/commands/export.py b/patsy/commands/export.py index 9a60411..09ba18f 100644 --- a/patsy/commands/export.py +++ b/patsy/commands/export.py @@ -30,16 +30,25 @@ def configure_cli(subparsers) -> None: # type: ignore help='The (optional) file to write output to. Defaults to standard out' ) + parser.add_argument( + '-u', '--untransferred', + action='store_true', + default=False, + help='Export only those records with an empty storage_location.' + ) + class Command(patsy.core.command.Command): def __call__(self, args: argparse.Namespace, gateway: DbGateway) -> str: batch = args.batch output = args.output + untransferred = args.untransferred # Display batch configuration information to the user sys.stderr.write( f'Running export command with the following options:\n\n' f' - batch: {batch}\n' f' - output: {output}\n' + f' - untransferred: {untransferred}\n' '======\n' ) diff --git a/patsy/core/db_gateway.py b/patsy/core/db_gateway.py index 24b5397..05a1b52 100644 --- a/patsy/core/db_gateway.py +++ b/patsy/core/db_gateway.py @@ -20,6 +20,7 @@ def __init__(self, args: Namespace) -> None: use_database_file(args.database) self.session = Session() self.batch_ids: Dict[str, int] = {} + self.filter_untransferred = args.untransferred def add(self, patsy_record: PatsyRecord) -> AddResult: self.add_result = AddResult() @@ -116,7 +117,15 @@ def get_batch_records(self, batch_name: str) -> List[PatsyRecord]: """ SQL_PATSY_RECORD_BY_NAME = \ "SELECT * FROM patsy_records WHERE batch_name=:batch_name" - sql_stmt = 
text(SQL_PATSY_RECORD_BY_NAME) + SQL_PATSY_RECORD_BY_NAME_UNTRANSFERRED = \ + '''SELECT * FROM patsy_records + WHERE batch_name=:batch_name + AND storage_location IS NULL''' + + if self.filter_untransferred: + sql_stmt = text(SQL_PATSY_RECORD_BY_NAME_UNTRANSFERRED) + else: + sql_stmt = text(SQL_PATSY_RECORD_BY_NAME) sql_stmt = sql_stmt.bindparams(batch_name=batch_name) patsy_records: List[PatsyRecord] = [] diff --git a/patsy/core/export.py b/patsy/core/export.py index 27b5288..0dac51a 100644 --- a/patsy/core/export.py +++ b/patsy/core/export.py @@ -48,9 +48,15 @@ def export(self, batch: str, output: str) -> ExportResult: return self.export_result def export_entries(self, batch_list: List[str], file_stream: TextIO) -> None: - writer = csv.DictWriter(file_stream, fieldnames=Load.ALL_CSV_FIELDS, extrasaction='ignore') - + if self.gateway.filter_untransferred: + fieldnames = Load.TRANSFER_MANIFEST_CSV_FIELDS + else: + fieldnames = Load.ALL_CSV_FIELDS + writer = csv.DictWriter( + file_stream, fieldnames=fieldnames, extrasaction='ignore' + ) writer.writeheader() + for b in batch_list: batch_records = self.gateway.get_batch_records(b) if len(batch_records) > 0: diff --git a/patsy/core/load.py b/patsy/core/load.py index d58db1d..34bb57f 100644 --- a/patsy/core/load.py +++ b/patsy/core/load.py @@ -25,16 +25,19 @@ def __repr__(self) -> str: class Load: - ALL_CSV_FIELDS = [ + TRANSFER_MANIFEST_CSV_FIELDS = [ 'BATCH', 'PATH', 'DIRECTORY', 'RELPATH', 'FILENAME', 'EXTENSION', - 'BYTES', 'MTIME', 'MODDATE', 'MD5', 'SHA1', 'SHA256', - 'storageprovider', 'storagepath' + 'BYTES', 'MTIME', 'MODDATE', 'MD5', 'SHA1', 'SHA256' ] + # The following fields are not required in the CSV file + ALLOWED_MISSING_FIELDS = ['storageprovider', 'storagepath'] + + ALL_CSV_FIELDS = TRANSFER_MANIFEST_CSV_FIELDS + ALLOWED_MISSING_FIELDS + # Fields that must be present in the CSV, with non-empty content REQUIRED_CONTENT_CSV_FIELDS = [ - 'BATCH', 'RELPATH', 'FILENAME', 'BYTES', 'MD5', - + 'BATCH', 
'RELPATH', 'FILENAME', 'BYTES', 'MD5' ] # Fields that must be present, but may be empty @@ -44,10 +47,6 @@ class Load: REQUIRED_CSV_FIELDS = REQUIRED_CONTENT_CSV_FIELDS + ALLOWED_EMPTY_CSV_FIELDS - # The following fields are not required in the CSV file - ALLOWED_MISSING_FIELDS = [ - 'storageprovider', 'storagepath' - ] def __init__(self, gateway: DbGateway) -> None: self.gateway = gateway diff --git a/patsy/database.py b/patsy/database.py index 20d867e..db8f318 100644 --- a/patsy/database.py +++ b/patsy/database.py @@ -12,18 +12,18 @@ def use_database_file(database: str) -> None: # Set up database file or use in-memory db if database == ":memory:": - sys.stderr.write(f"Using a transient in-memory database...") + sys.stderr.write(f"Using a transient in-memory database...\n") db_path = f"sqlite:///{database}" elif database.startswith('postgresql:'): - sys.stderr.write(f"Using postgres database at {database}") + sys.stderr.write(f"Using postgres database at {database}\n") db_path = database else: - sys.stderr.write(f"Using database at {database}...") + sys.stderr.write(f"Using database at {database}...\n") db_path = f"sqlite:///{database}" - sys.stderr.write("Binding the database session...") + sys.stderr.write("Binding the database session...\n") engine = create_engine(db_path) From c7715b361c14643aca26712eaecead58569af409 Mon Sep 17 00:00:00 2001 From: Joshua Westgard Date: Fri, 17 Sep 2021 13:05:02 -0400 Subject: [PATCH 2/2] pycodestyle fixes, fix unittest failures, move untransferred filter out of db_gateway init and into this chain of methods: Export.export(), Export.export_entries(), DbGateway.get_batch_records() --- patsy/commands/export.py | 9 +++++---- patsy/core/db_gateway.py | 9 +++++---- patsy/core/export.py | 13 +++++++------ patsy/core/load.py | 22 ++++++++++++++-------- 4 files changed, 31 insertions(+), 22 deletions(-) diff --git a/patsy/commands/export.py b/patsy/commands/export.py index 09ba18f..6920550 100644 --- a/patsy/commands/export.py +++ 
b/patsy/commands/export.py @@ -42,20 +42,21 @@ class Command(patsy.core.command.Command): def __call__(self, args: argparse.Namespace, gateway: DbGateway) -> str: batch = args.batch output = args.output - untransferred = args.untransferred + filter = args.untransferred # Display batch configuration information to the user sys.stderr.write( f'Running export command with the following options:\n\n' f' - batch: {batch}\n' f' - output: {output}\n' - f' - untransferred: {untransferred}\n' + f' - untransferred: {filter}\n' '======\n' ) export_impl = Export(gateway) - export_result = export_impl.export(batch, output) + export_result = export_impl.export(batch, output, filter) result_messages = [ - f"Total (non-empty) Batches exported: {export_result.batches_exported}", + f"Total (non-empty) Batches exported: " + + f"{export_result.batches_exported}", f"Total rows exported: {export_result.rows_exported}", "\nEXPORT COMPLETE" ] diff --git a/patsy/core/db_gateway.py b/patsy/core/db_gateway.py index 05a1b52..f5a63a4 100644 --- a/patsy/core/db_gateway.py +++ b/patsy/core/db_gateway.py @@ -20,7 +20,6 @@ def __init__(self, args: Namespace) -> None: use_database_file(args.database) self.session = Session() self.batch_ids: Dict[str, int] = {} - self.filter_untransferred = args.untransferred def add(self, patsy_record: PatsyRecord) -> AddResult: self.add_result = AddResult() @@ -107,7 +106,9 @@ def get_batch_by_name(self, name: str) -> Optional[Batch]: """ return cast(Optional[Batch], self.session.query(Batch).filter(Batch.name == name).first()) - def get_batch_records(self, batch_name: str) -> List[PatsyRecord]: + def get_batch_records( + self, batch_name: str, untransferred_only=False + ) -> List[PatsyRecord]: """ Returns a (possibly empty) List of PatsyRecord objects representing the data from the given batch. 
@@ -118,11 +119,11 @@ def get_batch_records(self, batch_name: str) -> List[PatsyRecord]: SQL_PATSY_RECORD_BY_NAME = \ "SELECT * FROM patsy_records WHERE batch_name=:batch_name" SQL_PATSY_RECORD_BY_NAME_UNTRANSFERRED = \ - '''SELECT * FROM patsy_records + '''SELECT * FROM patsy_records WHERE batch_name=:batch_name AND storage_location IS NULL''' - if self.filter_untransferred: + if untransferred_only: sql_stmt = text(SQL_PATSY_RECORD_BY_NAME_UNTRANSFERRED) else: sql_stmt = text(SQL_PATSY_RECORD_BY_NAME) diff --git a/patsy/core/export.py b/patsy/core/export.py index 0dac51a..d16c643 100644 --- a/patsy/core/export.py +++ b/patsy/core/export.py @@ -29,7 +29,7 @@ def __init__(self, gateway: DbGateway) -> None: self.gateway = gateway self.export_result = ExportResult() - def export(self, batch: str, output: str) -> ExportResult: + def export(self, batch: str, output: str, untransferred: bool) -> ExportResult: batch_list = [] if batch is None: @@ -40,15 +40,16 @@ def export(self, batch: str, output: str) -> ExportResult: if output is None: out = sys.stdout - self.export_entries(batch_list, out) + self.export_entries(batch_list, out, untransferred) return self.export_result else: with open(output, mode='w') as file_stream: - self.export_entries(batch_list, file_stream) + self.export_entries(batch_list, file_stream, untransferred) return self.export_result - def export_entries(self, batch_list: List[str], file_stream: TextIO) -> None: - if self.gateway.filter_untransferred: + def export_entries(self, batch_list: List[str], + file_stream: TextIO, untransferred: bool) -> None: + if untransferred: fieldnames = Load.TRANSFER_MANIFEST_CSV_FIELDS else: fieldnames = Load.ALL_CSV_FIELDS @@ -58,7 +59,7 @@ def export_entries(self, batch_list: List[str], file_stream: TextIO) -> None: writer.writeheader() for b in batch_list: - batch_records = self.gateway.get_batch_records(b) + batch_records = self.gateway.get_batch_records(b, untransferred) if len(batch_records) > 0: 
self.export_result.batches_exported += 1 for patsy_record in batch_records: diff --git a/patsy/core/load.py b/patsy/core/load.py index 34bb57f..6f0bc07 100644 --- a/patsy/core/load.py +++ b/patsy/core/load.py @@ -20,7 +20,6 @@ def __repr__(self) -> str: f"locations_added='{self.locations_added}'", f"errors='{self.errors}'" ] - return f"" @@ -47,7 +46,6 @@ class Load: REQUIRED_CSV_FIELDS = REQUIRED_CONTENT_CSV_FIELDS + ALLOWED_EMPTY_CSV_FIELDS - def __init__(self, gateway: DbGateway) -> None: self.gateway = gateway self.load_result = LoadResult() @@ -63,20 +61,27 @@ def process_file(self, file: str) -> LoadResult: csv_line_index += 1 if add_result: - self.load_result.batches_added += add_result.batches_added - self.load_result.accessions_added += add_result.accessions_added - self.load_result.locations_added += add_result.locations_added + self.load_result.batches_added += \ + add_result.batches_added + self.load_result.accessions_added += \ + add_result.accessions_added + self.load_result.locations_added += \ + add_result.locations_added return self.load_result - def process_csv_row(self, csv_line_index: int, row: Dict[str, str]) -> Optional[AddResult]: + def process_csv_row( + self, csv_line_index: int, row: Dict[str, str] + ) -> Optional[AddResult]: if not self.is_row_valid(csv_line_index, row): return None patsy_record = PatsyUtils.from_inventory_csv(row) return self.gateway.add(patsy_record) - def is_row_valid(self, csv_line_index: int, row_dict: Dict[str, str]) -> bool: + def is_row_valid( + self, csv_line_index: int, row_dict: Dict[str, str] + ) -> bool: """ Returns True if the given row is valid, False otherwise. 
""" @@ -94,7 +99,8 @@ def is_row_valid(self, csv_line_index: int, row_dict: Dict[str, str]) -> bool: if missing_fields or missing_values: self.load_result.errors.append( - f"Line {csv_line_index}, missing_fields: {missing_fields}, missing_values = {missing_values}" + f"Line {csv_line_index}, missing_fields:" + + f" {missing_fields}, missing_values = {missing_values}" ) return False return True