diff --git a/patsy/commands/export.py b/patsy/commands/export.py index 9a60411..6920550 100644 --- a/patsy/commands/export.py +++ b/patsy/commands/export.py @@ -30,23 +30,33 @@ def configure_cli(subparsers) -> None: # type: ignore help='The (optional) file to write output to. Defaults to standard out' ) + parser.add_argument( + '-u', '--untransferred', + action='store_true', + default=False, + help='Export only those records with an empty storage_location.' + ) + class Command(patsy.core.command.Command): def __call__(self, args: argparse.Namespace, gateway: DbGateway) -> str: batch = args.batch output = args.output + untransferred = args.untransferred # Display batch configuration information to the user sys.stderr.write( f'Running export command with the following options:\n\n' f' - batch: {batch}\n' f' - output: {output}\n' + f' - untransferred: {untransferred}\n' '======\n' ) export_impl = Export(gateway) - export_result = export_impl.export(batch, output) + export_result = export_impl.export(batch, output, untransferred) result_messages = [ - f"Total (non-empty) Batches exported: {export_result.batches_exported}", + f"Total (non-empty) Batches exported: " + + f"{export_result.batches_exported}", f"Total rows exported: {export_result.rows_exported}", "\nEXPORT COMPLETE" ] diff --git a/patsy/core/db_gateway.py b/patsy/core/db_gateway.py index 24b5397..f5a63a4 100644 --- a/patsy/core/db_gateway.py +++ b/patsy/core/db_gateway.py @@ -106,7 +106,9 @@ def get_batch_by_name(self, name: str) -> Optional[Batch]: """ return cast(Optional[Batch], self.session.query(Batch).filter(Batch.name == name).first()) - def get_batch_records(self, batch_name: str) -> List[PatsyRecord]: + def get_batch_records( + self, batch_name: str, untransferred_only: bool = False + ) -> List[PatsyRecord]: """ Returns a (possibly empty) List of PatsyRecord objects representing the data from the given batch. 
@@ -116,7 +118,15 @@ def get_batch_records(self, batch_name: str) -> List[PatsyRecord]: """ SQL_PATSY_RECORD_BY_NAME = \ "SELECT * FROM patsy_records WHERE batch_name=:batch_name" - sql_stmt = text(SQL_PATSY_RECORD_BY_NAME) + SQL_PATSY_RECORD_BY_NAME_UNTRANSFERRED = \ + '''SELECT * FROM patsy_records + WHERE batch_name=:batch_name + AND storage_location IS NULL''' + + if untransferred_only: + sql_stmt = text(SQL_PATSY_RECORD_BY_NAME_UNTRANSFERRED) + else: + sql_stmt = text(SQL_PATSY_RECORD_BY_NAME) sql_stmt = sql_stmt.bindparams(batch_name=batch_name) patsy_records: List[PatsyRecord] = [] diff --git a/patsy/core/export.py b/patsy/core/export.py index 27b5288..d16c643 100644 --- a/patsy/core/export.py +++ b/patsy/core/export.py @@ -29,7 +29,7 @@ def __init__(self, gateway: DbGateway) -> None: self.gateway = gateway self.export_result = ExportResult() - def export(self, batch: str, output: str) -> ExportResult: + def export(self, batch: str, output: str, untransferred: bool) -> ExportResult: batch_list = [] if batch is None: @@ -40,19 +40,26 @@ def export(self, batch: str, output: str) -> ExportResult: if output is None: out = sys.stdout - self.export_entries(batch_list, out) + self.export_entries(batch_list, out, untransferred) return self.export_result else: with open(output, mode='w') as file_stream: - self.export_entries(batch_list, file_stream) + self.export_entries(batch_list, file_stream, untransferred) return self.export_result - def export_entries(self, batch_list: List[str], file_stream: TextIO) -> None: - writer = csv.DictWriter(file_stream, fieldnames=Load.ALL_CSV_FIELDS, extrasaction='ignore') - + def export_entries(self, batch_list: List[str], + file_stream: TextIO, untransferred: bool) -> None: + if untransferred: + fieldnames = Load.TRANSFER_MANIFEST_CSV_FIELDS + else: + fieldnames = Load.ALL_CSV_FIELDS + writer = csv.DictWriter( + file_stream, fieldnames=fieldnames, extrasaction='ignore' + ) writer.writeheader() + for b in batch_list: - batch_records = 
self.gateway.get_batch_records(b) + batch_records = self.gateway.get_batch_records(b, untransferred) if len(batch_records) > 0: self.export_result.batches_exported += 1 for patsy_record in batch_records: diff --git a/patsy/core/load.py b/patsy/core/load.py index d58db1d..6f0bc07 100644 --- a/patsy/core/load.py +++ b/patsy/core/load.py @@ -20,21 +20,23 @@ def __repr__(self) -> str: f"locations_added='{self.locations_added}'", f"errors='{self.errors}'" ] - return f"" class Load: - ALL_CSV_FIELDS = [ + TRANSFER_MANIFEST_CSV_FIELDS = [ 'BATCH', 'PATH', 'DIRECTORY', 'RELPATH', 'FILENAME', 'EXTENSION', - 'BYTES', 'MTIME', 'MODDATE', 'MD5', 'SHA1', 'SHA256', - 'storageprovider', 'storagepath' + 'BYTES', 'MTIME', 'MODDATE', 'MD5', 'SHA1', 'SHA256' ] + # The following fields are not required in the CSV file + ALLOWED_MISSING_FIELDS = ['storageprovider', 'storagepath'] + + ALL_CSV_FIELDS = TRANSFER_MANIFEST_CSV_FIELDS + ALLOWED_MISSING_FIELDS + # Fields that must be present in the CSV, with non-empty content REQUIRED_CONTENT_CSV_FIELDS = [ - 'BATCH', 'RELPATH', 'FILENAME', 'BYTES', 'MD5', - + 'BATCH', 'RELPATH', 'FILENAME', 'BYTES', 'MD5' ] # Fields that must be present, but may be empty @@ -44,11 +46,6 @@ class Load: REQUIRED_CSV_FIELDS = REQUIRED_CONTENT_CSV_FIELDS + ALLOWED_EMPTY_CSV_FIELDS - # The following fields are not required in the CSV file - ALLOWED_MISSING_FIELDS = [ - 'storageprovider', 'storagepath' - ] - def __init__(self, gateway: DbGateway) -> None: self.gateway = gateway self.load_result = LoadResult() @@ -64,20 +61,27 @@ def process_file(self, file: str) -> LoadResult: csv_line_index += 1 if add_result: - self.load_result.batches_added += add_result.batches_added - self.load_result.accessions_added += add_result.accessions_added - self.load_result.locations_added += add_result.locations_added + self.load_result.batches_added += \ + add_result.batches_added + self.load_result.accessions_added += \ + add_result.accessions_added + 
self.load_result.locations_added += \ + add_result.locations_added return self.load_result - def process_csv_row(self, csv_line_index: int, row: Dict[str, str]) -> Optional[AddResult]: + def process_csv_row( + self, csv_line_index: int, row: Dict[str, str] + ) -> Optional[AddResult]: if not self.is_row_valid(csv_line_index, row): return None patsy_record = PatsyUtils.from_inventory_csv(row) return self.gateway.add(patsy_record) - def is_row_valid(self, csv_line_index: int, row_dict: Dict[str, str]) -> bool: + def is_row_valid( + self, csv_line_index: int, row_dict: Dict[str, str] + ) -> bool: """ Returns True if the given row is valid, False otherwise. """ @@ -95,7 +99,8 @@ def is_row_valid(self, csv_line_index: int, row_dict: Dict[str, str]) -> bool: if missing_fields or missing_values: self.load_result.errors.append( - f"Line {csv_line_index}, missing_fields: {missing_fields}, missing_values = {missing_values}" + f"Line {csv_line_index}, missing_fields:" + + f" {missing_fields}, missing_values = {missing_values}" ) return False return True diff --git a/patsy/database.py b/patsy/database.py index 20d867e..db8f318 100644 --- a/patsy/database.py +++ b/patsy/database.py @@ -12,18 +12,18 @@ def use_database_file(database: str) -> None: # Set up database file or use in-memory db if database == ":memory:": - sys.stderr.write(f"Using a transient in-memory database...") + sys.stderr.write("Using a transient in-memory database...\n") db_path = f"sqlite:///{database}" elif database.startswith('postgresql:'): - sys.stderr.write(f"Using postgres database at {database}") + sys.stderr.write(f"Using postgres database at {database}\n") db_path = database else: - sys.stderr.write(f"Using database at {database}...") + sys.stderr.write(f"Using database at {database}...\n") db_path = f"sqlite:///{database}" - sys.stderr.write("Binding the database session...") + sys.stderr.write("Binding the database session...\n") engine = create_engine(db_path)