From c5d03542dafcc039e36233f9014d38e2832449de Mon Sep 17 00:00:00 2001 From: Gvinfinity Date: Sun, 19 Jun 2022 19:55:34 -0300 Subject: [PATCH 1/2] Parallelism and minor typing fix --- main.py | 29 ++++++++++++++++++----------- src/PPTXtoPDF.py | 17 +++++++++++++---- src/generatePPTX.py | 2 +- 3 files changed, 32 insertions(+), 16 deletions(-) diff --git a/main.py b/main.py index 41bad96..6bf431b 100644 --- a/main.py +++ b/main.py @@ -2,6 +2,7 @@ from pathlib import Path import glob import shutil +import concurrent.futures from typing import Dict, List import click from pptx.enum.text import PP_ALIGN @@ -73,21 +74,27 @@ def main( os.makedirs(output_dir, exist_ok=True) os.makedirs(Path(output_dir).joinpath('pptx'), exist_ok=True) - if multiple_fields: - parsed_data = readCSVConfig(data) - for value_row in parsed_data['values']: - print(f"Generating certificate for {value_row[0]}") - generateCertificate(model, parsed_data['fields'], - value_row, options, output_dir) - else: - parsed_data = readTXTConfig(data) - for name in parsed_data: - print(f"Generating certificate for {name}") - generateCertificate(model, ['name'], [name], options, output_dir) + with concurrent.futures.ThreadPoolExecutor() as executor: + futures = [] + if multiple_fields: + parsed_data = readCSVConfig(data) + for value_row in parsed_data['values']: + print(f"Generating certificate for {value_row[0]}") + futures.append(executor.submit( + generateCertificate, model, parsed_data['fields'], + value_row, options, output_dir)) + else: + parsed_data = readTXTConfig(data) + for name in parsed_data: + print(f"Generating certificate for {name}") + futures.append(executor.submit( + generateCertificate, model, ['name'], [name], options, output_dir)) print("All certificates generated. Merging...") file_paths = glob.glob(f"{output_dir}/*.pdf") mergePDFs(file_paths, output_file_path) + print("Cleaning tmp files...") + shutil.rmtree("./tmp") print("Done.") diff --git a/src/PPTXtoPDF.py b/src/PPTXtoPDF.py index ce70be1..3ba33cf 100644 --- a/src/PPTXtoPDF.py +++ b/src/PPTXtoPDF.py @@ -4,12 +4,21 @@ from PyPDF2 import PdfFileReader, PdfFileWriter -def PPTXtoPDF(file_path: str, dir: str) -> None: +def PPTXtoPDF(file_path: Path, dir: str) -> None: + + dir: Path = Path(dir).resolve() + + # TODO Improve tmpfolder generation + tmpfolder = str(dir.parent) + "/tmp/" + "".join(str(file_path.stem).split()[0] + str(file_path.stem).split()[-1]) + "/0/" + + subprocess.run(["libreoffice", "--headless", "--convert-to", - "pdf", "--outdir", dir, file_path], stdout=subprocess.DEVNULL) + "pdf", "--outdir", str(dir), file_path, + f"-env:UserInstallation=file://{tmpfolder}"], stdout=subprocess.DEVNULL) + - generated_file_path = Path(dir).joinpath( - Path(file_path).stem + ".pdf") + generated_file_path = dir.joinpath( + file_path.stem + ".pdf") reader = PdfFileReader(generated_file_path) writer = PdfFileWriter() diff --git a/src/generatePPTX.py b/src/generatePPTX.py index 5861eb7..296e984 100644 --- a/src/generatePPTX.py +++ b/src/generatePPTX.py @@ -5,7 +5,7 @@ from pptx.dml.color import RGBColor -def generatePPTX(model: str, fields: List[str], data: List[str], options: Dict[str, str], output_dir: str) -> str: +def generatePPTX(model: str, fields: List[str], data: List[str], options: Dict[str, str], output_dir: str) -> Path: prs = Presentation(model) slide = prs.slides[0] From 6960bf8d5bf53a1bb02167ab65d5deafef7e127e Mon Sep 17 00:00:00 2001 From: Gvinfinity Date: Tue, 21 Jun 2022 18:45:04 -0300 Subject: [PATCH 2/2] ProcessPoolExecutor --- data.example.csv | 31 ++++++++++++++++++++++++++- main.py | 52 ++++++++++++++++++--------------------------- src/generatePPTX.py | 17 +++++++++++++-- 3 files changed, 66 insertions(+), 34 deletions(-) diff --git a/data.example.csv b/data.example.csv index a5013dc..fcb47b9 100644 --- a/data.example.csv +++ b/data.example.csv @@ -1,4 +1,33 @@ name,id NAME 1, 001 NAME 2, 002 -NAME 3, 003 \ No newline at end of file +NAME 3, 003 +NAME 4, 004 +NAME 5, 005 +NAME 6, 006 +NAME 7, 007 +NAME 8, 008 +NAME 9, 009 +NAME 10, 010 +NAME 11, 011 +NAME 12, 012 +NAME 13, 013 +NAME 14, 014 +NAME 15, 015 +NAME 16, 016 +NAME 17, 017 +NAME 18, 018 +NAME 19, 019 +NAME 20, 020 +NAME 21, 021 +NAME 22, 022 +NAME 23, 023 +NAME 24, 024 +NAME 25, 025 +NAME 26, 026 +NAME 27, 027 +NAME 28, 028 +NAME 29, 029 +NAME 30, 030 +NAME 31, 031 +NAME 32, 032 diff --git a/main.py b/main.py index 6bf431b..eca1009 100644 --- a/main.py +++ b/main.py @@ -5,7 +5,6 @@ import concurrent.futures from typing import Dict, List import click -from pptx.enum.text import PP_ALIGN from src.log import Log from src.generatePPTX import generatePPTX @@ -51,7 +50,7 @@ def main( OUTPUT_FILE_PATH: Path to the output PDF file with the certificates. (default: ./output/certificates.pdf) """ options = {} - options['align'] = handleAlignOption(align) + options['align'] = align options['font_size'] = font_size options['color'] = color @@ -74,21 +73,25 @@ def main( os.makedirs(output_dir, exist_ok=True) os.makedirs(Path(output_dir).joinpath('pptx'), exist_ok=True) - with concurrent.futures.ThreadPoolExecutor() as executor: - futures = [] - if multiple_fields: - parsed_data = readCSVConfig(data) - for value_row in parsed_data['values']: - print(f"Generating certificate for {value_row[0]}") - futures.append(executor.submit( - generateCertificate, model, parsed_data['fields'], - value_row, options, output_dir)) - else: - parsed_data = readTXTConfig(data) - for name in parsed_data: - print(f"Generating certificate for {name}") - futures.append(executor.submit( - generateCertificate, model, ['name'], [name], options, output_dir)) + try: + with concurrent.futures.ProcessPoolExecutor() as executor: + futures = [] + if multiple_fields: + parsed_data = readCSVConfig(data) + for value_row in parsed_data['values']: + print(f"Generating certificate for {value_row[0]}") + futures.append(executor.submit( + generateCertificate, model, parsed_data['fields'], + value_row, options, output_dir)) + else: + parsed_data = readTXTConfig(data) + for name in parsed_data: + print(f"Generating certificate for {name}") + futures.append(executor.submit( + generateCertificate, model, ['name'], [name], options, output_dir)) + except Exception as e: + print(e) + print(type(e)) print("All certificates generated. Merging...") file_paths = glob.glob(f"{output_dir}/*.pdf") @@ -98,20 +101,7 @@ def main( print("Done.") -def handleAlignOption(align: str): - if align == "left": - return PP_ALIGN.LEFT - elif align == "center": - return PP_ALIGN.CENTER - elif align == "right": - return PP_ALIGN.RIGHT - elif align == "justify": - return PP_ALIGN.JUSTIFY - else: - return None - - -def generateCertificate(model: str, fields: List[str], data: List[str], options: Dict[str, str], output_dir: str): +def generateCertificate(model: str, fields: List[str], data: List[str], options, output_dir: str): pptx_path = generatePPTX(model, fields, data, options, output_dir) PPTXtoPDF(pptx_path, output_dir) diff --git a/src/generatePPTX.py b/src/generatePPTX.py index 296e984..4a5dd64 100644 --- a/src/generatePPTX.py +++ b/src/generatePPTX.py @@ -3,9 +3,22 @@ from pptx import Presentation from pptx.util import Pt from pptx.dml.color import RGBColor +from pptx.enum.text import PP_ALIGN +def handleAlignOption(align: str): + if align == "left": + return PP_ALIGN.LEFT + elif align == "center": + return PP_ALIGN.CENTER + elif align == "right": + return PP_ALIGN.RIGHT + elif align == "justify": + return PP_ALIGN.JUSTIFY + else: + return None -def generatePPTX(model: str, fields: List[str], data: List[str], options: Dict[str, str], output_dir: str) -> Path: +def generatePPTX(model: str, fields: List[str], data: List[str], options, output_dir: str) -> Path: + align = handleAlignOption(options['align']) prs = Presentation(model) slide = prs.slides[0] @@ -21,7 +34,7 @@ def generatePPTX(model: str, fields: List[str], data: List[str], options: Dict[s frame.text = frame.text.replace(field_placeholder, data[field_index]) for paragraph in frame.paragraphs: - paragraph.alignment = options['align'] + paragraph.alignment = align paragraph.font.size = Pt(options['font_size']) paragraph.font.color.rgb = RGBColor.from_string( options['color'])