From 3035afc9482ce5f6f73df5cd13dc53ca0dd87bdb Mon Sep 17 00:00:00 2001
From: Baptiste Rajaut
Date: Tue, 9 Apr 2024 17:35:13 +0200
Subject: [PATCH] feat: added a script launcher fix : the canary scripts now
 output their whole file path

---
Review notes (free text here is skipped when the patch is applied):
* canary_launcher.py now runs each script through an argument list with
  sys.executable instead of a shell string, resolves the scripts next to
  the launcher, matches extensions case-insensitively, gives every
  per-file JSON output a unique name, and rejects a --directory that
  does not exist.
* NOTE(review): the leading whitespace of the context lines in the
  docx/pptx/xlsx hunks below was reconstructed; confirm it against the
  target files, or apply with "git apply --ignore-whitespace".
* NOTE(review): the blob id on the canary_launcher.py "index" line
  predates the launcher changes described above.

 canary_launcher.py | 126 +++++++++++++++++++++++++++++++++++++++++++++
 docx_canary.py     |   2 +-
 pptx_canary.py     |   2 +-
 xlsx_canary.py     |   2 +-
 4 files changed, 129 insertions(+), 3 deletions(-)
 create mode 100644 canary_launcher.py

diff --git a/canary_launcher.py b/canary_launcher.py
new file mode 100644
index 0000000..d7d19be
--- /dev/null
+++ b/canary_launcher.py
@@ -0,0 +1,126 @@
+"""Run the matching canary script on every supported file in a directory tree."""
+import argparse
+import itertools
+import json
+import os
+import shlex
+import subprocess
+import sys
+import tempfile
+import time
+
+# Mapping file extensions to corresponding Python scripts
+file_mapping = {
+    '.docx': 'docx_canary.py',
+    '.sql': 'mysql_canary.py',
+    '.pptx': 'pptx_canary.py',
+    '.xlsx': 'xlsx_canary.py',
+}
+
+# The canary scripts are looked up next to this launcher, not in the
+# caller's working directory.
+SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))
+
+# Keeps per-file JSON names unique even within the same millisecond.
+_output_ids = itertools.count()
+
+
+def process_file(file_path, temp_json_path, generate_json):
+    """Run the canary script registered for *file_path*, if there is one.
+
+    Args:
+        file_path: File to inspect; its extension selects the script.
+        temp_json_path: Directory receiving the per-file JSON output.
+            Only used when *generate_json* is true.
+        generate_json: Whether to pass ``--json`` to the script.
+    """
+    _, extension = os.path.splitext(file_path)
+    # Matched case-insensitively so "REPORT.DOCX" is handled too.
+    script_name = file_mapping.get(extension.lower())
+    if script_name is None:
+        return
+
+    # Argument list, no shell: file names containing quotes, "$" or ";"
+    # reach the script verbatim instead of being interpreted.
+    command = [
+        sys.executable,
+        os.path.join(SCRIPT_DIR, script_name),
+        '--input',
+        file_path,
+    ]
+    # Add JSON output option if JSON generation is requested
+    if generate_json:
+        output_name = f"{next(_output_ids):08d}_{int(time.time() * 1000)}.json"
+        command += ['--json', os.path.join(temp_json_path, output_name)]
+
+    print(f"Executing: {' '.join(shlex.quote(part) for part in command)}")
+    try:
+        subprocess.run(command, check=True)
+    except (subprocess.CalledProcessError, OSError) as e:
+        print(f"Error executing {script_name} on {file_path}: {e}")
+
+
+def explore_directory(directory, temp_json_path, generate_json):
+    """Call process_file() on every file found below *directory*."""
+    for root, _, files in os.walk(directory):
+        for file in files:
+            process_file(os.path.join(root, file), temp_json_path, generate_json)
+
+
+def combine_json_results(json_dir):
+    """Return the contents of every ``.json`` file below *json_dir* as one list.
+
+    A file holding a list contributes its items; any other JSON value is
+    added as a single item. Files that are not valid JSON are reported and
+    skipped.
+    """
+    combined_results = []
+    for root, _, files in os.walk(json_dir):
+        # Sorted so the combined output follows the processing order.
+        for file in sorted(files):
+            if not file.endswith('.json'):
+                continue
+            file_path = os.path.join(root, file)
+            with open(file_path, 'r') as f:
+                try:
+                    json_data = json.load(f)
+                except json.JSONDecodeError as e:
+                    print(f"Error decoding JSON from {file_path}: {e}")
+                    continue
+            if isinstance(json_data, list):
+                combined_results.extend(json_data)
+            else:
+                combined_results.append(json_data)
+    return combined_results
+
+
+def main():
+    """Parse the command line, run the scripts and optionally merge their JSON."""
+    parser = argparse.ArgumentParser(description="Explore a directory and execute scripts based on file extensions.")
+    parser.add_argument('--directory', '-d', type=str, required=True, help="Path to the directory to explore.")
+    parser.add_argument('--json', '-j', type=str, help="Output path for the combined JSON file.")
+    args = parser.parse_args()
+
+    # os.walk() silently yields nothing for a missing directory.
+    if not os.path.isdir(args.directory):
+        parser.error(f"not a directory: {args.directory}")
+
+    generate_json = bool(args.json)
+
+    if generate_json:
+        with tempfile.TemporaryDirectory() as tmp_dir:
+            explore_directory(args.directory, tmp_dir, generate_json)
+            # Combine all the JSON results into one file
+            combined_results = combine_json_results(tmp_dir)
+
+        with open(args.json, 'w') as f:
+            json.dump(combined_results, f, indent=4)
+
+        print(f"All results have been combined into {args.json}")
+    else:
+        # Just execute the scripts without generating JSON output
+        explore_directory(args.directory, None, generate_json)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/docx_canary.py b/docx_canary.py
index 8693747..e9401de 100644
--- a/docx_canary.py
+++ b/docx_canary.py
@@ -81,7 +81,7 @@ def main():
             "md5": md5,
             "sha1": sha1
         }
-        write_to_json(args.json, args.input.split('/')[-1], data_to_export)
+        write_to_json(args.json, args.input, data_to_export)
         print(f"\nResults have been written to {args.json}")
 
 if __name__ == "__main__":
diff --git a/pptx_canary.py b/pptx_canary.py
index a729f71..e1d195f 100644
--- a/pptx_canary.py
+++ b/pptx_canary.py
@@ -80,7 +80,7 @@ def main():
             "md5": md5,
             "sha1": sha1
         }
-        write_to_json(args.json, args.input.split('/')[-1], data_to_export)
+        write_to_json(args.json, args.input, data_to_export)
         print(f"\nResults have been written to {args.json}")
 
 if __name__ == "__main__":
diff --git a/xlsx_canary.py b/xlsx_canary.py
index 72baf38..ee7fcd4 100644
--- a/xlsx_canary.py
+++ b/xlsx_canary.py
@@ -102,7 +102,7 @@ def main():
             "md5": md5,
             "sha1": sha1
         }
-        write_to_json(args.json, args.input.split('/')[-1], data_to_export)
+        write_to_json(args.json, args.input, data_to_export)
         print(f"\nResults have been written to {args.json}")
 
 if __name__ == "__main__":