diff --git a/README.md b/README.md index f1b877e..2eac86a 100644 --- a/README.md +++ b/README.md @@ -1,31 +1,54 @@ -# scripts +# Assembly Implode/Explode Scripts -Disclaimer: I can't fully vouch for the quality of these scripts, -which were generated with AI chatbot assistance. If -you are a human with feedback, feel free to ping me on -Slack! +These scripts enable a workflow for preparing OpenShift AsciiDoc assemblies for DITA conversion validation and correction. -I created these scripts with CCS goals in mind (such -as performing Content Quality Assessments), and you -are welcome to use them. +## Overview -### important +The workflow allows you to: +1. **Implode** an assembly - combine all included modules/snippets into a single file +2. **Validate** with DITA-vale or other validation tools +3. **Edit** the combined file to fix DITA conversion issues +4. **Explode** back to original files - extract changes to original module/snippet files -This script is based on the structure of the openshift-docs -repo. If your repo structure is different, you might need -to make changes to your copy of the script. +**End Goal:** Individual `.adoc` files (assemblies, modules, snippets) that are DITA-compatible and ready to commit to GitHub. The large combined file is temporary workspace only. -Also, the current version of this script probably can't handle ifevals around module include statements... sorry! +## Quick Start -It also doesn't support nested assemblies. +```bash +# 1. Implode assembly into single file +./implode-assembly.sh assemblies/my-assembly.adoc -## implode-assembly.sh +# 2. Validate the imploded file +dita-vale ~/imploded_assemblies/assemblies/my-assembly_branch_v1.txt -**implode-assembly.sh** prepares documentation for use -with AI such as NotebookLM by "inlining" the contents -of included modules/snippets while retaining the -assembly context. This way, the AI can also analyze -the markup/raw files in addition to the content. +# 3. 
Edit the imploded file to fix issues +code ~/imploded_assemblies/assemblies/my-assembly_branch_v1.txt + +# 4. Explode back to original files (requires -o for openshift-docs repo) +./explode-assembly.sh -o ~/openshift-docs ~/imploded_assemblies/assemblies/my-assembly_branch_v1.txt + +# 5. Verify and commit DITA-ready files +cd ~/openshift-docs +git diff +git add assemblies/ modules/ snippets/ +git commit -m "Fix DITA conversion issues" +``` + +See [WORKFLOW.md](WORKFLOW.md) for complete documentation. + +## Important Notes + +**Disclaimer:** These scripts were created with AI chatbot assistance. If you have feedback, feel free to reach out! + +**Requirements:** +- Based on openshift-docs repo structure - may need modifications for other repos +- Cannot handle ifevals around module include statements + +## Scripts + +### implode-assembly.sh + +Combines an assembly and all included modules/snippets into a single file for validation. _usage_ @@ -35,14 +58,18 @@ First, make the script executable: $ chmod +x ./implode-assembly.sh ``` +### Basic Usage + Run the script, passing one or more arguments: -``` -$ ./implode-assembly.sh +```bash +$ ./implode-assembly.sh # or, to implode all assemblies in a directory: - $ ./implode-assembly.sh + +# or, multiple files: +$ ./implode-assembly.sh file1.adoc file2.adoc directory/ ``` The output is saved to a file in a directory called @@ -51,7 +78,7 @@ it is created. If you passed a directory as an argument, all relevant directories are created in `imploded_assemblies`. The imploded assembly document has a default filename -pattern of `__v.txt`, where _n_ +pattern of `__v.txt`, where _n_ increments if a file already exists. For example: @@ -71,15 +98,155 @@ For example: ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ ``` -To suppress this output and only print a list of generated -files, run the command with the `-q` or `--quiet` flag. 
+### Advanced Options -For example: +#### Quiet Mode -``` +To suppress detailed output and only print a list of generated +files, run the command with the `-q` or `--quiet` flag: + +```bash ❯ ~/implode-assembly.sh virt/install -q Generated files: /Users/panousley/imploded_assemblies/virt/install/installing-virt_main_v8.txt /Users/panousley/imploded_assemblies/virt/install/uninstalling-virt_main_v8.txt /Users/panousley/imploded_assemblies/virt/install/preparing-cluster-for-virt_main_v8.txt ``` + +#### File List Mode + +Process multiple files from a text file using the `-f` or `--file-list` option. +Create a text file with one `.adoc` file path per line: + +```bash +# Create a file list +$ cat > myfiles.txt < (with markers) │ +│ snippets/snip1.adoc (for validation) │ +│ │ +└─────────────────────────────────────────────────────────────────────┘ + │ + ▼ +┌─────────────────────────────────────────────────────────────────────┐ +│ STEP 3: VALIDATE & EDIT │ +│ │ +│ Combined File → Edited Combined File │ +│ ───────────── ─────────────────── │ +│ my_v1.txt my_v1.txt │ +│ (DITA validation issues) EDIT (DITA-compatible) │ +│ ═══> (all fixes applied) │ +│ Run: dita-vale my_v1.txt │ +│ Fix: code/vi my_v1.txt │ +│ │ +└─────────────────────────────────────────────────────────────────────┘ + │ + ▼ +┌─────────────────────────────────────────────────────────────────────┐ +│ STEP 4-5: EXPLODE BACK TO INDIVIDUAL FILES │ +│ │ +│ Edited Combined → Updated Individual Files│ +│ ───────────────── ─────────────────────── │ +│ my_v1.txt assemblies/my.adoc │ +│ (with all fixes) EXPLODE modules/mod1.adoc │ +│ ═══> modules/mod2.adoc │ +│ snippets/snip1.adoc │ +│ (ALL DITA-READY!) 
│ +│ │ +└─────────────────────────────────────────────────────────────────────┘ + │ + ▼ +┌─────────────────────────────────────────────────────────────────────┐ +│ STEP 6-8: VERIFY, BUILD, COMMIT TO GITHUB │ +│ │ +│ git status → GitHub Repository │ +│ git diff ────────────────── │ +│ asciibinder build COMMIT assemblies/my.adoc │ +│ git add . ═══> modules/mod1.adoc │ +│ git commit modules/mod2.adoc │ +│ git push snippets/snip1.adoc │ +│ │ +│ ✅ DITA-READY FOR │ +│ CONVERSION! │ +└─────────────────────────────────────────────────────────────────────┘ + +KEY POINT: The combined text file (my_v1.txt) is TEMPORARY workspace only. + Your deliverable is the updated individual .adoc files on GitHub. +``` + +## Scripts + +### implode-assembly.sh + +**Purpose:** Combines an assembly file and all its included modules/snippets into a single text file for analysis. + +**Key Features:** +- Recursively inlines all `include::modules/` and `include::snippets/` references +- Preserves original `include::` statements for reference +- Adds `// BEGIN inlined:` and `// END inlined:` markers around inlined content +- Handles nested includes +- Preserves `include::_attributes/` statements (not inlined) +- Adds metadata (timestamp, git branch, source file path) +- Supports combined output mode (multiple assemblies in one file) + +**Usage:** +```bash +# Single assembly file +./implode-assembly.sh assembly.adoc + +# Multiple files from a list +./implode-assembly.sh -f file_list.txt + +# Combined mode (all assemblies in one file) +./implode-assembly.sh -c combined_output.txt -f file_list.txt + +# Quiet mode +./implode-assembly.sh -q assembly.adoc +``` + +**Output Location:** `~/imploded_assemblies/` + +**Output Format:** +``` +// Imploded on: 2025-10-24 14:30:00 +// Git branch: my-feature-branch +// Source file: assemblies/my-assembly.adoc + +[assembly content] +include::modules/my-module.adoc[] +// BEGIN inlined: modules/my-module.adoc +[module content] +// END inlined: modules/my-module.adoc 
+``` + +### explode-assembly.sh + +**Purpose:** Reverses the implode process by extracting edited content from an imploded file back to original source files. + +**Key Features:** +- Parses `// BEGIN inlined:` and `// END inlined:` markers +- Extracts content for each module/snippet +- Handles nested includes correctly +- Creates backups by default (`.bak` suffix) +- Dry-run mode to preview changes +- Verbose mode for detailed output +- Reconstructs assembly file (without inlined content) +- Preserves final newlines (POSIX text file requirement) + +**Usage:** +```bash +# Basic usage for openshift-docs (REQUIRED: use -o flag) +./explode-assembly.sh -o ~/openshift-docs imploded_file.txt + +# Dry run (see what would happen) +./explode-assembly.sh -n -o ~/openshift-docs imploded_file.txt + +# Verbose output +./explode-assembly.sh -v -o ~/openshift-docs imploded_file.txt + +# No backups +./explode-assembly.sh --no-backup -o ~/openshift-docs imploded_file.txt + +# Dry run + verbose +./explode-assembly.sh -n -v -o ~/openshift-docs imploded_file.txt +``` + +**Important for openshift-docs:** The `-o ~/openshift-docs` flag is REQUIRED because modules and snippets directories are at the repository root, not in the assembly's subdirectory. Without this flag, the script cannot locate the correct output paths. 
+ +## Complete Workflow + +### Step 1: Implode the Assembly + +First, identify the assembly file you want to work with and create an imploded version: + +```bash +cd ~/openshift-docs +./path/to/implode-assembly.sh assemblies/my-assembly.adoc +``` + +This creates: `~/imploded_assemblies/assemblies/my-assembly_<branch>_v1.txt` (for example, `my-assembly_main_v1.txt` when you are on the `main` branch) + +### Step 2: Validate with DITA Tools + +Run your DITA validation tool (e.g., dita-vale) on the imploded file: + +```bash +dita-vale ~/imploded_assemblies/assemblies/my-assembly_main_v1.txt > validation_report.txt +``` + +### Step 3: Fix Issues in the Imploded File + +Edit the imploded file to fix any DITA conversion issues: + +```bash +code ~/imploded_assemblies/assemblies/my-assembly_main_v1.txt +# or +vi ~/imploded_assemblies/assemblies/my-assembly_main_v1.txt +``` + +**Important editing guidelines:** +- Only edit content BETWEEN the `// BEGIN inlined:` and `// END inlined:` markers +- DO NOT modify the markers themselves +- DO NOT modify the metadata at the top of the file +- DO NOT modify the `include::` statements (they're for reference only) +- Fix AsciiDoc syntax issues as identified by validation + +### Step 4: Dry-Run the Explode + +Before actually writing files, preview what will happen: + +```bash +./path/to/explode-assembly.sh -n -v -o ~/openshift-docs ~/imploded_assemblies/assemblies/my-assembly_main_v1.txt +``` + +Review the output to ensure: +- Correct files will be updated +- Paths are resolved correctly +- Expected number of modules/snippets found + +**Note:** The `-o ~/openshift-docs` flag is required for the openshift-docs repository structure. 
+ +### Step 5: Explode Back to Source Files + +Extract the edited content back to the original files: + +```bash +./path/to/explode-assembly.sh -o ~/openshift-docs ~/imploded_assemblies/assemblies/my-assembly_main_v1.txt +``` + +This will: +- Create `.bak` backups of all modified files +- Write updated content to original module/snippet files +- Update the assembly file (if edited) +- Print summary of files processed + +**Important:** Always use the `-o ~/openshift-docs` flag to ensure files are written to the correct locations in the repository. + +### Step 6: Verify Changes + +Check the changes in your git repository: + +```bash +cd ~/openshift-docs +git status +git diff +``` + +Review each changed file to ensure edits were applied correctly. + +### Step 7: Test the Documentation Build + +Build the documentation to ensure no syntax errors: + +```bash +cd ~/openshift-docs +asciibinder build +``` + +### Step 8: Commit DITA-Ready Files to GitHub + +**This is your end goal!** At this point, your individual `.adoc` files contain all the DITA-compatible fixes. + +If everything looks good, commit your changes: + +```bash +cd ~/openshift-docs + +# Stage all the corrected individual .adoc files +git add assemblies/my-assembly.adoc +git add modules/*.adoc # All updated modules +git add snippets/*.adoc # All updated snippets + +# Commit the DITA-ready files +git commit -m "Fix DITA conversion issues in my-assembly + +- Corrected AsciiDoc elements for DITA XML compatibility +- Validated with DITA-vale +- Ready for DITA conversion" + +# Push to your GitHub fork +git push origin my-branch +``` + +**What you've accomplished:** +- ✅ Your individual `.adoc` files are now DITA-compatible +- ✅ Files are committed to version control +- ✅ Content is ready for clean DITA XML conversion +- ✅ The imploded text file served its purpose and can be deleted or archived + +**Note:** The large combined text file (`~/imploded_assemblies/...`) was only a temporary workspace. 
You don't need to commit it or keep it after Step 8. Your actual work product is the updated individual `.adoc` files in your git repository. + +## Working with Multiple Assemblies + +### Create a File List + +Create a text file with paths to multiple assemblies: + +```bash +cat > assembly_list.txt <" +**Cause:** The BEGIN/END markers were modified or removed during editing. + +**Solution:** Check that markers are intact: +``` +// BEGIN inlined: modules/my-module.adoc +[content here] +// END inlined: modules/my-module.adoc +``` + +### Issue: Nested includes not handled correctly +**Cause:** Complex nesting structure. + +**Solution:** Use verbose mode (`-v`) to see what's being extracted. The script tracks depth correctly, so this is usually a marker formatting issue. + +### Issue: Wrong base directory / Files not found +**Cause:** The explode script cannot locate modules/snippets directories. This is the expected behavior for openshift-docs repository where these directories are at the root. + +**Solution:** ALWAYS use `-o` flag to specify the repository root: +```bash +./explode-assembly.sh -o ~/openshift-docs imploded_file.txt +``` + +This is REQUIRED for openshift-docs, not optional. The repository structure has modules/ and snippets/ at the root level, while assemblies are in subdirectories like `scalability_and_performance/`. + +### Issue: Files show changes but no visible diff / Missing final newlines +**Cause:** Files are missing their final newline character, which is required by POSIX text file standards. + +**Background:** The explode script now automatically ensures all files end with a newline. In earlier versions, files would lose their final newline during the explode process due to bash command substitution behavior. 
+ +**What this means:** +- All text files should end with a newline character (`\n`) +- Git may show files as changed even when content appears identical +- Editors may warn "No newline at end of file" + +**Solution:** The current version of the explode script automatically handles this: +1. Uses a sentinel technique (`$(cat file; echo x)` + `${var%x}`) to preserve newlines when reading the imploded file +2. Automatically adds a final newline to any file that doesn't have one + +**If you're using an older version of the script:** Update to the latest version from the repository, which includes the final newline preservation fix. + +## File Structure Reference + +### Imploded File Structure +``` +// [AI comment block - multi-line explanation] + +// Imploded on: 2025-10-24 14:30:00 +// Git branch: my-branch +// Source file: assemblies/my-assembly.adoc + +[assembly content with includes preserved] + +include::modules/module1.adoc[] +// BEGIN inlined: modules/module1.adoc +[module1 content] + include::snippets/snippet1.adoc[] + // BEGIN inlined: snippets/snippet1.adoc + [snippet content] + // END inlined: snippets/snippet1.adoc +// END inlined: modules/module1.adoc +``` + +### Directory Structure + +**Imploded Files Location:** +``` +~/imploded_assemblies/ + / + my-assembly_main_v1.txt + my-assembly_main_v2.txt +``` + +**OpenShift-Docs Repository Structure:** +``` +~/openshift-docs/ ← Use this path with -o flag + modules/ ← At repository root + module1.adoc + module2.adoc + cnf-about-*.adoc + [thousands of module files] + snippets/ ← At repository root + snippet1.adoc + scalability_and_performance/ ← Assemblies in subdirectories + cnf-tuning-low-latency-nodes-with-perf-profile.adoc + networking/ + assembly-file.adoc + [other topic directories with assemblies] +``` + +**Why `-o ~/openshift-docs` is Required:** + +The openshift-docs repository has a flat structure where ALL modules and snippets are in top-level directories (`modules/` and `snippets/`), while assemblies 
are organized in topic-specific subdirectories. When exploding an assembly from `scalability_and_performance/`, the script must write modules to `~/openshift-docs/modules/`, not `~/openshift-docs/scalability_and_performance/modules/`. The `-o` flag tells the script to use the repository root as the base path. + +## Advanced Usage + +### Batch Processing with Error Handling + +```bash +#!/bin/bash +# batch_process.sh + +file_list="assembly_list.txt" +output_dir="$HOME/imploded_for_dita" + +# Implode all assemblies +while IFS= read -r assembly; do + echo "Processing: $assembly" + ./implode-assembly.sh -q "$assembly" || { + echo "ERROR: Failed to implode $assembly" + continue + } +done < "$file_list" + +echo "All assemblies imploded. Run DITA validation, then use explode-assembly.sh to extract changes." +``` + +### Integration with DITA Validation + +```bash +#!/bin/bash +# validate_and_report.sh + +imploded_file="$1" + +if [[ ! -f "$imploded_file" ]]; then + echo "Usage: $0 " + exit 1 +fi + +# Run validation +validation_output="${imploded_file%.txt}_validation.txt" +dita-vale "$imploded_file" > "$validation_output" + +# Show summary +echo "Validation complete. Results:" +grep -c "ERROR" "$validation_output" && echo "Errors found" || echo "No errors" +grep -c "WARNING" "$validation_output" && echo "Warnings found" || echo "No warnings" + +echo "Full report: $validation_output" +``` + +## Summary + +This workflow provides a safe, reversible way to: +1. Consolidate complex multi-file assemblies for validation +2. Fix DITA conversion issues in a single location +3. Automatically distribute fixes back to source files +4. Maintain git history and traceability + +The scripts handle the complexity of nested includes and preserve the structure needed to reverse the process accurately. 
#!/bin/bash

# explode-assembly.sh
# Reverse of implode-assembly.sh: extracts edited content from an imploded
# file back to the original assembly, module, and snippet files.
#
# Usage: explode-assembly.sh [OPTIONS] <imploded_file>
# Run with -h/--help for the full option list.

set -euo pipefail

# Configuration (globals read by the rest of the script)
backup_suffix=".bak"   # appended to a file's path to name its backup copy
dry_run=false          # true: report what would be written, write nothing
verbose=false          # true: print per-file processing details
create_backups=true    # false: overwrite files without making a backup
output_base_dir=""     # set by -o/--output-dir; empty means "derive it"

# Print the help text to stdout.
usage() {
  cat <<EOF
Usage: $0 [OPTIONS] <imploded_file>

Explodes an imploded assembly file back to its original module and snippet files.

OPTIONS:
  -n, --dry-run           Show what would be done without actually writing files
  -v, --verbose           Show detailed processing information
  --no-backup             Don't create .bak backup files before overwriting
  -o, --output-dir <dir>  Override output directory (default: extracts from metadata)
  -h, --help              Show this help message

EXAMPLES:
  $0 imploded_assembly.txt
  $0 -n -v imploded_assembly.txt                 # Dry run with verbose output
  $0 --no-backup imploded_assembly.txt           # Don't create backups
  $0 -o ~/openshift-docs imploded_assembly.txt   # Override output directory

NOTES:
  - The script uses markers like "// BEGIN inlined:" and "// END inlined:" to identify content
  - Source file path is extracted from metadata at the top of the imploded file
  - Nested includes are handled recursively
  - By default, creates .bak backups of all modified files
EOF
}

# Parse command line arguments. Anything that is not a recognised option is
# collected in `args` and validated below.
declare -a args=()
while [[ $# -gt 0 ]]; do
  case "$1" in
    -n|--dry-run)
      dry_run=true
      shift
      ;;
    -v|--verbose)
      verbose=true
      shift
      ;;
    --no-backup)
      create_backups=false
      shift
      ;;
    -o|--output-dir)
      # Check explicitly: with `set -u` a bare "$2" would abort with an
      # opaque "unbound variable" message when the value is missing.
      if [[ $# -lt 2 ]]; then
        echo "Error: $1 requires a directory argument" >&2
        echo "Use -h or --help for usage information" >&2
        exit 1
      fi
      output_base_dir="$2"
      shift 2
      ;;
    -h|--help)
      usage
      exit 0
      ;;
    *)
      args+=("$1")
      shift
      ;;
  esac
done

# Exactly one positional argument (the imploded file) is accepted.
if [[ "${#args[@]}" -ne 1 ]]; then
  echo "Error: Expected exactly one input file" >&2
  echo "Use -h or --help for usage information" >&2
  exit 1
fi

imploded_file="${args[0]}"

if [[ ! -f "$imploded_file" ]]; then
  echo "Error: File not found: $imploded_file" >&2
  exit 1
fi
# Strip leading and trailing whitespace from $1 and print the result.
# Uses parameter expansion only, so no external process is spawned.
_explode_trim() {
  local text="$1"
  text="${text#"${text%%[![:space:]]*}"}"
  text="${text%"${text##*[![:space:]]}"}"
  printf '%s' "$text"
}

# Extract metadata from imploded file.
# Arguments: $1 - path to the imploded file
# Outputs:   the trimmed value of the first "// Source file:" comment line,
#            or an empty line when no such line exists
extract_metadata() {
  local file="$1"
  local source_re='^//[[:space:]]*Source[[:space:]]*file:[[:space:]]*(.+)$'
  local source_file=""
  local line

  # `|| [[ -n $line ]]` keeps a final line that lacks a trailing newline.
  while IFS= read -r line || [[ -n "$line" ]]; do
    if [[ "$line" =~ $source_re ]]; then
      source_file=$(_explode_trim "${BASH_REMATCH[1]}")
      break
    fi
  done < "$file"

  printf '%s\n' "$source_file"
}

# Pick the directory that output paths are resolved against.
# Globals:   output_base_dir (read) - wins unconditionally when non-empty
# Arguments: $1 - source file path taken from the imploded metadata
# Outputs:   in order of preference: output_base_dir, the directory part of
#            $1 if it exists, $HOME/openshift-docs if it exists, otherwise "."
# NOTE(review): without -o the result is the assembly's own directory, and
# callers append modules/..., snippets/... and the source path to it; for the
# openshift-docs layout that only resolves correctly when -o is given.
get_base_directory() {
  local source_file="$1"
  local dir_path

  if [[ -n "${output_base_dir:-}" ]]; then
    printf '%s\n' "$output_base_dir"
    return
  fi

  # Directory part of the source path (the whole string if it has no slash).
  dir_path="${source_file%/*}"

  if [[ -d "$dir_path" ]]; then
    printf '%s\n' "$dir_path"
  elif [[ -n "${HOME:-}" && -d "$HOME/openshift-docs" ]]; then
    printf '%s\n' "$HOME/openshift-docs"
  else
    printf '%s\n' "."
  fi
}

# Extract the content that belongs to one inlined file.
# Arguments: $1 - path exactly as written in its "// BEGIN inlined:" marker
#            $2 - full text of the imploded file
# Outputs:   the lines between the first matching BEGIN marker and its END
#            marker. Blocks nested inside (their markers and their content)
#            are left out: they belong to their own files, and the
#            include:: line that precedes each nested block is kept, so the
#            result is the file as it looked before imploding.
extract_file_content() {
  local file_path="$1"
  local input_content="$2"
  local begin_re='^//[[:space:]]*BEGIN[[:space:]]*inlined:[[:space:]]*(.+)$'
  local end_re='^//[[:space:]]*END[[:space:]]*inlined:[[:space:]]*(.+)$'
  local depth=0   # 0: outside the target block, 1: directly inside, >1: nested
  local result=""
  local line marker_path

  while IFS= read -r line; do
    if [[ "$line" =~ $begin_re ]]; then
      if (( depth > 0 )); then
        # A block nested inside the target: track it but do not copy it.
        # Plain assignment is used instead of ((depth++)), whose exit status
        # is 1 when the old value is 0 and would trip `set -e`.
        depth=$((depth + 1))
      else
        marker_path=$(_explode_trim "${BASH_REMATCH[1]}")
        if [[ "$marker_path" == "$file_path" ]]; then
          depth=1
        fi
      fi
    elif [[ "$line" =~ $end_re ]]; then
      if (( depth > 0 )); then
        depth=$((depth - 1))
        if (( depth == 0 )); then
          break   # closing marker of the target block: done
        fi
      fi
    elif (( depth == 1 )); then
      result+="$line"$'\n'
    fi
  done <<< "$input_content"

  printf '%s' "$result"
}

# List every inlined file recorded in the imploded file.
# Arguments: $1 - path to the imploded file
# Outputs:   one path per line, first-seen order, duplicates removed. Only
#            paths under modules/ or snippets/ are listed, at any nesting
#            depth. Prints nothing when there are none.
find_all_included_files() {
  local file="$1"
  local begin_re='^//[[:space:]]*BEGIN[[:space:]]*inlined:[[:space:]]*(.+)$'
  local line file_path

  # Paths are streamed straight into awk rather than collected in an array:
  # expanding an empty array aborts under `set -u` on bash older than 4.4.
  while IFS= read -r line || [[ -n "$line" ]]; do
    if [[ "$line" =~ $begin_re ]]; then
      file_path=$(_explode_trim "${BASH_REMATCH[1]}")
      if [[ "$file_path" == modules/* || "$file_path" == snippets/* ]]; then
        printf '%s\n' "$file_path"
      fi
    fi
  done < "$file" | awk '!seen[$0]++'
}

# Rebuild the assembly file itself from the imploded text.
# Arguments: $1 - full text of the imploded file
# Outputs:   every line that is not part of the leading header and not inside
#            a modules/ or snippets/ BEGIN/END block (markers excluded too)
extract_assembly_content() {
  local input_content="$1"
  local begin_re='^//[[:space:]]*BEGIN[[:space:]]*inlined:[[:space:]]*(modules|snippets)/'
  local end_re='^//[[:space:]]*END[[:space:]]*inlined:[[:space:]]*(modules|snippets)/'
  # Comment lines that make up the generated header at the top of the file.
  local header_re='^//[[:space:]]*('
  header_re+='Imploded[[:space:]]*on:'
  header_re+='|Git[[:space:]]*branch:'
  header_re+='|Source[[:space:]]*file:'
  header_re+='|This[[:space:]]*file[[:space:]]*contains'
  header_re+='|It[[:space:]]*is[[:space:]]*not'
  header_re+='|and[[:space:]]*the[[:space:]]*full'
  header_re+='|Includes[[:space:]]*that'
  header_re+='|The[[:space:]]*purpose'
  header_re+='|so[[:space:]]*that'
  header_re+='|without[[:space:]]*needing'
  header_re+=')'
  local result=""
  local in_metadata=true
  local skip_depth=0
  local line

  while IFS= read -r line; do
    # Skip header comments and blank lines until the first real line.
    if [[ "$in_metadata" == true ]]; then
      if [[ -z "$line" || "$line" =~ $header_re ]]; then
        continue
      fi
      in_metadata=false
    fi

    if [[ "$line" =~ $begin_re ]]; then
      skip_depth=$((skip_depth + 1))
    elif [[ "$line" =~ $end_re ]]; then
      # Never go below zero: a stray END marker must not cause the rest of
      # the assembly to be dropped.
      if (( skip_depth > 0 )); then
        skip_depth=$((skip_depth - 1))
      fi
    elif (( skip_depth == 0 )); then
      result+="$line"$'\n'
    fi
  done <<< "$input_content"

  printf '%s' "$result"
}
# Write one exploded file.
# Globals:   dry_run, create_backups, verbose, backup_suffix (all read)
# Arguments: $1 - destination path
#            $2 - content to write (expected to be non-empty)
# Outputs:   progress messages on stdout
# In dry-run mode nothing on disk is touched. Otherwise the parent directory
# is created, an existing file is copied to "<path><backup_suffix>" when
# backups are enabled, and the content is written with exactly one trailing
# newline guaranteed (command substitution strips them from the content).
write_exploded_file() {
  local target="$1"
  local content="$2"

  if [[ "$dry_run" == true ]]; then
    echo "  [DRY RUN] Would write: $target"
    return 0
  fi

  mkdir -p "$(dirname "$target")"

  if [[ -f "$target" && "$create_backups" == true ]]; then
    cp "$target" "${target}${backup_suffix}"
    if [[ "$verbose" == true ]]; then
      echo "  Created backup: ${target}${backup_suffix}"
    fi
  fi

  # Ensure content ends with a newline (POSIX requirement for text files)
  if [[ "${content: -1}" != $'\n' ]]; then
    content+=$'\n'
  fi
  printf '%s' "$content" > "$target"
  echo "  ✓ Written: $target"
}

# Main processing
echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
echo "Exploding assembly: $imploded_file"
echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"

# Extract metadata
source_file=$(extract_metadata "$imploded_file")
if [[ -z "$source_file" ]]; then
  echo "Error: Could not find source file metadata in imploded file" >&2
  exit 1
fi

echo "Source assembly: $source_file"

# Determine base directory
base_dir=$(get_base_directory "$source_file")
echo "Base directory: $base_dir"

if [[ "$dry_run" == true ]]; then
  echo "DRY RUN MODE - No files will be modified"
fi

if [[ "$create_backups" == false ]]; then
  echo "Backups disabled"
fi

echo ""

# Read entire imploded file (preserving final newline)
# Note: Command substitution strips trailing newlines, so we add a sentinel
imploded_content=$(cat "$imploded_file"; echo x)
imploded_content="${imploded_content%x}"

# Find all modules and snippets
echo "Finding included files..."
included_files=$(find_all_included_files "$imploded_file")

if [[ -z "$included_files" ]]; then
  echo "Warning: No modules or snippets found in imploded file" >&2
else
  # Arithmetic expansion drops the padding BSD wc puts around the number.
  included_count=$(( $(wc -l <<< "$included_files") ))
  echo "Found $included_count included file(s)"
  echo ""
fi

# Extract and write each module/snippet
file_count=0
while IFS= read -r file_path; do
  # Skip empty lines
  [[ -n "$file_path" ]] || continue

  # Marker paths come from an edited text file; refuse any that would climb
  # out of the base directory.
  if [[ "/$file_path/" == */../* ]]; then
    echo "Warning: Skipping path containing '..': $file_path" >&2
    continue
  fi

  output_path="$base_dir/$file_path"

  if [[ "$verbose" == true ]]; then
    echo "Processing: $file_path"
  fi

  # Extract content for this file
  content=$(extract_file_content "$file_path" "$imploded_content")

  if [[ -z "$content" ]]; then
    echo "Warning: No content extracted for $file_path" >&2
    continue
  fi

  write_exploded_file "$output_path" "$content"
  # Counted only after the file was handled, so skipped files are excluded.
  file_count=$((file_count + 1))

  if [[ "$verbose" == true ]]; then
    content_bytes=$(( $(printf '%s\n' "$content" | wc -c) ))
    content_lines=$(( $(printf '%s\n' "$content" | wc -l) ))
    echo "  Content size: $content_bytes bytes, $content_lines lines"
    echo ""
  fi
done <<< "$included_files"

# Extract and write assembly file
echo ""
echo "Processing assembly file..."
assembly_content=$(extract_assembly_content "$imploded_content")

# Handle assembly path correctly
if [[ "$source_file" == /* ]]; then
  # source_file is absolute path
  if [[ -n "$output_base_dir" ]]; then
    # Re-root the assembly under the override directory. The prefix is
    # removed with parameter expansion (not sed) so characters in the path
    # are never treated as a pattern, and a trailing slash on -o is accepted.
    relative_source="${source_file#"${base_dir%/}"/}"
    if [[ "$relative_source" == /* ]]; then
      echo "Warning: $source_file is not inside $base_dir" >&2
    fi
    assembly_path="$base_dir/$relative_source"
  else
    assembly_path="$source_file"
  fi
else
  # source_file is relative path
  assembly_path="$base_dir/$source_file"
fi

assembly_count=0
if [[ -z "$assembly_content" ]]; then
  # Writing here would truncate the real assembly to an empty file.
  echo "Warning: No assembly content extracted; not writing $assembly_path" >&2
else
  write_exploded_file "$assembly_path" "$assembly_content"
  assembly_count=1
fi

echo ""
echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
echo "Explosion complete!"
echo "Files processed: $file_count modules/snippets + $assembly_count assembly"
if [[ "$dry_run" == false ]] && [[ "$create_backups" == true ]]; then
  echo "Backups created with suffix: $backup_suffix"
fi
echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
# Command-line state for implode-assembly.sh (globals read by later code).
suppress_output=false   # -q/--quiet: print only generated file paths
file_list=""            # -f/--file-list: text file with one .adoc path per line
combined_output=""      # -c/--combined: single file that receives all output

declare -a args=()      # remaining positional arguments (files/directories)

# Parse the script's options into the globals above.
# Arguments: the script's command-line arguments
# Exits with status 1 when -f/--file-list or -c/--combined has no value.
parse_cli_args() {
  while [[ $# -gt 0 ]]; do
    case "$1" in
      -q|--quiet)
        suppress_output=true
        shift
        ;;
      -f|--file-list)
        # Without this check `shift 2` fails when only the flag is left,
        # nothing is shifted, and the loop never terminates.
        if [[ $# -lt 2 ]]; then
          echo "Error: $1 requires a file argument" >&2
          exit 1
        fi
        file_list="$2"
        shift 2
        ;;
      -c|--combined)
        if [[ $# -lt 2 ]]; then
          echo "Error: $1 requires an output file argument" >&2
          exit 1
        fi
        combined_output="$2"
        shift 2
        ;;
      *)
        args+=("$1")
        shift
        ;;
    esac
  done
}

parse_cli_args "$@"
-e "$output_file" ]] && break + ((counter++)) + done + fi echo "// Imploded on: $timestamp" >> "$temp_content" echo "// Git branch: $git_branch" >> "$temp_content" + echo "// Source file: $label" >> "$temp_content" echo "" >> "$temp_content" while IFS= read -r line || [[ -n "$line" ]]; do @@ -120,36 +137,109 @@ implode_file() { echo "" >> "$temp_final" cat "$temp_content" >> "$temp_final" - mv "$temp_final" "$output_file" - rm "$temp_content" + # If combined mode, append to combined file instead of creating individual file + if [[ -n "$combined_output" ]]; then + echo "" >> "$combined_output" + echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━" >> "$combined_output" + echo "// Assembly: $label" >> "$combined_output" + echo "// Imploded on: $timestamp" >> "$combined_output" + echo "// Git branch: $git_branch" >> "$combined_output" + echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━" >> "$combined_output" + echo "" >> "$combined_output" + cat "$temp_final" >> "$combined_output" + rm "$temp_content" "$temp_final" - if [[ "$suppress_output" == false ]]; then - echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━" - echo " Output file: $output_file" - echo " Source: $label" - echo " Timestamp: $timestamp" - echo " Git branch: $git_branch" - echo " Modules: ${#included_modules[@]}" - for m in "${included_modules[@]}"; do echo " • $m"; done - echo " Snippets: ${#included_snippets[@]}" - for s in "${included_snippets[@]}"; do echo " • $s"; done - echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━" - echo "" + if [[ "$suppress_output" == false ]]; then + echo "Appended: $label" + fi else - if [[ "$quiet_header_shown" != true ]]; then - echo "Generated files:" - quiet_header_shown=true + mv "$temp_final" "$output_file" + rm "$temp_content" + + if [[ "$suppress_output" == false ]]; then + echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━" + echo " Output file: $output_file" + echo " Source: $label" + echo " 
Timestamp: $timestamp" + echo " Git branch: $git_branch" + echo " Modules: ${#included_modules[@]}" + for m in "${included_modules[@]}"; do echo " • $m"; done + echo " Snippets: ${#included_snippets[@]}" + for s in "${included_snippets[@]}"; do echo " • $s"; done + echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━" + echo "" + else + if [[ "$quiet_header_shown" != true ]]; then + echo "Generated files:" + quiet_header_shown=true + fi + echo "$output_file" fi - echo "$output_file" fi } # MAIN -if [[ "${#args[@]}" -eq 0 ]]; then - echo "Usage: $0 [--quiet|-q] [more_files_or_dirs...]" +if [[ "${#args[@]}" -eq 0 && -z "$file_list" ]]; then + echo "Usage: $0 [--quiet|-q] [--file-list|-f ] [--combined|-c ] [more_files_or_dirs...]" + echo "" + echo "Options:" + echo " -q, --quiet Suppress detailed output, show only file paths" + echo " -f, --file-list Read .adoc file paths from a text file (one per line)" + echo " -c, --combined Combine all outputs into a single file instead of individual files" + echo "" + echo "Examples:" + echo " $0 assembly.adoc" + echo " $0 -f filelist.txt" + echo " $0 -q -f filelist.txt" + echo " $0 -c combined.txt -f filelist.txt" + echo " $0 --combined all_assemblies.txt file1.adoc file2.adoc directory/" exit 1 fi +# If combined mode is enabled, initialize the combined file +if [[ -n "$combined_output" ]]; then + # Create the combined output file with header + timestamp="$(date '+%Y-%m-%d %H:%M:%S')" + echo "// Combined Imploded Assemblies" > "$combined_output" + echo "// Generated on: $timestamp" >> "$combined_output" + echo "// This file contains multiple imploded OpenShift documentation assemblies" >> "$combined_output" + echo "" >> "$combined_output" + echo "$ai_comment_block" >> "$combined_output" + + if [[ "$suppress_output" == false ]]; then + echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━" + echo " Combined mode enabled" + echo " Output file: $combined_output" + echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━" + echo "" + fi +fi + +# 
Process files from file list if provided +if [[ -n "$file_list" ]]; then + if [[ ! -f "$file_list" ]]; then + echo "Error: File list '$file_list' not found" + exit 1 + fi + + while IFS= read -r line || [[ -n "$line" ]]; do + # Skip empty lines and comments + [[ -z "$line" || "$line" =~ ^[[:space:]]*# ]] && continue + + # Trim whitespace + file=$(echo "$line" | sed 's/^[[:space:]]*//;s/[[:space:]]*$//') + [[ -z "$file" ]] && continue + + if [[ -f "$file" && "$file" == *.adoc ]]; then + abs_path="$(cd "$(dirname "$file")" && pwd)/$(basename "$file")" + implode_file "$abs_path" "$file" + else + echo "Warning: Skipping invalid or non-existent file from list: $file" + fi + done < "$file_list" +fi + +# Process files from command-line arguments for arg in "${args[@]}"; do if [[ -f "$arg" && "$arg" == *.adoc ]]; then abs_path="$(cd "$(dirname "$arg")" && pwd)/$(basename "$arg")" @@ -164,3 +254,14 @@ for arg in "${args[@]}"; do fi done + +# If combined mode is enabled, print final summary +if [[ -n "$combined_output" ]]; then + if [[ "$suppress_output" == false ]]; then + echo "" + echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━" + echo " Combined file created successfully" + echo " Output file: $combined_output" + echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━" + fi +fi