From 9475a60dcce15ab64556ff33c0b03c732b0cffa1 Mon Sep 17 00:00:00 2001
From: Mohammad Reza Omrani <38619429+omranisecurity@users.noreply.github.com>
Date: Tue, 9 Sep 2025 11:46:35 +0000
Subject: [PATCH 01/10] Test BLHawk workflow on test branch
---
.github/workflows/blhawk-ci.yml | 25 +++++++++++++++++++++++++
blhawk.py | 11 +++++++++--
modules/scan.py | 21 ++++++++++++++++++++-
test_sources/vuln_html.html | 3 +++
test_sources/vuln_js.js | 3 +++
test_sources/vuln_md.md | 4 ++++
test_sources/vuln_python.py | 5 +++++
7 files changed, 69 insertions(+), 3 deletions(-)
create mode 100644 .github/workflows/blhawk-ci.yml
create mode 100644 test_sources/vuln_html.html
create mode 100644 test_sources/vuln_js.js
create mode 100644 test_sources/vuln_md.md
create mode 100644 test_sources/vuln_python.py
diff --git a/.github/workflows/blhawk-ci.yml b/.github/workflows/blhawk-ci.yml
new file mode 100644
index 0000000..8a84bfa
--- /dev/null
+++ b/.github/workflows/blhawk-ci.yml
@@ -0,0 +1,25 @@
+# Sample GitHub Actions workflow for BLHawk CI integration
+name: BLHawk Scan
+
+on:
+ push:
+ branches: [ main ]
+ pull_request:
+ branches: [ main ]
+
+jobs:
+ scan:
+ runs-on: ubuntu-latest
+ steps:
+ - name: Checkout code
+ uses: actions/checkout@v3
+ - name: Set up Python
+ uses: actions/setup-python@v4
+ with:
+ python-version: '3.10'
+ - name: Install dependencies
+ run: |
+ pip install -r requirements.txt
+ - name: Run BLHawk source scan
+ run: |
+ python blhawk.py --src ./modules
diff --git a/blhawk.py b/blhawk.py
index b7e191f..5c906dd 100644
--- a/blhawk.py
+++ b/blhawk.py
@@ -4,6 +4,7 @@
def main():
parser = argparse.ArgumentParser(prog='BLHawk', description='Dead links aren\'t always dead!', epilog='version: 0.3.0')
parser.add_argument('-u', '--url', type=str, help='example: https://www.target.com')
+ parser.add_argument('--src', type=str, help='Path to source code directory for scan')
#parser.add_argument('-l','--list', type=str, help='File containing URLs to check')
#parser.add_argument('-t', '--thread', type=int, default=10, help='Number of threads to use (default: 10)')
#parser.add_argument('-s', '--silent', help='show only result in output')
@@ -11,14 +12,20 @@ def main():
args = parser.parse_args()
try:
- inputLoader(
+ if args.url:
+ inputLoader(
url=args.url,
#raw_request=args.list,
#cookie=args.filename,
#thread=args.thread,
#silent=args.silent,
)
-
+ elif args.src:
+ from modules.scan import scan_source
+ scan_source(args.src)
+ else:
+ print("[!] Please provide either --url or --src argument.")
+ exit(1)
except KeyboardInterrupt:
print("\n[!] Scan interrupted by user (Ctrl+C). Exiting...")
diff --git a/modules/scan.py b/modules/scan.py
index 27c6672..839a696 100644
--- a/modules/scan.py
+++ b/modules/scan.py
@@ -1,4 +1,4 @@
-import requests
+import requests, os, re
from colorama import Fore, Style, init
from urllib.parse import urlparse
@@ -60,6 +60,25 @@ def get_service_by_host(host):
return service_name, service_info
return None, None
+def scan_source(path):
+ """
+ Scan source code files in the given directory for URLs and check their vulnerability.
+ """
+ url_pattern = re.compile(r'https?://[\w\.-]+(?:/[\w\./\-\?&%#=]*)?')
+ for root, dirs, files in os.walk(path):
+ for file in files:
+ file_path = os.path.join(root, file)
+ try:
+ with open(file_path, 'r', encoding='utf-8', errors='ignore') as f:
+ content = f.read()
+ urls = url_pattern.findall(content)
+ for url in set(urls):
+ print(f"[SCAN] {file_path}: {url}")
+ check_vulnerability(url)
+ except Exception as e:
+ print(f"[ERROR] Could not read {file_path}: {e}")
+
+
def check_vulnerability(url):
parsed = urlparse(url)
host = parsed.netloc
diff --git a/test_sources/vuln_html.html b/test_sources/vuln_html.html
new file mode 100644
index 0000000..b2a6acd
--- /dev/null
+++ b/test_sources/vuln_html.html
@@ -0,0 +1,3 @@
+
+SoundCloud
+BuyMeACoffee
diff --git a/test_sources/vuln_js.js b/test_sources/vuln_js.js
new file mode 100644
index 0000000..0ca8740
--- /dev/null
+++ b/test_sources/vuln_js.js
@@ -0,0 +1,3 @@
+// نمونه فایل آسیبپذیر جاوااسکریپت
+const link1 = "https://www.npmjs.com/package/nonexistent-package";
+const link2 = "https://play.google.com/store/apps/details?id=fake.app";
diff --git a/test_sources/vuln_md.md b/test_sources/vuln_md.md
new file mode 100644
index 0000000..89bfd2e
--- /dev/null
+++ b/test_sources/vuln_md.md
@@ -0,0 +1,4 @@
+# تست آسیبپذیری لینکها
+
+[PyPI](https://pypi.org/project/nonexistent-package)
+[Dribbble](https://dribbble.com/nonexistentprofile)
diff --git a/test_sources/vuln_python.py b/test_sources/vuln_python.py
new file mode 100644
index 0000000..1c3410b
--- /dev/null
+++ b/test_sources/vuln_python.py
@@ -0,0 +1,5 @@
+# نمونه فایل آسیبپذیر پایتون
+# لینکهای آسیبپذیر
+url1 = "https://t.me/deadlink"
+url2 = "https://github.com/nonexistentuser/nonexistentrepo"
+url3 = "https://medium.com/@deleteduser"
From 6a786283070c15b2a354cf67665d04894843a691 Mon Sep 17 00:00:00 2001
From: Mohammad Reza Omrani <38619429+omranisecurity@users.noreply.github.com>
Date: Tue, 9 Sep 2025 11:57:16 +0000
Subject: [PATCH 02/10] fix: update source directory for BLHawk scan to current
directory
---
.github/workflows/blhawk-ci.yml | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/.github/workflows/blhawk-ci.yml b/.github/workflows/blhawk-ci.yml
index 8a84bfa..c444d86 100644
--- a/.github/workflows/blhawk-ci.yml
+++ b/.github/workflows/blhawk-ci.yml
@@ -22,4 +22,4 @@ jobs:
pip install -r requirements.txt
- name: Run BLHawk source scan
run: |
- python blhawk.py --src ./modules
+ python blhawk.py --src .
From 8c0e3b08901052660f3e7519a112038cafbc06af Mon Sep 17 00:00:00 2001
From: Mohammad Reza Omrani <38619429+omranisecurity@users.noreply.github.com>
Date: Tue, 9 Sep 2025 11:59:55 +0000
Subject: [PATCH 03/10] feat: add initial vulnerable HTML file for testing
BLHawk
---
test_sources/vuln_test.html | 10 ++++++++++
1 file changed, 10 insertions(+)
create mode 100644 test_sources/vuln_test.html
diff --git a/test_sources/vuln_test.html b/test_sources/vuln_test.html
new file mode 100644
index 0000000..12ae192
--- /dev/null
+++ b/test_sources/vuln_test.html
@@ -0,0 +1,10 @@
+
+
+
Test Vulnerable HTML
+
+ CafeBazaar
+ Myket
+ GitHub
+ Telegram
+
+
From 97b5f9a10d03fc47208bf584a2aa7aa142f270e5 Mon Sep 17 00:00:00 2001
From: Mohammad Reza Omrani <38619429+omranisecurity@users.noreply.github.com>
Date: Tue, 9 Sep 2025 12:05:15 +0000
Subject: [PATCH 04/10] feat: add vulnerable HTML file for testing purposes
---
test_sources/vuln_html2.html | 8 ++++++++
1 file changed, 8 insertions(+)
create mode 100644 test_sources/vuln_html2.html
diff --git a/test_sources/vuln_html2.html b/test_sources/vuln_html2.html
new file mode 100644
index 0000000..22cab0c
--- /dev/null
+++ b/test_sources/vuln_html2.html
@@ -0,0 +1,8 @@
+
+ Test Vulnerable HTML
+
+ CafeBazaar
+ Myket
+ sdfsdfsdf
+
+
\ No newline at end of file
From f0342dfc594009a74df9360b760aaf3f4d2d5360 Mon Sep 17 00:00:00 2001
From: Mohammad Reza Omrani <38619429+omranisecurity@users.noreply.github.com>
Date: Tue, 9 Sep 2025 12:09:33 +0000
Subject: [PATCH 05/10] fix: update trigger branches for BLHawk CI workflow to
test-action
---
.github/workflows/blhawk-ci.yml | 4 +---
1 file changed, 1 insertion(+), 3 deletions(-)
diff --git a/.github/workflows/blhawk-ci.yml b/.github/workflows/blhawk-ci.yml
index c444d86..5cd6731 100644
--- a/.github/workflows/blhawk-ci.yml
+++ b/.github/workflows/blhawk-ci.yml
@@ -3,9 +3,7 @@ name: BLHawk Scan
on:
push:
- branches: [ main ]
- pull_request:
- branches: [ main ]
+ branches: [ test-action ]
jobs:
scan:
From 0e521aa59e251b8a17aeb4f0f82886374b770edd Mon Sep 17 00:00:00 2001
From: Mohammad Reza Omrani <38619429+omranisecurity@users.noreply.github.com>
Date: Tue, 9 Sep 2025 12:34:33 +0000
Subject: [PATCH 06/10] fix: update source directory for BLHawk scan and add
vulnerable HTML file for testing
---
.github/workflows/blhawk-ci.yml | 2 +-
test_sources/vuln_html3.html | 8 ++++++++
2 files changed, 9 insertions(+), 1 deletion(-)
create mode 100644 test_sources/vuln_html3.html
diff --git a/.github/workflows/blhawk-ci.yml b/.github/workflows/blhawk-ci.yml
index 5cd6731..50d1e9d 100644
--- a/.github/workflows/blhawk-ci.yml
+++ b/.github/workflows/blhawk-ci.yml
@@ -20,4 +20,4 @@ jobs:
pip install -r requirements.txt
- name: Run BLHawk source scan
run: |
- python blhawk.py --src .
+ python blhawk.py --src ./test_sources
\ No newline at end of file
diff --git a/test_sources/vuln_html3.html b/test_sources/vuln_html3.html
new file mode 100644
index 0000000..22cab0c
--- /dev/null
+++ b/test_sources/vuln_html3.html
@@ -0,0 +1,8 @@
+
+ Test Vulnerable HTML
+
+ CafeBazaar
+ Myket
+ sdfsdfsdf
+
+
\ No newline at end of file
From 85a29b00705140a3ed37b90203a85ad7de1ff606 Mon Sep 17 00:00:00 2001
From: Mohammad Reza Omrani <38619429+omranisecurity@users.noreply.github.com>
Date: Tue, 9 Sep 2025 15:33:14 +0000
Subject: [PATCH 07/10] chore: remove obsolete vulnerable test files from the
repository
---
test_sources/vuln_html.html | 3 ---
test_sources/vuln_html2.html | 8 --------
test_sources/vuln_js.js | 3 ---
test_sources/vuln_md.md | 4 ----
test_sources/vuln_python.py | 5 -----
test_sources/vuln_test.html | 10 ----------
6 files changed, 33 deletions(-)
delete mode 100644 test_sources/vuln_html.html
delete mode 100644 test_sources/vuln_html2.html
delete mode 100644 test_sources/vuln_js.js
delete mode 100644 test_sources/vuln_md.md
delete mode 100644 test_sources/vuln_python.py
delete mode 100644 test_sources/vuln_test.html
diff --git a/test_sources/vuln_html.html b/test_sources/vuln_html.html
deleted file mode 100644
index b2a6acd..0000000
--- a/test_sources/vuln_html.html
+++ /dev/null
@@ -1,3 +0,0 @@
-
-SoundCloud
-BuyMeACoffee
diff --git a/test_sources/vuln_html2.html b/test_sources/vuln_html2.html
deleted file mode 100644
index 22cab0c..0000000
--- a/test_sources/vuln_html2.html
+++ /dev/null
@@ -1,8 +0,0 @@
-
- Test Vulnerable HTML
-
- CafeBazaar
- Myket
- sdfsdfsdf
-
-
\ No newline at end of file
diff --git a/test_sources/vuln_js.js b/test_sources/vuln_js.js
deleted file mode 100644
index 0ca8740..0000000
--- a/test_sources/vuln_js.js
+++ /dev/null
@@ -1,3 +0,0 @@
-// نمونه فایل آسیبپذیر جاوااسکریپت
-const link1 = "https://www.npmjs.com/package/nonexistent-package";
-const link2 = "https://play.google.com/store/apps/details?id=fake.app";
diff --git a/test_sources/vuln_md.md b/test_sources/vuln_md.md
deleted file mode 100644
index 89bfd2e..0000000
--- a/test_sources/vuln_md.md
+++ /dev/null
@@ -1,4 +0,0 @@
-# تست آسیبپذیری لینکها
-
-[PyPI](https://pypi.org/project/nonexistent-package)
-[Dribbble](https://dribbble.com/nonexistentprofile)
diff --git a/test_sources/vuln_python.py b/test_sources/vuln_python.py
deleted file mode 100644
index 1c3410b..0000000
--- a/test_sources/vuln_python.py
+++ /dev/null
@@ -1,5 +0,0 @@
-# نمونه فایل آسیبپذیر پایتون
-# لینکهای آسیبپذیر
-url1 = "https://t.me/deadlink"
-url2 = "https://github.com/nonexistentuser/nonexistentrepo"
-url3 = "https://medium.com/@deleteduser"
diff --git a/test_sources/vuln_test.html b/test_sources/vuln_test.html
deleted file mode 100644
index 12ae192..0000000
--- a/test_sources/vuln_test.html
+++ /dev/null
@@ -1,10 +0,0 @@
-
-
- Test Vulnerable HTML
-
- CafeBazaar
- Myket
- GitHub
- Telegram
-
-
From 1230f4a59a2adfc658bf5477720a1b1f430c337c Mon Sep 17 00:00:00 2001
From: Mohammad Reza Omrani <38619429+omranisecurity@users.noreply.github.com>
Date: Tue, 9 Sep 2025 15:39:06 +0000
Subject: [PATCH 08/10] feat: add new vulnerable HTML file for testing purposes
---
test_sources/vh-v4.html | 7 +++++++
1 file changed, 7 insertions(+)
create mode 100644 test_sources/vh-v4.html
diff --git a/test_sources/vh-v4.html b/test_sources/vh-v4.html
new file mode 100644
index 0000000..f72593b
--- /dev/null
+++ b/test_sources/vh-v4.html
@@ -0,0 +1,7 @@
+
+ Test Vulnerable HTML
+
+ sdfsdfsdf
+ GitHub
+
+
\ No newline at end of file
From 4fd726c418a75415184a7f044da72170d751a247 Mon Sep 17 00:00:00 2001
From: Mohammad Reza Omrani <38619429+omranisecurity@users.noreply.github.com>
Date: Thu, 11 Sep 2025 12:03:16 +0000
Subject: [PATCH 09/10] feat: implement broken link scan functionality in CI
workflow
---
.github/scripts/broken_link_commit_check.py | 38 +++++++++++++++++++++
.github/workflows/blhawk-ci.yml | 38 +++++++++++++++------
test_sources/vuln_html.html | 0
test_sources/vuln_js.js | 0
test_sources/vuln_md.md | 0
test_sources/vuln_python.py | 0
test_sources/vuln_test.html | 38 +++++++++++++++++++++
7 files changed, 104 insertions(+), 10 deletions(-)
create mode 100644 .github/scripts/broken_link_commit_check.py
create mode 100644 test_sources/vuln_html.html
create mode 100644 test_sources/vuln_js.js
create mode 100644 test_sources/vuln_md.md
create mode 100644 test_sources/vuln_python.py
create mode 100644 test_sources/vuln_test.html
diff --git a/.github/scripts/broken_link_commit_check.py b/.github/scripts/broken_link_commit_check.py
new file mode 100644
index 0000000..22cfbe9
--- /dev/null
+++ b/.github/scripts/broken_link_commit_check.py
@@ -0,0 +1,38 @@
+import requests
+import re
+from pathlib import Path
+
+# Simple regex to match URLs
+url_pattern = r'https?://[^\s"\']+'
+
+# Only scan text-based files
+TEXT_EXTENSIONS = [".js", ".html", ".md", ".txt"]
+files_to_scan = [f for f in Path(".").rglob("*.*") if f.suffix in TEXT_EXTENSIONS]
+
+broken_links = []
+
+def check_link(link, file_path):
+ try:
+ r = requests.get(link, timeout=5, allow_redirects=True)
+ if r.status_code == 404:
+ broken_links.append(f"{file_path}: {link}")
+ except Exception:
+ broken_links.append(f"{file_path}: {link} (error)")
+
+# Iterate through all selected files
+for file in files_to_scan:
+ try:
+ with open(file, "r", encoding="utf-8") as f:
+ for line in f:
+ for link in re.findall(url_pattern, line):
+ check_link(link, file)
+ except Exception:
+ continue
+
+# Write broken links report to a temporary file
+with open("broken_links_report.txt", "w", encoding="utf-8") as f:
+ if broken_links:
+ for link in broken_links:
+ f.write(link + "\n")
+ else:
+ f.write("No broken links found.\n")
diff --git a/.github/workflows/blhawk-ci.yml b/.github/workflows/blhawk-ci.yml
index 50d1e9d..1df6059 100644
--- a/.github/workflows/blhawk-ci.yml
+++ b/.github/workflows/blhawk-ci.yml
@@ -1,23 +1,41 @@
-# Sample GitHub Actions workflow for BLHawk CI integration
-name: BLHawk Scan
+name: Broken Link Scan on Commit
on:
push:
- branches: [ test-action ]
+ branches:
+ - test-action
jobs:
- scan:
+ broken-link-check:
runs-on: ubuntu-latest
+
steps:
- - name: Checkout code
+ - name: Checkout repository
uses: actions/checkout@v3
+
- name: Set up Python
uses: actions/setup-python@v4
with:
- python-version: '3.10'
+ python-version: '3.11'
+
- name: Install dependencies
+ run: pip install requests
+
+ - name: Run Broken Link Scan
+ id: scan
run: |
- pip install -r requirements.txt
- - name: Run BLHawk source scan
- run: |
- python blhawk.py --src ./test_sources
\ No newline at end of file
+ python .github/scripts/broken_link_commit_check.py
+ echo "links_report<<EOF" >> $GITHUB_OUTPUT
+ cat broken_links_report.txt >> $GITHUB_OUTPUT
+ echo "EOF" >> $GITHUB_OUTPUT
+
+ - name: Create Commit Check
+ uses: peter-evans/create-or-update-check@v2
+ with:
+ token: ${{ secrets.GITHUB_TOKEN }}
+ name: "Broken Link Scan"
+ head-sha: ${{ github.sha }}
+ status: completed
+ conclusion: neutral
+ output-title: "Broken Link Scan Report"
+ output-summary: ${{ steps.scan.outputs.links_report }}
diff --git a/test_sources/vuln_html.html b/test_sources/vuln_html.html
new file mode 100644
index 0000000..e69de29
diff --git a/test_sources/vuln_js.js b/test_sources/vuln_js.js
new file mode 100644
index 0000000..e69de29
diff --git a/test_sources/vuln_md.md b/test_sources/vuln_md.md
new file mode 100644
index 0000000..e69de29
diff --git a/test_sources/vuln_python.py b/test_sources/vuln_python.py
new file mode 100644
index 0000000..e69de29
diff --git a/test_sources/vuln_test.html b/test_sources/vuln_test.html
new file mode 100644
index 0000000..22cfbe9
--- /dev/null
+++ b/test_sources/vuln_test.html
@@ -0,0 +1,38 @@
+import requests
+import re
+from pathlib import Path
+
+# Simple regex to match URLs
+url_pattern = r'https?://[^\s"\']+'
+
+# Only scan text-based files
+TEXT_EXTENSIONS = [".js", ".html", ".md", ".txt"]
+files_to_scan = [f for f in Path(".").rglob("*.*") if f.suffix in TEXT_EXTENSIONS]
+
+broken_links = []
+
+def check_link(link, file_path):
+ try:
+ r = requests.get(link, timeout=5, allow_redirects=True)
+ if r.status_code == 404:
+ broken_links.append(f"{file_path}: {link}")
+ except Exception:
+ broken_links.append(f"{file_path}: {link} (error)")
+
+# Iterate through all selected files
+for file in files_to_scan:
+ try:
+ with open(file, "r", encoding="utf-8") as f:
+ for line in f:
+ for link in re.findall(url_pattern, line):
+ check_link(link, file)
+ except Exception:
+ continue
+
+# Write broken links report to a temporary file
+with open("broken_links_report.txt", "w", encoding="utf-8") as f:
+ if broken_links:
+ for link in broken_links:
+ f.write(link + "\n")
+ else:
+ f.write("No broken links found.\n")
From c14145acbf26d2c8a2336e6c588835a0609d4323 Mon Sep 17 00:00:00 2001
From: Mohammad Reza Omrani <38619429+omranisecurity@users.noreply.github.com>
Date: Thu, 11 Sep 2025 12:20:35 +0000
Subject: [PATCH 10/10] ci: run broken link scan on pull requests and post the report as a PR comment
---
.github/workflows/blhawk-ci.yml | 22 +++++++++++-----------
1 file changed, 11 insertions(+), 11 deletions(-)
diff --git a/.github/workflows/blhawk-ci.yml b/.github/workflows/blhawk-ci.yml
index 1df6059..2efe3e1 100644
--- a/.github/workflows/blhawk-ci.yml
+++ b/.github/workflows/blhawk-ci.yml
@@ -1,9 +1,9 @@
-name: Broken Link Scan on Commit
+name: Broken Link Scan on PR
on:
- push:
+ pull_request:
branches:
- - test-action
+ - test-action # only run for pull requests targeting the 'test-action' branch
jobs:
broken-link-check:
@@ -29,13 +29,13 @@ jobs:
cat broken_links_report.txt >> $GITHUB_OUTPUT
echo "EOF" >> $GITHUB_OUTPUT
- - name: Create Commit Check
- uses: peter-evans/create-or-update-check@v2
+ - name: Create or Update Comment
+ uses: peter-evans/create-or-update-comment@v4
with:
token: ${{ secrets.GITHUB_TOKEN }}
- name: "Broken Link Scan"
- head-sha: ${{ github.sha }}
- status: completed
- conclusion: neutral
- output-title: "Broken Link Scan Report"
- output-summary: ${{ steps.scan.outputs.links_report }}
+ issue-number: ${{ github.event.pull_request.number }}
+ body: |
+ **Broken Link Scan Report**
+
+ ${{ steps.scan.outputs.links_report }}
+ edit-mode: replace