From 9475a60dcce15ab64556ff33c0b03c732b0cffa1 Mon Sep 17 00:00:00 2001 From: Mohammad Reza Omrani <38619429+omranisecurity@users.noreply.github.com> Date: Tue, 9 Sep 2025 11:46:35 +0000 Subject: [PATCH 01/10] Test BLHawk workflow on test branch --- .github/workflows/blhawk-ci.yml | 25 +++++++++++++++++++++++++ blhawk.py | 11 +++++++++-- modules/scan.py | 21 ++++++++++++++++++++- test_sources/vuln_html.html | 3 +++ test_sources/vuln_js.js | 3 +++ test_sources/vuln_md.md | 4 ++++ test_sources/vuln_python.py | 5 +++++ 7 files changed, 69 insertions(+), 3 deletions(-) create mode 100644 .github/workflows/blhawk-ci.yml create mode 100644 test_sources/vuln_html.html create mode 100644 test_sources/vuln_js.js create mode 100644 test_sources/vuln_md.md create mode 100644 test_sources/vuln_python.py diff --git a/.github/workflows/blhawk-ci.yml b/.github/workflows/blhawk-ci.yml new file mode 100644 index 0000000..8a84bfa --- /dev/null +++ b/.github/workflows/blhawk-ci.yml @@ -0,0 +1,25 @@ +# Sample GitHub Actions workflow for BLHawk CI integration +name: BLHawk Scan + +on: + push: + branches: [ main ] + pull_request: + branches: [ main ] + +jobs: + scan: + runs-on: ubuntu-latest + steps: + - name: Checkout code + uses: actions/checkout@v3 + - name: Set up Python + uses: actions/setup-python@v4 + with: + python-version: '3.10' + - name: Install dependencies + run: | + pip install -r requirements.txt + - name: Run BLHawk source scan + run: | + python blhawk.py --src ./modules diff --git a/blhawk.py b/blhawk.py index b7e191f..5c906dd 100644 --- a/blhawk.py +++ b/blhawk.py @@ -4,6 +4,7 @@ def main(): parser = argparse.ArgumentParser(prog='BLHawk', description='Dead links aren\'t always dead!', epilog='version: 0.3.0') parser.add_argument('-u', '--url', type=str, help='example: https://www.target.com') + parser.add_argument('--src', type=str, help='Path to source code directory for scan') #parser.add_argument('-l','--list', type=str, help='File containing URLs to check') #parser.add_argument('-t', '--thread', type=int, default=10, help='Number of threads to use (default: 10)') #parser.add_argument('-s', '--silent', help='show only result in output') @@ -11,14 +12,20 @@ def main(): args = parser.parse_args() try: - inputLoader( + if args.url: + inputLoader( url=args.url, #raw_request=args.list, #cookie=args.filename, #thread=args.thread, #silent=args.silent, ) - + elif args.src: + from modules.scan import scan_source + scan_source(args.src) + else: + print("[!] Please provide either --url or --src argument.") + exit(1) except KeyboardInterrupt: print("\n[!] Scan interrupted by user (Ctrl+C). Exiting...") diff --git a/modules/scan.py b/modules/scan.py index 27c6672..839a696 100644 --- a/modules/scan.py +++ b/modules/scan.py @@ -1,4 +1,4 @@ -import requests +import requests, os, re from colorama import Fore, Style, init from urllib.parse import urlparse @@ -60,6 +60,25 @@ def get_service_by_host(host): return service_name, service_info return None, None +def scan_source(path): + """ + Scan source code files in the given directory for URLs and check their vulnerability. + """ + url_pattern = re.compile(r'https?://[\w\.-]+(?:/[\w\./\-\?&%#=]*)?') + for root, dirs, files in os.walk(path): + for file in files: + file_path = os.path.join(root, file) + try: + with open(file_path, 'r', encoding='utf-8', errors='ignore') as f: + content = f.read() + urls = url_pattern.findall(content) + for url in set(urls): + print(f"[SCAN] {file_path}: {url}") + check_vulnerability(url) + except Exception as e: + print(f"[ERROR] Could not read {file_path}: {e}") + + def check_vulnerability(url): parsed = urlparse(url) host = parsed.netloc diff --git a/test_sources/vuln_html.html b/test_sources/vuln_html.html new file mode 100644 index 0000000..b2a6acd --- /dev/null +++ b/test_sources/vuln_html.html @@ -0,0 +1,3 @@ + +SoundCloud +BuyMeACoffee diff --git a/test_sources/vuln_js.js b/test_sources/vuln_js.js new file mode 100644 index 0000000..0ca8740 --- /dev/null +++ b/test_sources/vuln_js.js @@ -0,0 +1,3 @@ +// نمونه فایل آسیب‌پذیر جاوااسکریپت +const link1 = "https://www.npmjs.com/package/nonexistent-package"; +const link2 = "https://play.google.com/store/apps/details?id=fake.app"; diff --git a/test_sources/vuln_md.md b/test_sources/vuln_md.md new file mode 100644 index 0000000..89bfd2e --- /dev/null +++ b/test_sources/vuln_md.md @@ -0,0 +1,4 @@ +# تست آسیب‌پذیری لینک‌ها + +[PyPI](https://pypi.org/project/nonexistent-package) +[Dribbble](https://dribbble.com/nonexistentprofile) diff --git a/test_sources/vuln_python.py b/test_sources/vuln_python.py new file mode 100644 index 0000000..1c3410b --- /dev/null +++ b/test_sources/vuln_python.py @@ -0,0 +1,5 @@ +# نمونه فایل آسیب‌پذیر پایتون +# لینک‌های آسیب‌پذیر +url1 = "https://t.me/deadlink" +url2 = "https://github.com/nonexistentuser/nonexistentrepo" +url3 = "https://medium.com/@deleteduser" From 6a786283070c15b2a354cf67665d04894843a691 Mon Sep 17 00:00:00 2001 From: Mohammad Reza Omrani <38619429+omranisecurity@users.noreply.github.com> Date: Tue, 9 Sep 2025 11:57:16 +0000 Subject: [PATCH 02/10] fix: update source directory for BLHawk scan to current directory --- .github/workflows/blhawk-ci.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/blhawk-ci.yml b/.github/workflows/blhawk-ci.yml index 8a84bfa..c444d86 100644 --- a/.github/workflows/blhawk-ci.yml +++ b/.github/workflows/blhawk-ci.yml @@ -22,4 +22,4 @@ jobs: pip install -r requirements.txt - name: Run BLHawk source scan run: | - python blhawk.py --src ./modules + python blhawk.py --src . From 8c0e3b08901052660f3e7519a112038cafbc06af Mon Sep 17 00:00:00 2001 From: Mohammad Reza Omrani <38619429+omranisecurity@users.noreply.github.com> Date: Tue, 9 Sep 2025 11:59:55 +0000 Subject: [PATCH 03/10] feat: add initial vulnerable HTML file for testing BLHawk --- test_sources/vuln_test.html | 10 ++++++++++ 1 file changed, 10 insertions(+) create mode 100644 test_sources/vuln_test.html diff --git a/test_sources/vuln_test.html b/test_sources/vuln_test.html new file mode 100644 index 0000000..12ae192 --- /dev/null +++ b/test_sources/vuln_test.html @@ -0,0 +1,10 @@ + + + Test Vulnerable HTML + + CafeBazaar + Myket + GitHub + Telegram + + From 97b5f9a10d03fc47208bf584a2aa7aa142f270e5 Mon Sep 17 00:00:00 2001 From: Mohammad Reza Omrani <38619429+omranisecurity@users.noreply.github.com> Date: Tue, 9 Sep 2025 12:05:15 +0000 Subject: [PATCH 04/10] feat: add vulnerable HTML file for testing purposes --- test_sources/vuln_html2.html | 8 ++++++++ 1 file changed, 8 insertions(+) create mode 100644 test_sources/vuln_html2.html diff --git a/test_sources/vuln_html2.html b/test_sources/vuln_html2.html new file mode 100644 index 0000000..22cab0c --- /dev/null +++ b/test_sources/vuln_html2.html @@ -0,0 +1,8 @@ + + Test Vulnerable HTML + + CafeBazaar + Myket + sdfsdfsdf + + \ No newline at end of file From f0342dfc594009a74df9360b760aaf3f4d2d5360 Mon Sep 17 00:00:00 2001 From: Mohammad Reza Omrani <38619429+omranisecurity@users.noreply.github.com> Date: Tue, 9 Sep 2025 12:09:33 +0000 Subject: [PATCH 05/10] fix: update trigger branches for BLHawk CI workflow to test-action --- .github/workflows/blhawk-ci.yml | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/.github/workflows/blhawk-ci.yml b/.github/workflows/blhawk-ci.yml index c444d86..5cd6731 100644 --- a/.github/workflows/blhawk-ci.yml +++ b/.github/workflows/blhawk-ci.yml @@ -3,9 +3,7 @@ name: BLHawk Scan on: push: - branches: [ main ] - pull_request: - branches: [ main ] + branches: [ test-action ] jobs: scan: From 0e521aa59e251b8a17aeb4f0f82886374b770edd Mon Sep 17 00:00:00 2001 From: Mohammad Reza Omrani <38619429+omranisecurity@users.noreply.github.com> Date: Tue, 9 Sep 2025 12:34:33 +0000 Subject: [PATCH 06/10] fix: update source directory for BLHawk scan and add vulnerable HTML file for testing --- .github/workflows/blhawk-ci.yml | 2 +- test_sources/vuln_html3.html | 8 ++++++++ 2 files changed, 9 insertions(+), 1 deletion(-) create mode 100644 test_sources/vuln_html3.html diff --git a/.github/workflows/blhawk-ci.yml b/.github/workflows/blhawk-ci.yml index 5cd6731..50d1e9d 100644 --- a/.github/workflows/blhawk-ci.yml +++ b/.github/workflows/blhawk-ci.yml @@ -20,4 +20,4 @@ jobs: pip install -r requirements.txt - name: Run BLHawk source scan run: | - python blhawk.py --src . + python blhawk.py --src ./test_sources \ No newline at end of file diff --git a/test_sources/vuln_html3.html b/test_sources/vuln_html3.html new file mode 100644 index 0000000..22cab0c --- /dev/null +++ b/test_sources/vuln_html3.html @@ -0,0 +1,8 @@ + + Test Vulnerable HTML + + CafeBazaar + Myket + sdfsdfsdf + + \ No newline at end of file From 85a29b00705140a3ed37b90203a85ad7de1ff606 Mon Sep 17 00:00:00 2001 From: Mohammad Reza Omrani <38619429+omranisecurity@users.noreply.github.com> Date: Tue, 9 Sep 2025 15:33:14 +0000 Subject: [PATCH 07/10] chore: remove obsolete vulnerable test files from the repository --- test_sources/vuln_html.html | 3 --- test_sources/vuln_html2.html | 8 -------- test_sources/vuln_js.js | 3 --- test_sources/vuln_md.md | 4 ---- test_sources/vuln_python.py | 5 ----- test_sources/vuln_test.html | 10 ---------- 6 files changed, 33 deletions(-) delete mode 100644 test_sources/vuln_html.html delete mode 100644 test_sources/vuln_html2.html delete mode 100644 test_sources/vuln_js.js delete mode 100644 test_sources/vuln_md.md delete mode 100644 test_sources/vuln_python.py delete mode 100644 test_sources/vuln_test.html diff --git a/test_sources/vuln_html.html b/test_sources/vuln_html.html deleted file mode 100644 index b2a6acd..0000000 --- a/test_sources/vuln_html.html +++ /dev/null @@ -1,3 +0,0 @@ - -SoundCloud -BuyMeACoffee diff --git a/test_sources/vuln_html2.html b/test_sources/vuln_html2.html deleted file mode 100644 index 22cab0c..0000000 --- a/test_sources/vuln_html2.html +++ /dev/null @@ -1,8 +0,0 @@ - - Test Vulnerable HTML - - CafeBazaar - Myket - sdfsdfsdf - - \ No newline at end of file diff --git a/test_sources/vuln_js.js b/test_sources/vuln_js.js deleted file mode 100644 index 0ca8740..0000000 --- a/test_sources/vuln_js.js +++ /dev/null @@ -1,3 +0,0 @@ -// نمونه فایل آسیب‌پذیر جاوااسکریپت -const link1 = "https://www.npmjs.com/package/nonexistent-package"; -const link2 = "https://play.google.com/store/apps/details?id=fake.app"; diff --git a/test_sources/vuln_md.md b/test_sources/vuln_md.md deleted file mode 100644 index 89bfd2e..0000000 --- a/test_sources/vuln_md.md +++ /dev/null @@ -1,4 +0,0 @@ -# تست آسیب‌پذیری لینک‌ها - -[PyPI](https://pypi.org/project/nonexistent-package) -[Dribbble](https://dribbble.com/nonexistentprofile) diff --git a/test_sources/vuln_python.py b/test_sources/vuln_python.py deleted file mode 100644 index 1c3410b..0000000 --- a/test_sources/vuln_python.py +++ /dev/null @@ -1,5 +0,0 @@ -# نمونه فایل آسیب‌پذیر پایتون -# لینک‌های آسیب‌پذیر -url1 = "https://t.me/deadlink" -url2 = "https://github.com/nonexistentuser/nonexistentrepo" -url3 = "https://medium.com/@deleteduser" diff --git a/test_sources/vuln_test.html b/test_sources/vuln_test.html deleted file mode 100644 index 12ae192..0000000 --- a/test_sources/vuln_test.html +++ /dev/null @@ -1,10 +0,0 @@ - - - Test Vulnerable HTML - - CafeBazaar - Myket - GitHub - Telegram - - From 1230f4a59a2adfc658bf5477720a1b1f430c337c Mon Sep 17 00:00:00 2001 From: Mohammad Reza Omrani <38619429+omranisecurity@users.noreply.github.com> Date: Tue, 9 Sep 2025 15:39:06 +0000 Subject: [PATCH 08/10] feat: add new vulnerable HTML file for testing purposes --- test_sources/vh-v4.html | 7 +++++++ 1 file changed, 7 insertions(+) create mode 100644 test_sources/vh-v4.html diff --git a/test_sources/vh-v4.html b/test_sources/vh-v4.html new file mode 100644 index 0000000..f72593b --- /dev/null +++ b/test_sources/vh-v4.html @@ -0,0 +1,7 @@ + + Test Vulnerable HTML + + sdfsdfsdf + GitHub + + \ No newline at end of file From 4fd726c418a75415184a7f044da72170d751a247 Mon Sep 17 00:00:00 2001 From: Mohammad Reza Omrani <38619429+omranisecurity@users.noreply.github.com> Date: Thu, 11 Sep 2025 12:03:16 +0000 Subject: [PATCH 09/10] feat: implement broken link scan functionality in CI workflow --- .github/scripts/broken_link_commit_check.py | 38 +++++++++++++++++++++ .github/workflows/blhawk-ci.yml | 38 +++++++++++++++------ test_sources/vuln_html.html | 0 test_sources/vuln_js.js | 0 test_sources/vuln_md.md | 0 test_sources/vuln_python.py | 0 test_sources/vuln_test.html | 38 +++++++++++++++++++++ 7 files changed, 104 insertions(+), 10 deletions(-) create mode 100644 .github/scripts/broken_link_commit_check.py create mode 100644 test_sources/vuln_html.html create mode 100644 test_sources/vuln_js.js create mode 100644 test_sources/vuln_md.md create mode 100644 test_sources/vuln_python.py create mode 100644 test_sources/vuln_test.html diff --git a/.github/scripts/broken_link_commit_check.py b/.github/scripts/broken_link_commit_check.py new file mode 100644 index 0000000..22cfbe9 --- /dev/null +++ b/.github/scripts/broken_link_commit_check.py @@ -0,0 +1,38 @@ +import requests +import re +from pathlib import Path + +# Simple regex to match URLs +url_pattern = r'https?://[^\s"\']+' + +# Only scan text-based files +TEXT_EXTENSIONS = [".js", ".html", ".md", ".txt"] +files_to_scan = [f for f in Path(".").rglob("*.*") if f.suffix in TEXT_EXTENSIONS] + +broken_links = [] + +def check_link(link, file_path): + try: + r = requests.get(link, timeout=5, allow_redirects=True) + if r.status_code == 404: + broken_links.append(f"{file_path}: {link}") + except Exception: + broken_links.append(f"{file_path}: {link} (error)") + +# Iterate through all selected files +for file in files_to_scan: + try: + with open(file, "r", encoding="utf-8") as f: + for line in f: + for link in re.findall(url_pattern, line): + check_link(link, file) + except Exception: + continue + +# Write broken links report to a temporary file +with open("broken_links_report.txt", "w", encoding="utf-8") as f: + if broken_links: + for link in broken_links: + f.write(link + "\n") + else: + f.write("No broken links found.\n") diff --git a/.github/workflows/blhawk-ci.yml b/.github/workflows/blhawk-ci.yml index 50d1e9d..1df6059 100644 --- a/.github/workflows/blhawk-ci.yml +++ b/.github/workflows/blhawk-ci.yml @@ -1,23 +1,41 @@ -# Sample GitHub Actions workflow for BLHawk CI integration -name: BLHawk Scan +name: Broken Link Scan on Commit on: push: - branches: [ test-action ] + branches: + - test-action jobs: - scan: + broken-link-check: runs-on: ubuntu-latest + steps: - - name: Checkout code + - name: Checkout repository uses: actions/checkout@v3 + - name: Set up Python uses: actions/setup-python@v4 with: - python-version: '3.10' + python-version: '3.11' + - name: Install dependencies + run: pip install requests + + - name: Run Broken Link Scan + id: scan run: | - pip install -r requirements.txt - - name: Run BLHawk source scan - run: | - python blhawk.py --src ./test_sources \ No newline at end of file + python .github/scripts/broken_link_commit_check.py + echo "links_report<> $GITHUB_OUTPUT + cat broken_links_report.txt >> $GITHUB_OUTPUT + echo "EOF" >> $GITHUB_OUTPUT + + - name: Create Commit Check + uses: peter-evans/create-or-update-check@v2 + with: + token: ${{ secrets.GITHUB_TOKEN }} + name: "Broken Link Scan" + head-sha: ${{ github.sha }} + status: completed + conclusion: neutral + output-title: "Broken Link Scan Report" + output-summary: ${{ steps.scan.outputs.links_report }} diff --git a/test_sources/vuln_html.html b/test_sources/vuln_html.html new file mode 100644 index 0000000..e69de29 diff --git a/test_sources/vuln_js.js b/test_sources/vuln_js.js new file mode 100644 index 0000000..e69de29 diff --git a/test_sources/vuln_md.md b/test_sources/vuln_md.md new file mode 100644 index 0000000..e69de29 diff --git a/test_sources/vuln_python.py b/test_sources/vuln_python.py new file mode 100644 index 0000000..e69de29 diff --git a/test_sources/vuln_test.html b/test_sources/vuln_test.html new file mode 100644 index 0000000..22cfbe9 --- /dev/null +++ b/test_sources/vuln_test.html @@ -0,0 +1,38 @@ +import requests +import re +from pathlib import Path + +# Simple regex to match URLs +url_pattern = r'https?://[^\s"\']+' + +# Only scan text-based files +TEXT_EXTENSIONS = [".js", ".html", ".md", ".txt"] +files_to_scan = [f for f in Path(".").rglob("*.*") if f.suffix in TEXT_EXTENSIONS] + +broken_links = [] + +def check_link(link, file_path): + try: + r = requests.get(link, timeout=5, allow_redirects=True) + if r.status_code == 404: + broken_links.append(f"{file_path}: {link}") + except Exception: + broken_links.append(f"{file_path}: {link} (error)") + +# Iterate through all selected files +for file in files_to_scan: + try: + with open(file, "r", encoding="utf-8") as f: + for line in f: + for link in re.findall(url_pattern, line): + check_link(link, file) + except Exception: + continue + +# Write broken links report to a temporary file +with open("broken_links_report.txt", "w", encoding="utf-8") as f: + if broken_links: + for link in broken_links: + f.write(link + "\n") + else: + f.write("No broken links found.\n") From c14145acbf26d2c8a2336e6c588835a0609d4323 Mon Sep 17 00:00:00 2001 From: Mohammad Reza Omrani <38619429+omranisecurity@users.noreply.github.com> Date: Thu, 11 Sep 2025 12:20:35 +0000 Subject: [PATCH 10/10] update --- .github/workflows/blhawk-ci.yml | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/.github/workflows/blhawk-ci.yml b/.github/workflows/blhawk-ci.yml index 1df6059..2efe3e1 100644 --- a/.github/workflows/blhawk-ci.yml +++ b/.github/workflows/blhawk-ci.yml @@ -1,9 +1,9 @@ -name: Broken Link Scan on Commit +name: Broken Link Scan on PR on: - push: + pull_request: branches: - - test-action + - test-action # only run on 'test-action' branch jobs: broken-link-check: @@ -29,13 +29,13 @@ jobs: cat broken_links_report.txt >> $GITHUB_OUTPUT echo "EOF" >> $GITHUB_OUTPUT - - name: Create Commit Check - uses: peter-evans/create-or-update-check@v2 + - name: Create or Update Comment + uses: peter-evans/create-or-update-comment@v4 with: token: ${{ secrets.GITHUB_TOKEN }} - name: "Broken Link Scan" - head-sha: ${{ github.sha }} - status: completed - conclusion: neutral - output-title: "Broken Link Scan Report" - output-summary: ${{ steps.scan.outputs.links_report }} + issue-number: ${{ github.event.pull_request.number }} + body: | + **Broken Link Scan Report** + + ${{ steps.scan.outputs.links_report }} + edit-mode: replace