38 changes: 38 additions & 0 deletions .github/scripts/broken_link_commit_check.py
@@ -0,0 +1,38 @@
import requests
import re
from pathlib import Path

# Simple regex to match URLs
url_pattern = r'https?://[^\s"\']+'

# Only scan text-based files
TEXT_EXTENSIONS = [".js", ".html", ".md", ".txt"]
files_to_scan = [f for f in Path(".").rglob("*.*") if f.suffix in TEXT_EXTENSIONS]

broken_links = []

def check_link(link, file_path):
try:
r = requests.get(link, timeout=5, allow_redirects=True)
if r.status_code == 404:
broken_links.append(f"{file_path}: {link}")
except Exception:
broken_links.append(f"{file_path}: {link} (error)")

# Iterate through all selected files
for file in files_to_scan:
try:
with open(file, "r", encoding="utf-8") as f:
for line in f:
for link in re.findall(url_pattern, line):
check_link(link, file)
except Exception:
continue

# Write the broken-link report to a file for the workflow step to read
with open("broken_links_report.txt", "w", encoding="utf-8") as f:
if broken_links:
for link in broken_links:
f.write(link + "\n")
else:
f.write("No broken links found.\n")
41 changes: 41 additions & 0 deletions .github/workflows/blhawk-ci.yml
@@ -0,0 +1,41 @@
name: Broken Link Scan on PR

on:
pull_request:
branches:
- test-action # only run on 'test-action' branch

jobs:
broken-link-check:
runs-on: ubuntu-latest

steps:
- name: Checkout repository
uses: actions/checkout@v3

- name: Set up Python
uses: actions/setup-python@v4
with:
python-version: '3.11'

- name: Install dependencies
run: pip install requests

- name: Run Broken Link Scan
id: scan
run: |
python .github/scripts/broken_link_commit_check.py
echo "links_report<<EOF" >> $GITHUB_OUTPUT
cat broken_links_report.txt >> $GITHUB_OUTPUT
echo "EOF" >> $GITHUB_OUTPUT

- name: Create or Update Comment
uses: peter-evans/create-or-update-comment@v4
with:
token: ${{ secrets.GITHUB_TOKEN }}
issue-number: ${{ github.event.pull_request.number }}
body: |
**Broken Link Scan Report**

${{ steps.scan.outputs.links_report }}
edit-mode: replace
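Note: the "links_report<<EOF" lines in the scan step use GitHub Actions' heredoc syntax for $GITHUB_OUTPUT to pass a multiline value between steps. The same handoff could be done from inside the Python script rather than the shell; a minimal sketch, assuming the standard $GITHUB_OUTPUT file interface:

import os

# Append a multiline step output named 'links_report' using the
# heredoc-style delimiter that GitHub Actions expects.
report = open("broken_links_report.txt", encoding="utf-8").read()
with open(os.environ["GITHUB_OUTPUT"], "a", encoding="utf-8") as out:
    out.write("links_report<<EOF\n")
    out.write(report if report.endswith("\n") else report + "\n")
    out.write("EOF\n")

Also worth verifying: with edit-mode: replace but no comment-id, create-or-update-comment posts a new comment on every run; pairing it with peter-evans/find-comment would supply the id of an earlier report to update in place.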
11 changes: 9 additions & 2 deletions blhawk.py
@@ -4,21 +4,28 @@
def main():
parser = argparse.ArgumentParser(prog='BLHawk', description='Dead links aren\'t always dead!', epilog='version: 0.3.0')
parser.add_argument('-u', '--url', type=str, help='example: https://www.target.com')
parser.add_argument('--src', type=str, help='Path to source code directory for scan')
#parser.add_argument('-l','--list', type=str, help='File containing URLs to check')
#parser.add_argument('-t', '--thread', type=int, default=10, help='Number of threads to use (default: 10)')
#parser.add_argument('-s', '--silent', help='show only result in output')
#parser.add_argument('-h', '--help', action=help, help='Display this help message and exit')
args = parser.parse_args()

try:
if args.url:
inputLoader(
url=args.url,
#raw_request=args.list,
#cookie=args.filename,
#thread=args.thread,
#silent=args.silent,
)

elif args.src:
from modules.scan import scan_source
scan_source(args.src)
else:
print("[!] Please provide either --url or --src argument.")
exit(1)
except KeyboardInterrupt:
print("\n[!] Scan interrupted by user (Ctrl+C). Exiting...")

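The new branch makes --src an alternative entry point to the URL mode, so a run like python blhawk.py --src ./test_sources (path hypothetical) is equivalent to calling the scanner directly:

# Same code path the elif branch takes; the directory is an example.
from modules.scan import scan_source

scan_source("./test_sources")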
21 changes: 20 additions & 1 deletion modules/scan.py
@@ -1,4 +1,4 @@
import requests, os, re
from colorama import Fore, Style, init
from urllib.parse import urlparse

@@ -60,6 +60,25 @@ def get_service_by_host(host):
return service_name, service_info
return None, None

def scan_source(path):
"""
    Scan source code files in the given directory for URLs and check each one for vulnerabilities.
"""
url_pattern = re.compile(r'https?://[\w\.-]+(?:/[\w\./\-\?&%#=]*)?')
for root, dirs, files in os.walk(path):
for file in files:
file_path = os.path.join(root, file)
try:
with open(file_path, 'r', encoding='utf-8', errors='ignore') as f:
content = f.read()
urls = url_pattern.findall(content)
for url in set(urls):
print(f"[SCAN] {file_path}: {url}")
check_vulnerability(url)
except Exception as e:
print(f"[ERROR] Could not read {file_path}: {e}")


def check_vulnerability(url):
parsed = urlparse(url)
host = parsed.netloc
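Note: scan_source's URL pattern is stricter than the CI script's, whitelisting path characters rather than excluding quotes. A quick standalone check against one of the links from the test fixtures (illustrative only):

import re

url_pattern = re.compile(r'https?://[\w\.-]+(?:/[\w\./\-\?&%#=]*)?')
sample = '<a href="https://t.me/dfhgifwheoinskidewgb/">sdfsdfsdf</a>'
print(url_pattern.findall(sample))
# -> ['https://t.me/dfhgifwheoinskidewgb/']
# The closing quote is outside the allowed character class, so no trailing
# markup leaks into the matched URL.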
7 changes: 7 additions & 0 deletions test_sources/vh-v4.html
@@ -0,0 +1,7 @@
<html>
<head><title>Test Vulnerable HTML</title></head>
<body>
<a href="https://t.me/dfhgifwheoinskidewgb/">sdfsdfsdf</a>
<a href="https://github.com/fgdfgdfgdfgdf">GitHub</a>
</body>
</html>
Empty file added test_sources/vuln_html.html
8 changes: 8 additions & 0 deletions test_sources/vuln_html3.html
@@ -0,0 +1,8 @@
<html>
<head><title>Test Vulnerable HTML</title></head>
<body>
<a href="https://cafebazaar.ir/app/fakeapp">CafeBazaar</a>
<a href="https://myket.ir/app/fakeapp">Myket</a>
<a href="https://t.me/dfhgifwheoinskidewgb/">sdfsdfsdf</a>
</body>
</html>
Empty file added test_sources/vuln_js.js
Empty file added test_sources/vuln_md.md
Empty file added test_sources/vuln_python.py
38 changes: 38 additions & 0 deletions test_sources/vuln_test.html
@@ -0,0 +1,38 @@
import requests
import re
from pathlib import Path

# Simple regex to match URLs
url_pattern = r'https?://[^\s"\']+'

# Only scan text-based files
TEXT_EXTENSIONS = [".js", ".html", ".md", ".txt"]
files_to_scan = [f for f in Path(".").rglob("*.*") if f.suffix in TEXT_EXTENSIONS]

broken_links = []

def check_link(link, file_path):
try:
r = requests.get(link, timeout=5, allow_redirects=True)
if r.status_code == 404:
broken_links.append(f"{file_path}: {link}")
except Exception:
broken_links.append(f"{file_path}: {link} (error)")

# Iterate through all selected files
for file in files_to_scan:
try:
with open(file, "r", encoding="utf-8") as f:
for line in f:
for link in re.findall(url_pattern, line):
check_link(link, file)
except Exception:
continue

# Write broken links report to a temporary file
with open("broken_links_report.txt", "w", encoding="utf-8") as f:
if broken_links:
for link in broken_links:
f.write(link + "\n")
else:
f.write("No broken links found.\n")