38 changes: 38 additions & 0 deletions .github/scripts/broken_link_commit_check.py
@@ -0,0 +1,38 @@
import requests
import re
from pathlib import Path

# Simple regex to match URLs
url_pattern = r'https?://[^\s"\']+'

# Only scan text-based files
TEXT_EXTENSIONS = [".js", ".html", ".md", ".txt"]
files_to_scan = [f for f in Path(".").rglob("*.*") if f.suffix in TEXT_EXTENSIONS]

broken_links = []

def check_link(link, file_path):
try:
r = requests.get(link, timeout=5, allow_redirects=True)
if r.status_code == 404:
broken_links.append(f"{file_path}: {link}")
except Exception:
broken_links.append(f"{file_path}: {link} (error)")

# Iterate through all selected files
for file in files_to_scan:
try:
with open(file, "r", encoding="utf-8") as f:
for line in f:
for link in re.findall(url_pattern, line):
check_link(link, file)
except Exception:
continue

# Write the broken-link report to a file for the workflow step to read
with open("broken_links_report.txt", "w", encoding="utf-8") as f:
if broken_links:
for link in broken_links:
f.write(link + "\n")
else:
f.write("No broken links found.\n")
41 changes: 41 additions & 0 deletions .github/workflows/blhawk-ci.yml
@@ -0,0 +1,41 @@
name: Broken Link Scan on PR

on:
pull_request:
branches:
- test-action # only run on 'test-action' branch

jobs:
broken-link-check:
runs-on: ubuntu-latest

steps:
- name: Checkout repository
uses: actions/checkout@v3

- name: Set up Python
uses: actions/setup-python@v4
with:
python-version: '3.11'

- name: Install dependencies
run: pip install requests

- name: Run Broken Link Scan
id: scan
run: |
python .github/scripts/broken_link_commit_check.py
echo "links_report<<EOF" >> $GITHUB_OUTPUT
cat broken_links_report.txt >> $GITHUB_OUTPUT
echo "EOF" >> $GITHUB_OUTPUT

- name: Create or Update Comment
uses: peter-evans/create-or-update-comment@v4
with:
token: ${{ secrets.GITHUB_TOKEN }}
issue-number: ${{ github.event.pull_request.number }}
body: |
**Broken Link Scan Report**

${{ steps.scan.outputs.links_report }}
edit-mode: replace
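Note: the "links_report<<EOF" lines in the scan step use GitHub Actions' heredoc syntax for $GITHUB_OUTPUT to pass a multiline value between steps. The same handoff could be done from inside the Python script rather than the shell; a minimal sketch, assuming the standard $GITHUB_OUTPUT file interface:

import os

# Append a multiline step output named 'links_report' using the
# heredoc-style delimiter that GitHub Actions expects.
report = open("broken_links_report.txt", encoding="utf-8").read()
with open(os.environ["GITHUB_OUTPUT"], "a", encoding="utf-8") as out:
    out.write("links_report<<EOF\n")
    out.write(report if report.endswith("\n") else report + "\n")
    out.write("EOF\n")

Also worth verifying: with edit-mode: replace but no comment-id, create-or-update-comment posts a new comment on every run; pairing it with peter-evans/find-comment would supply the id of an earlier report to update in place.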
11 changes: 9 additions & 2 deletions blhawk.py
@@ -4,21 +4,28 @@
def main():
parser = argparse.ArgumentParser(prog='BLHawk', description='Dead links aren\'t always dead!', epilog='version: 0.3.0')
parser.add_argument('-u', '--url', type=str, help='example: https://www.target.com')
parser.add_argument('--src', type=str, help='Path to source code directory for scan')
#parser.add_argument('-l','--list', type=str, help='File containing URLs to check')
#parser.add_argument('-t', '--thread', type=int, default=10, help='Number of threads to use (default: 10)')
#parser.add_argument('-s', '--silent', help='show only result in output')
#parser.add_argument('-h', '--help', action=help, help='Display this help message and exit')
args = parser.parse_args()

try:
if args.url:
inputLoader(
url=args.url,
#raw_request=args.list,
#cookie=args.filename,
#thread=args.thread,
#silent=args.silent,
)

elif args.src:
from modules.scan import scan_source
scan_source(args.src)
else:
print("[!] Please provide either --url or --src argument.")
exit(1)
except KeyboardInterrupt:
print("\n[!] Scan interrupted by user (Ctrl+C). Exiting...")

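The new branch makes --src an alternative entry point to the URL mode, so a run like python blhawk.py --src ./test_sources (path hypothetical) is equivalent to calling the scanner directly:

# Same code path the elif branch takes; the directory is an example.
from modules.scan import scan_source

scan_source("./test_sources")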
21 changes: 20 additions & 1 deletion modules/scan.py
@@ -1,4 +1,4 @@
import requests, os, re
from colorama import Fore, Style, init
from urllib.parse import urlparse

@@ -60,6 +60,25 @@ def get_service_by_host(host):
return service_name, service_info
return None, None

def scan_source(path):
"""
    Scan source code files in the given directory for URLs and check each one for vulnerabilities.
"""
url_pattern = re.compile(r'https?://[\w\.-]+(?:/[\w\./\-\?&%#=]*)?')
for root, dirs, files in os.walk(path):
for file in files:
file_path = os.path.join(root, file)
try:
with open(file_path, 'r', encoding='utf-8', errors='ignore') as f:
content = f.read()
urls = url_pattern.findall(content)
for url in set(urls):
print(f"[SCAN] {file_path}: {url}")
check_vulnerability(url)
except Exception as e:
print(f"[ERROR] Could not read {file_path}: {e}")


def check_vulnerability(url):
parsed = urlparse(url)
host = parsed.netloc
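Note: scan_source's URL pattern is stricter than the CI script's, whitelisting path characters rather than excluding quotes. A quick standalone check against one of the links from the test fixtures (illustrative only):

import re

url_pattern = re.compile(r'https?://[\w\.-]+(?:/[\w\./\-\?&%#=]*)?')
sample = '<a href="https://t.me/dfhgifwheoinskidewgb/">sdfsdfsdf</a>'
print(url_pattern.findall(sample))
# -> ['https://t.me/dfhgifwheoinskidewgb/']
# The closing quote is outside the allowed character class, so no trailing
# markup leaks into the matched URL.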
7 changes: 7 additions & 0 deletions test_sources/vh-v4.html
@@ -0,0 +1,7 @@
<html>
<head><title>Test Vulnerable HTML</title></head>
<body>
<a href="https://t.me/dfhgifwheoinskidewgb/">sdfsdfsdf</a>
<a href="https://github.com/fgdfgdfgdfgdf">GitHub</a>
</body>
</html>
Empty file added test_sources/vuln_html.html
8 changes: 8 additions & 0 deletions test_sources/vuln_html3.html
@@ -0,0 +1,8 @@
<html>
<head><title>Test Vulnerable HTML</title></head>
<body>
<a href="https://cafebazaar.ir/app/fakeapp">CafeBazaar</a>
<a href="https://myket.ir/app/fakeapp">Myket</a>
<a href="https://t.me/dfhgifwheoinskidewgb/">sdfsdfsdf</a>
</body>
</html>
Empty file added test_sources/vuln_js.js
Empty file added test_sources/vuln_md.md
Empty file added test_sources/vuln_python.py
38 changes: 38 additions & 0 deletions test_sources/vuln_test.html
@@ -0,0 +1,38 @@
import requests
import re
from pathlib import Path

# Simple regex to match URLs
url_pattern = r'https?://[^\s"\']+'

# Only scan text-based files
TEXT_EXTENSIONS = [".js", ".html", ".md", ".txt"]
files_to_scan = [f for f in Path(".").rglob("*.*") if f.suffix in TEXT_EXTENSIONS]

broken_links = []

def check_link(link, file_path):
try:
r = requests.get(link, timeout=5, allow_redirects=True)
if r.status_code == 404:
broken_links.append(f"{file_path}: {link}")
except Exception:
broken_links.append(f"{file_path}: {link} (error)")

# Iterate through all selected files
for file in files_to_scan:
try:
with open(file, "r", encoding="utf-8") as f:
for line in f:
for link in re.findall(url_pattern, line):
check_link(link, file)
except Exception:
continue

# Write broken links report to a temporary file
with open("broken_links_report.txt", "w", encoding="utf-8") as f:
if broken_links:
for link in broken_links:
f.write(link + "\n")
else:
f.write("No broken links found.\n")