From 72af7eb9c395e2d592f9a334b6ace98508e4c60b Mon Sep 17 00:00:00 2001 From: Marfin <248203147+arifinahmad99-cloud@users.noreply.github.com> Date: Sun, 11 Jan 2026 07:36:43 +0800 Subject: [PATCH 1/9] Implement multi-language support for UI Added multi-language support with English, Chinese, Spanish, French, German, and Japanese translations for various UI elements and messages. --- website/Teranslate.js | 331 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 331 insertions(+) create mode 100644 website/Teranslate.js diff --git a/website/Teranslate.js b/website/Teranslate.js new file mode 100644 index 0000000..a713821 --- /dev/null +++ b/website/Teranslate.js @@ -0,0 +1,331 @@ +-- ============================================= +-- EXPLOREPI LANGUAGE DATA INSTALLATION +-- Complete Multi-Language Support +-- ============================================= + +USE explorepi; + +-- Clear existing language data (optional) +-- TRUNCATE TABLE language_data; + +-- ============================================= +-- ENGLISH (en) - Complete Translation +-- ============================================= + +INSERT INTO language_data (lang_code, lang_key, lang_value, category) VALUES +-- Navigation +('en', 'nav.home', 'Home', 'navigation'), +('en', 'nav.blocks', 'Blocks', 'navigation'), +('en', 'nav.transactions', 'Transactions', 'navigation'), +('en', 'nav.tokens', 'Tokens', 'navigation'), +('en', 'nav.contracts', 'Contracts', 'navigation'), +('en', 'nav.statistics', 'Statistics', 'navigation'), +('en', 'nav.search', 'Search', 'navigation'), +('en', 'nav.about', 'About', 'navigation'), +('en', 'nav.api', 'API', 'navigation'), + +-- Common Terms +('en', 'common.loading', 'Loading...', 'common'), +('en', 'common.error', 'Error', 'common'), +('en', 'common.success', 'Success', 'common'), +('en', 'common.warning', 'Warning', 'common'), +('en', 'common.info', 'Information', 'common'), +('en', 'common.block', 'Block', 'common'), +('en', 'common.transaction', 'Transaction', 'common'), +('en', 'common.address', 'Address', 'common'), +('en', 'common.token', 'Token', 'common'), +('en', 'common.contract', 'Smart Contract', 'common'), +('en', 'common.view_all', 'View All', 'common'), +('en', 'common.view_more', 'View More', 'common'), +('en', 'common.details', 'Details', 'common'), +('en', 'common.copy', 'Copy', 'common'), +('en', 'common.copied', 'Copied!', 'common'), +('en', 'common.ago', 'ago', 'common'), +('en', 'common.total', 'Total', 'common'), +('en', 'common.amount', 'Amount', 'common'), +('en', 'common.price', 'Price', 'common'), +('en', 'common.value', 'Value', 'common'), + +-- Search +('en', 'search.placeholder', 'Search by Address / Txn Hash / Block / Token', 'search'), +('en', 'search.searching', 'Searching...', 'search'), +('en', 'search.no_results', 'No results found', 'search'), +('en', 'search.invalid_input', 'Invalid search input', 'search'), +('en', 'search.enter_query', 'Enter your search query', 'search'), + +-- Home Page +('en', 'home.title', 'Pi Network Block Explorer', 'home'), +('en', 'home.subtitle', 'Explore the Pi Blockchain', 'home'), +('en', 'home.latest_blocks', 'Latest Blocks', 'home'), +('en', 'home.latest_transactions', 'Latest Transactions', 'home'), +('en', 'home.network_stats', 'Network Statistics', 'home'), +('en', 'home.total_blocks', 'Total Blocks', 'home'), +('en', 'home.total_transactions', 'Total Transactions', 'home'), +('en', 'home.total_addresses', 'Total Addresses', 'home'), +('en', 'home.avg_block_time', 'Avg Block Time', 'home'), +('en', 'home.welcome', 'Welcome 
to Pi Network Explorer', 'home'), + +-- Block Details +('en', 'block.height', 'Block Height', 'block'), +('en', 'block.hash', 'Block Hash', 'block'), +('en', 'block.parent_hash', 'Parent Hash', 'block'), +('en', 'block.timestamp', 'Timestamp', 'block'), +('en', 'block.transactions', 'Transactions', 'block'), +('en', 'block.miner', 'Miner', 'block'), +('en', 'block.difficulty', 'Difficulty', 'block'), +('en', 'block.total_difficulty', 'Total Difficulty', 'block'), +('en', 'block.size', 'Size', 'block'), +('en', 'block.gas_used', 'Gas Used', 'block'), +('en', 'block.gas_limit', 'Gas Limit', 'block'), +('en', 'block.nonce', 'Nonce', 'block'), +('en', 'block.state_root', 'State Root', 'block'), +('en', 'block.receipts_root', 'Receipts Root', 'block'), +('en', 'block.transactions_root', 'Transactions Root', 'block'), +('en', 'block.extra_data', 'Extra Data', 'block'), +('en', 'block.not_found', 'Block not found', 'block'), +('en', 'block.overview', 'Block Overview', 'block'), + +-- Transaction Details +('en', 'tx.hash', 'Transaction Hash', 'transaction'), +('en', 'tx.status', 'Status', 'transaction'), +('en', 'tx.success', 'Success', 'transaction'), +('en', 'tx.failed', 'Failed', 'transaction'), +('en', 'tx.pending', 'Pending', 'transaction'), +('en', 'tx.block', 'Block', 'transaction'), +('en', 'tx.from', 'From', 'transaction'), +('en', 'tx.to', 'To', 'transaction'), +('en', 'tx.contract_creation', 'Contract Creation', 'transaction'), +('en', 'tx.value', 'Value', 'transaction'), +('en', 'tx.fee', 'Transaction Fee', 'transaction'), +('en', 'tx.gas_price', 'Gas Price', 'transaction'), +('en', 'tx.gas_limit', 'Gas Limit', 'transaction'), +('en', 'tx.gas_used', 'Gas Used', 'transaction'), +('en', 'tx.nonce', 'Nonce', 'transaction'), +('en', 'tx.input_data', 'Input Data', 'transaction'), +('en', 'tx.logs', 'Logs', 'transaction'), +('en', 'tx.not_found', 'Transaction not found', 'transaction'), + +-- Address Details +('en', 'address.overview', 'Address Overview', 'address'), +('en', 'address.balance', 'Balance', 'address'), +('en', 'address.transactions', 'Transactions', 'address'), +('en', 'address.token_transfers', 'Token Transfers', 'address'), +('en', 'address.is_contract', 'Smart Contract', 'address'), +('en', 'address.creator', 'Creator', 'address'), +('en', 'address.creation_tx', 'Creation Transaction', 'address'), +('en', 'address.first_seen', 'First Seen', 'address'), +('en', 'address.last_seen', 'Last Seen', 'address'), +('en', 'address.not_found', 'Address not found', 'address'), + +-- Token Details +('en', 'token.name', 'Token Name', 'token'), +('en', 'token.symbol', 'Symbol', 'token'), +('en', 'token.decimals', 'Decimals', 'token'), +('en', 'token.total_supply', 'Total Supply', 'token'), +('en', 'token.holders', 'Holders', 'token'), +('en', 'token.transfers', 'Transfers', 'token'), +('en', 'token.type', 'Token Type', 'token'), +('en', 'token.contract', 'Contract Address', 'token'), +('en', 'token.not_found', 'Token not found', 'token'), + +-- Contract Details +('en', 'contract.address', 'Contract Address', 'contract'), +('en', 'contract.creator', 'Creator', 'contract'), +('en', 'contract.creation_tx', 'Creation Transaction', 'contract'), +('en', 'contract.creation_block', 'Creation Block', 'contract'), +('en', 'contract.verified', 'Verified', 'contract'), +('en', 'contract.unverified', 'Not Verified', 'contract'), +('en', 'contract.source_code', 'Source Code', 'contract'), +('en', 'contract.abi', 'Contract ABI', 'contract'), +('en', 'contract.bytecode', 'Bytecode', 'contract'), +('en', 
'contract.compiler_version', 'Compiler Version', 'contract'), +('en', 'contract.optimization', 'Optimization', 'contract'), +('en', 'contract.runs', 'Runs', 'contract'), +('en', 'contract.not_found', 'Contract not found', 'contract'), + +-- Time Units +('en', 'time.seconds', 'seconds', 'time'), +('en', 'time.minutes', 'minutes', 'time'), +('en', 'time.hours', 'hours', 'time'), +('en', 'time.days', 'days', 'time'), +('en', 'time.months', 'months', 'time'), +('en', 'time.years', 'years', 'time'), +('en', 'time.ago', 'ago', 'time'), +('en', 'time.just_now', 'just now', 'time'), + +-- Error Messages +('en', 'error.general', 'An error occurred', 'error'), +('en', 'error.not_found', 'Not found', 'error'), +('en', 'error.invalid_address', 'Invalid address', 'error'), +('en', 'error.invalid_tx_hash', 'Invalid transaction hash', 'error'), +('en', 'error.invalid_block', 'Invalid block number', 'error'), +('en', 'error.database', 'Database error', 'error'), +('en', 'error.network', 'Network error', 'error'), +('en', 'error.connection', 'Connection error', 'error'); + +-- ============================================= +-- CHINESE (zh) - 中文翻译 +-- ============================================= + +INSERT INTO language_data (lang_code, lang_key, lang_value, category) VALUES +('zh', 'nav.home', '首页', 'navigation'), +('zh', 'nav.blocks', '区块', 'navigation'), +('zh', 'nav.transactions', '交易', 'navigation'), +('zh', 'nav.tokens', '代币', 'navigation'), +('zh', 'nav.contracts', '合约', 'navigation'), +('zh', 'nav.statistics', '统计', 'navigation'), +('zh', 'nav.search', '搜索', 'navigation'), + +('zh', 'common.loading', '加载中...', 'common'), +('zh', 'common.error', '错误', 'common'), +('zh', 'common.success', '成功', 'common'), +('zh', 'common.block', '区块', 'common'), +('zh', 'common.transaction', '交易', 'common'), +('zh', 'common.address', '地址', 'common'), +('zh', 'common.token', '代币', 'common'), +('zh', 'common.view_all', '查看全部', 'common'), +('zh', 'common.details', '详情', 'common'), + +('zh', 'search.placeholder', '搜索地址 / 交易哈希 / 区块 / 代币', 'search'), +('zh', 'search.no_results', '未找到结果', 'search'), + +('zh', 'home.title', 'Pi网络区块浏览器', 'home'), +('zh', 'home.latest_blocks', '最新区块', 'home'), +('zh', 'home.latest_transactions', '最新交易', 'home'), +('zh', 'home.total_blocks', '总区块数', 'home'), + +('zh', 'block.height', '区块高度', 'block'), +('zh', 'block.hash', '区块哈希', 'block'), +('zh', 'block.timestamp', '时间戳', 'block'), +('zh', 'block.transactions', '交易', 'block'), +('zh', 'block.miner', '矿工', 'block'), + +('zh', 'tx.hash', '交易哈希', 'transaction'), +('zh', 'tx.status', '状态', 'transaction'), +('zh', 'tx.success', '成功', 'transaction'), +('zh', 'tx.failed', '失败', 'transaction'), +('zh', 'tx.from', '发送方', 'transaction'), +('zh', 'tx.to', '接收方', 'transaction'); + +-- ============================================= +-- SPANISH (es) - Español +-- ============================================= + +INSERT INTO language_data (lang_code, lang_key, lang_value, category) VALUES +('es', 'nav.home', 'Inicio', 'navigation'), +('es', 'nav.blocks', 'Bloques', 'navigation'), +('es', 'nav.transactions', 'Transacciones', 'navigation'), +('es', 'nav.tokens', 'Tokens', 'navigation'), +('es', 'nav.contracts', 'Contratos', 'navigation'), +('es', 'nav.statistics', 'Estadísticas', 'navigation'), +('es', 'nav.search', 'Buscar', 'navigation'), + +('es', 'common.loading', 'Cargando...', 'common'), +('es', 'common.error', 'Error', 'common'), +('es', 'common.success', 'Éxito', 'common'), +('es', 'common.block', 'Bloque', 'common'), +('es', 'common.transaction', 
'Transacción', 'common'), +('es', 'common.address', 'Dirección', 'common'), +('es', 'common.token', 'Token', 'common'), +('es', 'common.view_all', 'Ver Todo', 'common'), +('es', 'common.details', 'Detalles', 'common'), + +('es', 'search.placeholder', 'Buscar por Dirección / Hash de Tx / Bloque / Token', 'search'), +('es', 'search.no_results', 'No se encontraron resultados', 'search'), + +('es', 'home.title', 'Explorador de Bloques de Pi Network', 'home'), +('es', 'home.latest_blocks', 'Últimos Bloques', 'home'), +('es', 'home.latest_transactions', 'Últimas Transacciones', 'home'), + +('es', 'block.height', 'Altura del Bloque', 'block'), +('es', 'block.hash', 'Hash del Bloque', 'block'), +('es', 'block.timestamp', 'Marca de Tiempo', 'block'), +('es', 'block.transactions', 'Transacciones', 'block'), + +('es', 'tx.hash', 'Hash de Transacción', 'transaction'), +('es', 'tx.status', 'Estado', 'transaction'), +('es', 'tx.success', 'Éxito', 'transaction'), +('es', 'tx.failed', 'Fallido', 'transaction'); + +-- ============================================= +-- FRENCH (fr) - Français +-- ============================================= + +INSERT INTO language_data (lang_code, lang_key, lang_value, category) VALUES +('fr', 'nav.home', 'Accueil', 'navigation'), +('fr', 'nav.blocks', 'Blocs', 'navigation'), +('fr', 'nav.transactions', 'Transactions', 'navigation'), +('fr', 'nav.tokens', 'Jetons', 'navigation'), +('fr', 'nav.contracts', 'Contrats', 'navigation'), +('fr', 'nav.search', 'Rechercher', 'navigation'), + +('fr', 'common.loading', 'Chargement...', 'common'), +('fr', 'common.error', 'Erreur', 'common'), +('fr', 'common.block', 'Bloc', 'common'), +('fr', 'common.transaction', 'Transaction', 'common'), +('fr', 'common.address', 'Adresse', 'common'), +('fr', 'common.details', 'Détails', 'common'), + +('fr', 'search.placeholder', 'Rechercher par Adresse / Hash / Bloc / Jeton', 'search'), + +('fr', 'home.title', 'Explorateur de Blocs Pi Network', 'home'), +('fr', 'home.latest_blocks', 'Derniers Blocs', 'home'), +('fr', 'home.latest_transactions', 'Dernières Transactions', 'home'); + +-- ============================================= +-- GERMAN (de) - Deutsch +-- ============================================= + +INSERT INTO language_data (lang_code, lang_key, lang_value, category) VALUES +('de', 'nav.home', 'Startseite', 'navigation'), +('de', 'nav.blocks', 'Blöcke', 'navigation'), +('de', 'nav.transactions', 'Transaktionen', 'navigation'), +('de', 'nav.tokens', 'Token', 'navigation'), +('de', 'nav.contracts', 'Verträge', 'navigation'), +('de', 'nav.search', 'Suchen', 'navigation'), + +('de', 'common.loading', 'Laden...', 'common'), +('de', 'common.error', 'Fehler', 'common'), +('de', 'common.block', 'Block', 'common'), +('de', 'common.transaction', 'Transaktion', 'common'), +('de', 'common.address', 'Adresse', 'common'), +('de', 'common.details', 'Details', 'common'), + +('de', 'home.title', 'Pi Network Block Explorer', 'home'), +('de', 'home.latest_blocks', 'Neueste Blöcke', 'home'), +('de', 'home.latest_transactions', 'Neueste Transaktionen', 'home'); + +-- ============================================= +-- JAPANESE (ja) - 日本語 +-- ============================================= + +INSERT INTO language_data (lang_code, lang_key, lang_value, category) VALUES +('ja', 'nav.home', 'ホーム', 'navigation'), +('ja', 'nav.blocks', 'ブロック', 'navigation'), +('ja', 'nav.transactions', 'トランザクション', 'navigation'), +('ja', 'nav.tokens', 'トークン', 'navigation'), +('ja', 'nav.search', '検索', 'navigation'), + +('ja', 
'common.loading', '読み込み中...', 'common'), +('ja', 'common.error', 'エラー', 'common'), +('ja', 'common.block', 'ブロック', 'common'), +('ja', 'common.transaction', 'トランザクション', 'common'), +('ja', 'common.address', 'アドレス', 'common'), + +('ja', 'home.title', 'Piネットワーク ブロックエクスプローラー', 'home'), +('ja', 'home.latest_blocks', '最新ブロック', 'home'), +('ja', 'home.latest_transactions', '最新トランザクション', 'home'); + +-- ============================================= +-- VERIFY INSTALLATION +-- ============================================= + +SELECT + lang_code, + COUNT(*) as total_translations, + COUNT(DISTINCT category) as categories +FROM language_data +GROUP BY lang_code +ORDER BY lang_code; + +SELECT 'Language data installation completed!' AS status; From d4a3a359c2fce713005f87e55ec8c35c3d4f3091 Mon Sep 17 00:00:00 2001 From: Marfin <248203147+arifinahmad99-cloud@users.noreply.github.com> Date: Sun, 11 Jan 2026 12:09:14 +0800 Subject: [PATCH 2/9] Add GitHub Actions workflow for Azure Node.js deployment This workflow builds and deploys a Node.js application to Azure Web App on commits to the main branch. --- .github/workflows/azure-webapps-node.yml | 78 ++++++++++++++++++++++++ 1 file changed, 78 insertions(+) create mode 100644 .github/workflows/azure-webapps-node.yml diff --git a/.github/workflows/azure-webapps-node.yml b/.github/workflows/azure-webapps-node.yml new file mode 100644 index 0000000..2ebbac2 --- /dev/null +++ b/.github/workflows/azure-webapps-node.yml @@ -0,0 +1,78 @@ +# This workflow will build and push a node.js application to an Azure Web App when a commit is pushed to your default branch. +# +# This workflow assumes you have already created the target Azure App Service web app. +# For instructions see https://docs.microsoft.com/en-us/azure/app-service/quickstart-nodejs?tabs=linux&pivots=development-environment-cli +# +# To configure this workflow: +# +# 1. Download the Publish Profile for your Azure Web App. You can download this file from the Overview page of your Web App in the Azure Portal. +# For more information: https://docs.microsoft.com/en-us/azure/app-service/deploy-github-actions?tabs=applevel#generate-deployment-credentials +# +# 2. Create a secret in your repository named AZURE_WEBAPP_PUBLISH_PROFILE, paste the publish profile contents as the value of the secret. +# For instructions on obtaining the publish profile see: https://docs.microsoft.com/azure/app-service/deploy-github-actions#configure-the-github-secret +# +# 3. Change the value for the AZURE_WEBAPP_NAME. Optionally, change the AZURE_WEBAPP_PACKAGE_PATH and NODE_VERSION environment variables below. +# +# For more information on GitHub Actions for Azure: https://github.com/Azure/Actions +# For more information on the Azure Web Apps Deploy action: https://github.com/Azure/webapps-deploy +# For more samples to get started with GitHub Action workflows to deploy to Azure: https://github.com/Azure/actions-workflow-samples + +on: + push: + branches: [ "main" ] + workflow_dispatch: + +env: + AZURE_WEBAPP_NAME: your-app-name # set this to your application's name + AZURE_WEBAPP_PACKAGE_PATH: '.' 
# set this to the path to your web app project, defaults to the repository root + NODE_VERSION: '20.x' # set this to the node version to use + +permissions: + contents: read + +jobs: + build: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + + - name: Set up Node.js + uses: actions/setup-node@v4 + with: + node-version: ${{ env.NODE_VERSION }} + cache: 'npm' + + - name: npm install, build, and test + run: | + npm install + npm run build --if-present + npm run test --if-present + + - name: Upload artifact for deployment job + uses: actions/upload-artifact@v4 + with: + name: node-app + path: . + + deploy: + permissions: + contents: none + runs-on: ubuntu-latest + needs: build + environment: + name: 'Development' + url: ${{ steps.deploy-to-webapp.outputs.webapp-url }} + + steps: + - name: Download artifact from build job + uses: actions/download-artifact@v4 + with: + name: node-app + + - name: 'Deploy to Azure WebApp' + id: deploy-to-webapp + uses: azure/webapps-deploy@v2 + with: + app-name: ${{ env.AZURE_WEBAPP_NAME }} + publish-profile: ${{ secrets.AZURE_WEBAPP_PUBLISH_PROFILE }} + package: ${{ env.AZURE_WEBAPP_PACKAGE_PATH }} From f4f7155586faddb0fd58d537873bb6606b4cb2ca Mon Sep 17 00:00:00 2001 From: Marfin <248203147+arifinahmad99-cloud@users.noreply.github.com> Date: Fri, 16 Jan 2026 05:11:01 +0800 Subject: [PATCH 3/9] Update privacy policy last updated date --- website/public/policy.html | 71 ++++++++++++++------------------------ 1 file changed, 25 insertions(+), 46 deletions(-) diff --git a/website/public/policy.html b/website/public/policy.html index 0648d69..599b2e6 100644 --- a/website/public/policy.html +++ b/website/public/policy.html @@ -1,10 +1,10 @@

 Privacy Policy
-Last updated: March 14, 2023
-This Privacy Policy describes Our policies and procedures on the collection, use and disclosure of Your information when You use the Service and tells You about Your privacy rights and how the law protects You.
+Last updated: January 15, 2026
+This Privacy Policy describes Our policies and procedures on the collection, use and disclosure of Your information when You use the Service and tells You about Your privacy rights and how the law [...]
 We use Your Personal data to provide and improve the Service. By using the Service, You agree to the collection and use of information in accordance with this Privacy Policy.
 Interpretation and Definitions
 Interpretation
-The words of which the initial letter is capitalized have meanings defined under the following conditions. The following definitions shall have the same meaning regardless of whether they appear in singular or in plural.
+The words of which the initial letter is capitalized have meanings defined under the following conditions. The following definitions shall have the same meaning regardless of whether they appear in[...]
 Definitions
 For the purposes of this Privacy Policy:
\ No newline at end of file From af40f758903257058b03c79f481964ec1caffc78 Mon Sep 17 00:00:00 2001 From: Marfin <248203147+arifinahmad99-cloud@users.noreply.github.com> Date: Sun, 18 Jan 2026 22:14:38 +0800 Subject: [PATCH 4/9] Revert "corrected crawler README.md MySQL link" --- crawler/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crawler/README.md b/crawler/README.md index fe4f211..679b064 100644 --- a/crawler/README.md +++ b/crawler/README.md @@ -61,7 +61,7 @@ npm start ## ⛏️ Built Using -- [MYSQL](https://www.mysql.com/) - Database +- [MYSQL](https://www.mongodb.com/) - Database - [NodeJs](https://nodejs.org/en/) - Server Environment - [StellarSDK](https://github.com/stellar/js-stellar-sdk) - BlockchainTool ## ✍️ Authors From dd7f06ec921a7ceb5a0158ec80458d1ec13cbe6e Mon Sep 17 00:00:00 2001 From: Marfin <248203147+arifinahmad99-cloud@users.noreply.github.com> Date: Sun, 18 Jan 2026 23:17:12 +0800 Subject: [PATCH 5/9] Create README.md --- .github/workflows/README.md | 83 +++++++++++++++++++++++++++++++++++++ 1 file changed, 83 insertions(+) create mode 100644 .github/workflows/README.md diff --git a/.github/workflows/README.md b/.github/workflows/README.md new file mode 100644 index 0000000..cc583f4 --- /dev/null +++ b/.github/workflows/README.md @@ -0,0 +1,83 @@ +# Crawler + +A web crawler for collecting and processing data from specified sources. + +## Table of Contents + +- [Installation](#installation) +- [Configuration](#configuration) +- [Usage](#usage) +- [Database Setup](#database-setup) +- [Contributing](#contributing) + +## Installation + +Install the required dependencies: + +```bash +pip install -r requirements.txt +``` + +Ensure you have Python 3.8+ installed on your system. + +## Configuration + +### Environment Variables + +Create a `.env` file in the project root with the following variables: + +``` +DATABASE_HOST=localhost +DATABASE_USER=crawler_user +DATABASE_PASSWORD=your_password +DATABASE_NAME=crawler_db +``` + +Update these values according to your local environment. + +## Usage + +Run the crawler with: + +```bash +python crawler.py +``` + +Optional flags: +- `--verbose`: Enable detailed logging output +- `--limit N`: Limit crawling to N pages +- `--timeout S`: Set request timeout to S seconds + +## Database Setup + +### MySQL Configuration + +The crawler uses MySQL to store collected data. Follow these steps to set up your database: + +1. **Install MySQL**: Download and install from [MySQL Official Website](https://dev.mysql.com/downloads/mysql/) + +2. **Create Database and User**: + ```sql + CREATE DATABASE crawler_db; + CREATE USER 'crawler_user'@'localhost' IDENTIFIED BY 'your_password'; + GRANT ALL PRIVILEGES ON crawler_db.* TO 'crawler_user'@'localhost'; + FLUSH PRIVILEGES; + ``` + +3. **Initialize Tables**: Run the database migration script: + ```bash + python scripts/init_db.py + ``` + +### Connection Details + +- **Host**: localhost (default) +- **Port**: 3306 (default MySQL port) +- **User**: crawler_user +- **Database**: crawler_db + +Update the connection parameters in your `.env` file if using different settings. + +## Contributing + +Please read CONTRIBUTING.md for details on our code of conduct and the process for submitting pull requests. 
\ No newline at end of file From 453469bb3a53caccd68d01e9fbf2645eb0337fee Mon Sep 17 00:00:00 2001 From: Marfin <248203147+arifinahmad99-cloud@users.noreply.github.com> Date: Sun, 18 Jan 2026 23:24:43 +0800 Subject: [PATCH 6/9] Create CONTRIBUTING.md Reporting Bugs When reporting bugs, please include: Description: What you were trying to do Expected behavior: What should have happened Actual behavior: What actually happened Environment: Python version, OS, MySQL version Steps to reproduce: Clear steps to replicate the issue Error message: Full error traceback if available Screenshots: If applicable --- .github/workflows/CONTRIBUTING.md | 224 ++++++++++++++++++++++++++++++ 1 file changed, 224 insertions(+) create mode 100644 .github/workflows/CONTRIBUTING.md diff --git a/.github/workflows/CONTRIBUTING.md b/.github/workflows/CONTRIBUTING.md new file mode 100644 index 0000000..d652fba --- /dev/null +++ b/.github/workflows/CONTRIBUTING.md @@ -0,0 +1,224 @@ +# Contributing to Crawler + +Thank you for your interest in contributing to the Crawler project! We welcome contributions from everyone. This document provides guidelines and instructions for contributing. + +## Code of Conduct + +We are committed to providing a welcoming and inspiring community for all. Please be respectful and constructive in all interactions. Harassment, discrimination, or disruptive behavior will not be tolerated. + +## How to Contribute + +There are many ways to contribute to this project: + +- **Report bugs** by opening an issue with detailed information +- **Suggest features** with clear use cases and expected behavior +- **Improve documentation** by fixing typos or clarifying confusing sections +- **Submit code changes** by creating pull requests with meaningful improvements +- **Review pull requests** and provide constructive feedback to other contributors + +## Getting Started + +### Prerequisites + +- Python 3.8 or higher +- Git +- A MySQL database for testing (optional but recommended) +- A code editor or IDE of your choice + +### Setting Up Your Development Environment + +1. Fork the repository on GitHub +2. Clone your fork locally: + ```bash + git clone https://github.com/your-username/crawler.git + cd crawler + ``` +3. Create a virtual environment: + ```bash + python -m venv venv + source venv/bin/activate # On Windows: venv\Scripts\activate + ``` +4. Install development dependencies: + ```bash + pip install -r requirements-dev.txt + ``` +5. Create a local `.env` file for testing: + ```bash + cp .env.example .env + ``` + +## Making Changes + +### Branch Naming + +Create a descriptive branch name for your changes: +- `feature/add-proxy-support` +- `bugfix/fix-mysql-connection-timeout` +- `docs/improve-readme` +- `test/add-crawler-tests` + +```bash +git checkout -b feature/your-feature-name +``` + +### Code Style + +Follow these guidelines to maintain consistent code quality: + +- Use PEP 8 style guide for Python code +- Keep lines under 100 characters when possible +- Use meaningful variable and function names +- Add docstrings to functions and classes +- Use type hints where applicable + +Example: +```python +def fetch_url(url: str, timeout: int = 10) -> str: + """ + Fetch content from a given URL. 
+ + Args: + url: The URL to fetch + timeout: Request timeout in seconds (default: 10) + + Returns: + The HTML content of the page + + Raises: + requests.exceptions.RequestException: If the request fails + """ + response = requests.get(url, timeout=timeout) + response.raise_for_status() + return response.text +``` + +### Testing + +Before submitting a pull request, ensure your code passes all tests: + +```bash +# Run all tests +pytest + +# Run tests with coverage +pytest --cov=crawler + +# Run specific test file +pytest tests/test_crawler.py +``` + +Write tests for new features: +```python +def test_fetch_url_success(): + """Test that fetch_url returns content for valid URLs.""" + result = fetch_url("https://example.com") + assert result is not None + assert len(result) > 0 +``` + +### Commits + +Write clear, descriptive commit messages: + +```bash +# Good +git commit -m "Add proxy support to crawler + +- Add ProxyManager class to handle proxy rotation +- Update fetch_url to accept proxy configuration +- Add tests for proxy connection handling" + +# Avoid +git commit -m "fix stuff" +git commit -m "changes" +``` + +## Submitting Changes + +### Pull Request Process + +1. Ensure all tests pass and code is formatted correctly +2. Push your branch to your fork: + ```bash + git push origin feature/your-feature-name + ``` +3. Open a pull request on GitHub with: + - A clear title describing the change + - A detailed description of what was changed and why + - Reference to any related issues (e.g., "Fixes #123") + - Screenshots or examples if applicable +4. Address review comments and make requested changes +5. Ensure the CI/CD pipeline passes +6. Once approved, your PR will be merged + +### Pull Request Template + +```markdown +## Description +Brief explanation of what this PR does. + +## Changes Made +- Change 1 +- Change 2 +- Change 3 + +## Related Issues +Fixes #123 + +## Testing +Describe how you tested these changes. + +## Checklist +- [ ] Code follows style guidelines +- [ ] Tests pass locally +- [ ] Documentation is updated +- [ ] No breaking changes (or documented in PR) +``` + +## Reporting Bugs + +When reporting bugs, please include: + +- **Description**: What you were trying to do +- **Expected behavior**: What should have happened +- **Actual behavior**: What actually happened +- **Environment**: Python version, OS, MySQL version +- **Steps to reproduce**: Clear steps to replicate the issue +- **Error message**: Full error traceback if available +- **Screenshots**: If applicable + +Example: +``` +Title: Crawler fails with timeout on large datasets + +Description: When crawling more than 10,000 pages, the crawler +consistently times out. + +Steps to reproduce: +1. Configure crawler with 15,000 pages +2. Run `python crawler.py` +3. 
After ~8,000 pages, connection fails + +Expected: Crawler should complete all 15,000 pages +Actual: Crawler crashes with timeout error + +Environment: Python 3.9, Ubuntu 20.04, MySQL 8.0 +``` + +## Suggesting Features + +When suggesting features, explain: + +- **Use case**: Why this feature is needed +- **Expected behavior**: How it should work +- **Alternative approaches**: Other possible implementations +- **Impact**: How it affects existing functionality + +## Documentation + +Help improve documentation by: + +- Fixing typos and grammatical errors +- Adding missing sections or examples +- Clarifying confusing explanations +- Adding inline code comments for complex logic From ea3c366a1d4168be3114f98ffa8948f26f2c96d7 Mon Sep 17 00:00:00 2001 From: Marfin <248203147+arifinahmad99-cloud@users.noreply.github.com> Date: Mon, 19 Jan 2026 01:37:41 +0800 Subject: [PATCH 7/9] Create node.yml --- .github/workflows/node.yml | 37 +++++++++++++++++++++++++++++++++++++ 1 file changed, 37 insertions(+) create mode 100644 .github/workflows/node.yml diff --git a/.github/workflows/node.yml b/.github/workflows/node.yml new file mode 100644 index 0000000..4016102 --- /dev/null +++ b/.github/workflows/node.yml @@ -0,0 +1,37 @@ +{{ env.NODE_VERSION }} + cache: 'npm' + + - name: npm install, build, and test + run: | + npm install + npm run build --if-present + npm run test --if-present + + - name: Upload artifact for deployment job + uses: actions/upload-artifact@v4 + with: + name: node-app + path: . + + deploy: + permissions: + contents: none + runs-on: ubuntu-latest + needs: build + environment: + name: 'Development' + url: ${{ steps.deploy-to-webapp.outputs.webapp-url }} + + steps: + - name: Download artifact from build job + uses: actions/download-artifact@v4 + with: + name: node-app + + - name: 'Deploy to Azure WebApp' + id: deploy-to-webapp + uses: azure/webapps-deploy@v2 + with: + app-name: ${{ env.AZURE_WEBAPP_NAME }} + publish-profile: ${{ secrets.AZURE_WEBAPP_PUBLISH_PROFILE }} + package: ${{ env.AZURE_WEBAPP_PACKAGE_PATH }} \ No newline at end of file From c81bf0c378c8dbb9b891edd4fcefad1690fdf2db Mon Sep 17 00:00:00 2001 From: Marfin <248203147+arifinahmad99-cloud@users.noreply.github.com> Date: Mon, 19 Jan 2026 08:02:46 +0800 Subject: [PATCH 8/9] Create CONTRIBUTING.md --- crawler/CONTRIBUTING.md | 224 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 224 insertions(+) create mode 100644 crawler/CONTRIBUTING.md diff --git a/crawler/CONTRIBUTING.md b/crawler/CONTRIBUTING.md new file mode 100644 index 0000000..22f3b8c --- /dev/null +++ b/crawler/CONTRIBUTING.md @@ -0,0 +1,224 @@ +# Contributing to Crawler + +Thank you for your interest in contributing to the Crawler project! We welcome contributions from everyone. This document provides guidelines and instructions for contributing. + +## Code of Conduct + +We are committed to providing a welcoming and inspiring community for all. Please be respectful and constructive in all interactions. Harassment, discrimination, or disruptive behavior will not be tolerated. 
+ +## How to Contribute + +There are many ways to contribute to this project: + +- **Report bugs** by opening an issue with detailed information +- **Suggest features** with clear use cases and expected behavior +- **Improve documentation** by fixing typos or clarifying confusing sections +- **Submit code changes** by creating pull requests with meaningful improvements +- **Review pull requests** and provide constructive feedback to other contributors + +## Getting Started + +### Prerequisites + +- Python 3.8 or higher +- Git +- A MySQL database for testing (optional but recommended) +- A code editor or IDE of your choice + +### Setting Up Your Development Environment + +1. Fork the repository on GitHub +2. Clone your fork locally: + ```bash + git clone https://github.com/your-username/crawler.git + cd crawler + ``` +3. Create a virtual environment: + ```bash + python -m venv venv + source venv/bin/activate # On Windows: venv\Scripts\activate + ``` +4. Install development dependencies: + ```bash + pip install -r requirements-dev.txt + ``` +5. Create a local `.env` file for testing: + ```bash + cp .env.example .env + ``` + +## Making Changes + +### Branch Naming + +Create a descriptive branch name for your changes: +- `feature/add-proxy-support` +- `bugfix/fix-mysql-connection-timeout` +- `docs/improve-readme` +- `test/add-crawler-tests` + +```bash +git checkout -b feature/your-feature-name +``` + +### Code Style + +Follow these guidelines to maintain consistent code quality: + +- Use PEP 8 style guide for Python code +- Keep lines under 100 characters when possible +- Use meaningful variable and function names +- Add docstrings to functions and classes +- Use type hints where applicable + +Example: +```python +def fetch_url(url: str, timeout: int = 10) -> str: + """ + Fetch content from a given URL. + + Args: + url: The URL to fetch + timeout: Request timeout in seconds (default: 10) + + Returns: + The HTML content of the page + + Raises: + requests.exceptions.RequestException: If the request fails + """ + response = requests.get(url, timeout=timeout) + response.raise_for_status() + return response.text +``` + +### Testing + +Before submitting a pull request, ensure your code passes all tests: + +```bash +# Run all tests +pytest + +# Run tests with coverage +pytest --cov=crawler + +# Run specific test file +pytest tests/test_crawler.py +``` + +Write tests for new features: +```python +def test_fetch_url_success(): + """Test that fetch_url returns content for valid URLs.""" + result = fetch_url("https://example.com") + assert result is not None + assert len(result) > 0 +``` + +### Commits + +Write clear, descriptive commit messages: + +```bash +# Good +git commit -m "Add proxy support to crawler + +- Add ProxyManager class to handle proxy rotation +- Update fetch_url to accept proxy configuration +- Add tests for proxy connection handling" + +# Avoid +git commit -m "fix stuff" +git commit -m "changes" +``` + +## Submitting Changes + +### Pull Request Process + +1. Ensure all tests pass and code is formatted correctly +2. Push your branch to your fork: + ```bash + git push origin feature/your-feature-name + ``` +3. Open a pull request on GitHub with: + - A clear title describing the change + - A detailed description of what was changed and why + - Reference to any related issues (e.g., "Fixes #123") + - Screenshots or examples if applicable +4. Address review comments and make requested changes +5. Ensure the CI/CD pipeline passes +6. 
Once approved, your PR will be merged + +### Pull Request Template + +```markdown +## Description +Brief explanation of what this PR does. + +## Changes Made +- Change 1 +- Change 2 +- Change 3 + +## Related Issues +Fixes #123 + +## Testing +Describe how you tested these changes. + +## Checklist +- [ ] Code follows style guidelines +- [ ] Tests pass locally +- [ ] Documentation is updated +- [ ] No breaking changes (or documented in PR) +``` + +## Reporting Bugs + +When reporting bugs, please include: + +- **Description**: What you were trying to do +- **Expected behavior**: What should have happened +- **Actual behavior**: What actually happened +- **Environment**: Python version, OS, MySQL version +- **Steps to reproduce**: Clear steps to replicate the issue +- **Error message**: Full error traceback if available +- **Screenshots**: If applicable + +Example: +``` +Title: Crawler fails with timeout on large datasets + +Description: When crawling more than 10,000 pages, the crawler +consistently times out. + +Steps to reproduce: +1. Configure crawler with 15,000 pages +2. Run `python crawler.py` +3. After ~8,000 pages, connection fails + +Expected: Crawler should complete all 15,000 pages +Actual: Crawler crashes with timeout error + +Environment: Python 3.9, Ubuntu 20.04, MySQL 8.0 +``` + +## Suggesting Features + +When suggesting features, explain: + +- **Use case**: Why this feature is needed +- **Expected behavior**: How it should work +- **Alternative approaches**: Other possible implementations +- **Impact**: How it affects existing functionality + +## Documentation + +Help improve documentation by: + +- Fixing typos and grammatical errors +- Adding missing sections or examples +- Clarifying confusing explanations +- Adding inline code comments for complex logic \ No newline at end of file From bc3596690d1489f8520e06b76c342d21e77d87e6 Mon Sep 17 00:00:00 2001 From: Marfin <248203147+arifinahmad99-cloud@users.noreply.github.com> Date: Mon, 19 Jan 2026 10:08:37 +0800 Subject: [PATCH 9/9] Create LICENSE --- crawler/LICENSE | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) create mode 100644 crawler/LICENSE diff --git a/crawler/LICENSE b/crawler/LICENSE new file mode 100644 index 0000000..bc7a6e5 --- /dev/null +++ b/crawler/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2025 Crawler Project Contributors + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. \ No newline at end of file
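
For reference, the `language_data` rows added in PATCH 1/9 are keyed by `lang_code` and `lang_key` in the `explorepi` database. Below is a minimal Node.js sketch of how the website could load those rows into a flat lookup map; the `mysql2` dependency, the connection credentials, and the `loadTranslations`/`translator` helper names are illustrative assumptions and are not part of the patches.

```js
// Sketch only: assumes the `mysql2` package and the `language_data` table
// created in PATCH 1/9; the connection settings below are placeholders.
const mysql = require('mysql2/promise');

async function loadTranslations(langCode) {
  const db = await mysql.createConnection({
    host: 'localhost',
    user: 'explorepi_user',   // placeholder credentials
    password: 'your_password',
    database: 'explorepi',    // database name used in PATCH 1/9 (USE explorepi)
  });
  // Pull every key/value pair for the requested language.
  const [rows] = await db.execute(
    'SELECT lang_key, lang_value FROM language_data WHERE lang_code = ?',
    [langCode]
  );
  await db.end();
  // Flat lookup map, e.g. map['nav.home'] === 'Home' for langCode 'en'.
  return Object.fromEntries(rows.map((r) => [r.lang_key, r.lang_value]));
}

// Example usage: load once per request and fall back to English (the only
// complete translation set in PATCH 1/9) when a key is missing.
async function translator(langCode) {
  const [selected, fallback] = await Promise.all([
    loadTranslations(langCode),
    loadTranslations('en'),
  ]);
  return (key) => selected[key] ?? fallback[key] ?? key;
}
```

Keeping English as the fallback mirrors the data itself: only the `en` set covers every category, while the other languages translate a subset of keys, so unresolved keys degrade to English rather than rendering raw key names.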