Binary file added Hackathon_model.png
Binary file added Idea Submission & Presentation 1-0-1.pdf
Binary file not shown.
4 changes: 2 additions & 2 deletions README.md
@@ -1,5 +1,5 @@
# ksp-submission
This repository is created for Karnataka State Police Hackathon 2023 - submission collection.
## Team Information
-### Team Name -
-### Problem Statement -
+### Team Name - EData2
+### Problem Statement - Solution for Crowd Sourcing of Records
11 changes: 11 additions & 0 deletions captcha.py
@@ -0,0 +1,11 @@
from PIL import Image
import pytesseract
import cv2
import os


def get_captcha(imag_path):
    # Run Tesseract OCR over the captcha/screenshot image and return the cleaned-up text
    text = pytesseract.image_to_string(Image.open(imag_path))
    # os.remove(imag_path)
    return text.replace("\n\f", "").strip()
Binary file added example.png
86 changes: 86 additions & 0 deletions fb.py
@@ -0,0 +1,86 @@
import requests

# Replace ACCESS_TOKEN with your Facebook access token
ACCESS_TOKEN = 'EAATQZBQ0Xw2UBADgE1rEbVplG4jhzxqrgGktMnq5PSJSjUicx28h7K1foHcRoqI3H7Wnu7eUg6ZCELPfvZBMbodD5QTXtKKzLUzAe75H9vboyb2jZAv1UUrB1GxqW7UQTsogm31zKWiIEimYOMoXEiyOJgAxupXl8u6PFoXE7kkBRD4Ou7NtGvXBSGcv3ugAuUKvZBW45bghrYd4ZARpYw1pkZC4JWOKZCxdHtuSodWohHKtdsPEIiYFVli5B1fSHgwZD'

# # Define the endpoint you want to access
endpoint = 'https://graph.facebook.com/v9.0/me?fields=id,name&access_token=' + ACCESS_TOKEN

# Make a GET request to the endpoint
response = requests.get(endpoint)

# Check if the request was successful
if response.status_code == 200:
    # If successful, extract the data from the response
    data = response.json()
    print(data)
else:
    # If the request failed, print an error message
    print('Request failed with status code:', response.status_code)



# Replace USER_ID with the Facebook ID of the user you want to retrieve data for
user_id = data["id"]
user_id = '100086603361203'

# Define the endpoint URL
url = f"https://graph.facebook.com/v12.0/{user_id}?fields=name,picture&access_token={ACCESS_TOKEN}"

# Send a GET request to the endpoint
response = requests.get(url)

# Check if the request was successful
if response.status_code == 200:
    # Parse the response JSON
    user_data = response.json()

    # Extract the name and picture URL from the response
    name = user_data["name"]
    picture_url = user_data["picture"]["data"]["url"]

    # Print the name and picture URL
    print("Name:", name)
    print("Picture URL:", picture_url)
else:
    # If the request was unsuccessful, print the error message
    print("Failed to retrieve data. Response code:", response.status_code)

# import requests

# # Replace ACCESS_TOKEN with your Facebook access token
# ACCESS_TOKEN = 'your-access-token'

# Replace EMAIL with the email address of the Facebook user you want to search for
# EMAIL = 'rahul.v@elintdata.com'

# # Define the endpoint for the Facebook Graph API
# endpoint = f'https://graph.facebook.com/v9.0/search?q={EMAIL}&type=user&fields=id,name&access_token={ACCESS_TOKEN}'

# # Make a GET request to the endpoint
# response = requests.get(endpoint)
# print(response)
# # Check if the request was successful
# if response.status_code == 200:
#     # If successful, extract the data from the response
#     data = response.json()
#     users = data['data']
#     if len(users) > 0:
#         user = users[0]
#         user_id = user['id']
#         user_name = user['name']
#         print('User ID:', user_id)
#         print('User Name:', user_name)
#     else:
#         print('No user found with that email address')
# else:
#     # If the request failed, print an error message
#     print('Request failed with status code:', response.status_code)
#     if response.status_code == 400:
#         # If so, extract the error information from the response
#         error = response.json()
#         error_message = error['error']['message']
#         error_code = error['error']['code']
#         print('Error Message:', error_message)
#         print('Error Code:', error_code)

Binary file added final_ppt.pptx
Binary file not shown.
33 changes: 33 additions & 0 deletions get_fbid.py
@@ -0,0 +1,33 @@
import requests

# Replace ACCESS_TOKEN with your access token
ACCESS_TOKEN = 'EAATQZBQ0Xw2UBADgE1rEbVplG4jhzxqrgGktMnq5PSJSjUicx28h7K1foHcRoqI3H7Wnu7eUg6ZCELPfvZBMbodD5QTXtKKzLUzAe75H9vboyb2jZAv1UUrB1GxqW7UQTsogm31zKWiIEimYOMoXEiyOJgAxupXl8u6PFoXE7kkBRD4Ou7NtGvXBSGcv3ugAuUKvZBW45bghrYd4ZARpYw1pkZC4JWOKZCxdHtuSodWohHKtdsPEIiYFVli5B1fSHgwZD'

# Replace EMAIL_ADDRESS with the email address of the user you want to retrieve the Facebook ID for
email_address = "rahul.v@elintdata.com"

# Define the endpoint URL
url = f"https://graph.facebook.com/search?q={email_address}&type=user&access_token={ACCESS_TOKEN}"

# Send a GET request to the endpoint
response = requests.get(url)

# Check if the request was successful
if response.status_code == 200:
    # Parse the response JSON
    search_results = response.json()["data"]

    # Loop through the search results to find the user with the matching email address
    for user in search_results:
        if user["email"] == email_address:
            # If the email address matches, print the Facebook ID
            print("Facebook ID:", user["id"])
            break
else:
    # If the request was unsuccessful, print the error message
    print("Failed to retrieve data. Response code:", response.status_code)
    error = response.json()
    error_message = error['error']['message']
    error_code = error['error']['code']
    print('Error Message:', error_message)
    print('Error Code:', error_code)
21 changes: 21 additions & 0 deletions get_token.py
@@ -0,0 +1,21 @@
import requests

# Replace APP_ID and APP_SECRET with your Facebook app ID and app secret
APP_ID = '586953412840539'
APP_SECRET = 'ca66efcb3b4f8f22b43803e45a51b86f'

# Define the endpoint for getting an access token
endpoint = f'https://graph.facebook.com/v9.0/oauth/access_token?client_id={APP_ID}&client_secret={APP_SECRET}&grant_type=client_credentials'

# Make a GET request to the endpoint
response = requests.get(endpoint)

# Check if the request was successful
if response.status_code == 200:
    # If successful, extract the access token from the response
    data = response.json()
    access_token = data['access_token']
    print('Access Token:', access_token)
else:
    # If the request failed, print an error message
    print('Request failed with status code:', response.status_code)
23 changes: 23 additions & 0 deletions requirements.txt
@@ -0,0 +1,23 @@
appdirs==1.4.4
certifi==2022.9.24
click==8.1.3
Flask==2.2.2
Flask-Cors==3.0.10
importlib-metadata==5.0.0
itsdangerous==2.1.2
Jinja2==3.1.2
MarkupSafe==2.1.1
numpy==1.23.4
opencv-python==4.6.0.66
packaging==21.3
Pillow==9.2.0
pyee==8.2.2
pyparsing==3.0.9
pyppeteer==1.0.2
pytesseract==0.3.10
six==1.16.0
tqdm==4.64.1
urllib3==1.26.12
websockets==10.3
Werkzeug==2.2.2
zipp==3.10.0
85 changes: 85 additions & 0 deletions scrap_insta.py
@@ -0,0 +1,85 @@
import asyncio
from pyppeteer import launch
import time
import json
import os
from captcha import get_captcha

async def get_text(page, selector_path):
    element = await page.querySelector(selector_path)
    query = '(element) => element.innerText.trim()'
    temp = await page.evaluate(query, element)
    return temp


async def main_scrap(email):
    browser = await launch({"headless": False})
    page = await browser.newPage()
    await page.goto(url)

    # # take screenshot of captcha
    # time.sleep(2)

    # type the email / phone number into the form field
    time.sleep(2)
    # search_box = await page.querySelector('#layers > div:nth-child(2) > div > div > div > div > div > div.css-1dbjc4n.r-1awozwy.r-18u37iz.r-1pi2tsx.r-1777fci.r-1xcajam.r-ipm5af.r-g6jmlv > div.css-1dbjc4n.r-1867qdf.r-1wbh5a2.r-kwpbio.r-rsyp9y.r-1pjcn9w.r-1279nm1.r-htvplk.r-1udh08x > div > div > div.css-1dbjc4n.r-14lw9ot.r-6koalj.r-16y2uox.r-1wbh5a2 > div.css-1dbjc4n.r-16y2uox.r-1wbh5a2.r-1jgb5lz.r-1ye8kvj.r-13qz1uu > div.css-1dbjc4n.r-16y2uox.r-1wbh5a2.r-1dqxon3 > div > div.css-1dbjc4n.r-mk0yit.r-1f1sjgu > label > div > div.css-1dbjc4n.r-18u37iz.r-16y2uox.r-1wbh5a2.r-1wzrnnt.r-1udh08x.r-xd6kpl.r-1pn2ns4.r-ttdzmv')
    search_box = await page.querySelector('#mount_0_0_gs > div > div > div > div.x9f619.x1n2onr6.x1ja2u2z > div > div > div > div.x78zum5.xdt5ytf.x10cihs4.x1t2pt76.x1n2onr6.x1ja2u2z > section > main > div._ac06.x78zum5.xdt5ytf > div > div > div > div > div:nth-child(4) > form')
    # print(search_box)
    await search_box.type(email)

    # press enter button

    time.sleep(2)
    # await page.click("#layers > div:nth-child(2) > div > div > div > div > div > div.css-1dbjc4n.r-1awozwy.r-18u37iz.r-1pi2tsx.r-1777fci.r-1xcajam.r-ipm5af.r-g6jmlv > div.css-1dbjc4n.r-1867qdf.r-1wbh5a2.r-kwpbio.r-rsyp9y.r-1pjcn9w.r-1279nm1.r-htvplk.r-1udh08x > div > div > div.css-1dbjc4n.r-14lw9ot.r-6koalj.r-16y2uox.r-1wbh5a2 > div.css-1dbjc4n.r-16y2uox.r-1wbh5a2.r-1jgb5lz.r-1ye8kvj.r-13qz1uu > div.css-1dbjc4n.r-1isdzm1 > div > div > div > div > div")

    await page.click('#layers > div > div > div > div > div > div > div.css-1dbjc4n.r-1awozwy.r-18u37iz.r-1pi2tsx.r-1777fci.r-1xcajam.r-ipm5af.r-g6jmlv > div.css-1dbjc4n.r-1867qdf.r-1wbh5a2.r-kwpbio.r-rsyp9y.r-1pjcn9w.r-1279nm1.r-htvplk.r-1udh08x > div > div > div.css-1dbjc4n.r-14lw9ot.r-6koalj.r-16y2uox.r-1wbh5a2 > div.css-1dbjc4n.r-16y2uox.r-1wbh5a2.r-1jgb5lz.r-1ye8kvj.r-13qz1uu > div > div > div > div:nth-child(6)')
    # page.keypad('Enter')
    # time.sleep(10)
    login_option = await page.querySelector('#layers > div > div > div > div > div > div > div.css-1dbjc4n.r-1awozwy.r-18u37iz.r-1pi2tsx.r-1777fci.r-1xcajam.r-ipm5af.r-g6jmlv > div.css-1dbjc4n.r-1867qdf.r-1wbh5a2.r-kwpbio.r-rsyp9y.r-1pjcn9w.r-1279nm1.r-htvplk.r-1udh08x > div > div > div.css-1dbjc4n.r-14lw9ot.r-6koalj.r-16y2uox.r-1wbh5a2 > div.css-1dbjc4n.r-16y2uox.r-1wbh5a2.r-1jgb5lz.r-1ye8kvj.r-13qz1uu > div > div > div > div:nth-child(1)')
    time.sleep(2)
    # print("login option:", login_option)

    captcha_element = await page.querySelector(
        '#layers > div > div > div > div > div > div > div.css-1dbjc4n.r-1awozwy.r-18u37iz.r-1pi2tsx.r-1777fci.r-1xcajam.r-ipm5af.r-g6jmlv > div.css-1dbjc4n.r-1867qdf.r-1wbh5a2.r-kwpbio.r-rsyp9y.r-1pjcn9w.r-1279nm1.r-htvplk.r-1udh08x > div > div > div.css-1dbjc4n.r-14lw9ot.r-6koalj.r-16y2uox.r-1wbh5a2 > div.css-1dbjc4n.r-16y2uox.r-1wbh5a2.r-1jgb5lz.r-1ye8kvj.r-13qz1uu > div > div > div > div:nth-child(1)'
    )
    await captcha_element.screenshot({'path': 'example.png'})

    # get the captcha text from the screenshot using pytesseract
    captcha = get_captcha('example.png')

    print("captcha:", captcha)

    if "Sign in to Twitter" in captcha:
        print("we did not find a Twitter account with this id")
    else:
        print("we found a Twitter account with this id")

    await browser.close()
    return captcha


def scrape_data(email):
    data = asyncio.new_event_loop().run_until_complete(main_scrap(email))

    try:
        os.makedirs("output_data")
    except FileExistsError:
        # directory already exists
        pass
    with open('output_data/{}.json'.format(name), 'w') as f:
        json.dump(data, f)

    return data


if __name__ == '__main__':
    url = "https://www.instagram.com/accounts/password/reset/"
    name = "sumit jha"
    id = "gopal@elintdata.com"
    id = "rahulverma.upe@gmail.com"
    id = "gopal.kgpian@gmail.com"
    id = "6206609503"

    data = scrape_data(id)
85 changes: 85 additions & 0 deletions scrap_twitter.py
@@ -0,0 +1,85 @@
import asyncio
from pyppeteer import launch
import time
import json
import os
from captcha import get_captcha

async def get_text(page, selector_path):
    element = await page.querySelector(selector_path)
    query = '(element) => element.innerText.trim()'
    temp = await page.evaluate(query, element)
    return temp


async def main_scrap(email):
    browser = await launch({"headless": False})
    page = await browser.newPage()
    await page.goto(url)

    # # take screenshot of captcha
    # time.sleep(2)

    # type the email / phone number into the login field
    time.sleep(5)
    # search_box = await page.querySelector('#layers > div:nth-child(2) > div > div > div > div > div > div.css-1dbjc4n.r-1awozwy.r-18u37iz.r-1pi2tsx.r-1777fci.r-1xcajam.r-ipm5af.r-g6jmlv > div.css-1dbjc4n.r-1867qdf.r-1wbh5a2.r-kwpbio.r-rsyp9y.r-1pjcn9w.r-1279nm1.r-htvplk.r-1udh08x > div > div > div.css-1dbjc4n.r-14lw9ot.r-6koalj.r-16y2uox.r-1wbh5a2 > div.css-1dbjc4n.r-16y2uox.r-1wbh5a2.r-1jgb5lz.r-1ye8kvj.r-13qz1uu > div.css-1dbjc4n.r-16y2uox.r-1wbh5a2.r-1dqxon3 > div > div.css-1dbjc4n.r-mk0yit.r-1f1sjgu > label > div > div.css-1dbjc4n.r-18u37iz.r-16y2uox.r-1wbh5a2.r-1wzrnnt.r-1udh08x.r-xd6kpl.r-1pn2ns4.r-ttdzmv')
    search_box = await page.querySelector('#layers > div > div > div > div > div > div > div.css-1dbjc4n.r-1awozwy.r-18u37iz.r-1pi2tsx.r-1777fci.r-1xcajam.r-ipm5af.r-g6jmlv > div.css-1dbjc4n.r-1867qdf.r-1wbh5a2.r-kwpbio.r-rsyp9y.r-1pjcn9w.r-1279nm1.r-htvplk.r-1udh08x > div > div > div.css-1dbjc4n.r-14lw9ot.r-6koalj.r-16y2uox.r-1wbh5a2 > div.css-1dbjc4n.r-16y2uox.r-1wbh5a2.r-1jgb5lz.r-1ye8kvj.r-13qz1uu > div > div > div > div.css-1dbjc4n.r-mk0yit.r-1f1sjgu.r-13qz1uu > label > div > div.css-1dbjc4n.r-18u37iz.r-16y2uox.r-1wbh5a2.r-1wzrnnt.r-1udh08x.r-xd6kpl.r-1pn2ns4.r-ttdzmv > div > input')
    # print(search_box)
    await search_box.type(email)

    # press enter button

    time.sleep(5)
    # await page.click("#layers > div:nth-child(2) > div > div > div > div > div > div.css-1dbjc4n.r-1awozwy.r-18u37iz.r-1pi2tsx.r-1777fci.r-1xcajam.r-ipm5af.r-g6jmlv > div.css-1dbjc4n.r-1867qdf.r-1wbh5a2.r-kwpbio.r-rsyp9y.r-1pjcn9w.r-1279nm1.r-htvplk.r-1udh08x > div > div > div.css-1dbjc4n.r-14lw9ot.r-6koalj.r-16y2uox.r-1wbh5a2 > div.css-1dbjc4n.r-16y2uox.r-1wbh5a2.r-1jgb5lz.r-1ye8kvj.r-13qz1uu > div.css-1dbjc4n.r-1isdzm1 > div > div > div > div > div")

    await page.click('#layers > div > div > div > div > div > div > div.css-1dbjc4n.r-1awozwy.r-18u37iz.r-1pi2tsx.r-1777fci.r-1xcajam.r-ipm5af.r-g6jmlv > div.css-1dbjc4n.r-1867qdf.r-1wbh5a2.r-kwpbio.r-rsyp9y.r-1pjcn9w.r-1279nm1.r-htvplk.r-1udh08x > div > div > div.css-1dbjc4n.r-14lw9ot.r-6koalj.r-16y2uox.r-1wbh5a2 > div.css-1dbjc4n.r-16y2uox.r-1wbh5a2.r-1jgb5lz.r-1ye8kvj.r-13qz1uu > div > div > div > div:nth-child(6)')
    # page.keypad('Enter')
    # time.sleep(10)
    login_option = await page.querySelector('#layers > div > div > div > div > div > div > div.css-1dbjc4n.r-1awozwy.r-18u37iz.r-1pi2tsx.r-1777fci.r-1xcajam.r-ipm5af.r-g6jmlv > div.css-1dbjc4n.r-1867qdf.r-1wbh5a2.r-kwpbio.r-rsyp9y.r-1pjcn9w.r-1279nm1.r-htvplk.r-1udh08x > div > div > div.css-1dbjc4n.r-14lw9ot.r-6koalj.r-16y2uox.r-1wbh5a2 > div.css-1dbjc4n.r-16y2uox.r-1wbh5a2.r-1jgb5lz.r-1ye8kvj.r-13qz1uu > div > div > div > div:nth-child(1)')
    time.sleep(5)
    # print("login option:", login_option)

    captcha_element = await page.querySelector(
        '#layers > div > div > div > div > div > div > div.css-1dbjc4n.r-1awozwy.r-18u37iz.r-1pi2tsx.r-1777fci.r-1xcajam.r-ipm5af.r-g6jmlv > div.css-1dbjc4n.r-1867qdf.r-1wbh5a2.r-kwpbio.r-rsyp9y.r-1pjcn9w.r-1279nm1.r-htvplk.r-1udh08x > div > div > div.css-1dbjc4n.r-14lw9ot.r-6koalj.r-16y2uox.r-1wbh5a2 > div.css-1dbjc4n.r-16y2uox.r-1wbh5a2.r-1jgb5lz.r-1ye8kvj.r-13qz1uu > div > div > div > div:nth-child(1)'
    )
    await captcha_element.screenshot({'path': 'example.png'})

    # get the captcha text from the screenshot using pytesseract
    captcha = get_captcha('example.png')

    print("captcha:", captcha)

    if "Sign in to Twitter" in captcha:
        print("we did not find a Twitter account with this id")
    else:
        print("we found a Twitter account with this id")

    await browser.close()
    return captcha


def scrape_data(email):
    data = asyncio.new_event_loop().run_until_complete(main_scrap(email))

    try:
        os.makedirs("output_data")
    except FileExistsError:
        # directory already exists
        pass
    with open('output_data/{}.json'.format(name), 'w') as f:
        json.dump(data, f)

    return data


if __name__ == '__main__':
    url = "https://twitter.com/i/flow/login"
    name = "sumit jha"
    id = "gopal@elintdata.com"
    id = "rahulverma.upe@gmail.com"
    id = "gopal.kgpian@gmail.com"
    id = "9845107111"

    data = scrape_data(id)