Binary file added Hackathon_model.png
Binary file added Idea Submission & Presentation 1-0-1.pdf
Binary file not shown.
4 changes: 2 additions & 2 deletions README.md
@@ -1,5 +1,5 @@
# ksp-submission
This repository is created for Karnataka State Police Hackathon 2023 - submission collection.
## Team Information
-### Team Name -
-### Problem Statement -
+### Team Name - EData2
+### Problem Statement - Solution for Crowd Sourcing of Records
11 changes: 11 additions & 0 deletions captcha.py
@@ -0,0 +1,11 @@
from PIL import Image
import pytesseract
import cv2
import os


def get_captcha(imag_path):
    # Run Tesseract OCR over the captcha/screenshot image and return the cleaned-up text
    text = pytesseract.image_to_string(Image.open(imag_path))
    # os.remove(imag_path)
    return text.replace("\n\f", "").strip()
Binary file added example.png
86 changes: 86 additions & 0 deletions fb.py
@@ -0,0 +1,86 @@
import requests

# Replace ACCESS_TOKEN with your Facebook access token
ACCESS_TOKEN = 'EAATQZBQ0Xw2UBADgE1rEbVplG4jhzxqrgGktMnq5PSJSjUicx28h7K1foHcRoqI3H7Wnu7eUg6ZCELPfvZBMbodD5QTXtKKzLUzAe75H9vboyb2jZAv1UUrB1GxqW7UQTsogm31zKWiIEimYOMoXEiyOJgAxupXl8u6PFoXE7kkBRD4Ou7NtGvXBSGcv3ugAuUKvZBW45bghrYd4ZARpYw1pkZC4JWOKZCxdHtuSodWohHKtdsPEIiYFVli5B1fSHgwZD'

# # Define the endpoint you want to access
endpoint = 'https://graph.facebook.com/v9.0/me?fields=id,name&access_token=' + ACCESS_TOKEN

# Make a GET request to the endpoint
response = requests.get(endpoint)

# Check if the request was successful
if response.status_code == 200:
    # If successful, extract the data from the response
    data = response.json()
    print(data)
else:
    # If the request failed, print an error message
    print('Request failed with status code:', response.status_code)



# Replace USER_ID with the Facebook ID of the user you want to retrieve data for
user_id = data["id"]
user_id = '100086603361203'

# Define the endpoint URL
url = f"https://graph.facebook.com/v12.0/{user_id}?fields=name,picture&access_token={ACCESS_TOKEN}"

# Send a GET request to the endpoint
response = requests.get(url)

# Check if the request was successful
if response.status_code == 200:
    # Parse the response JSON
    user_data = response.json()

    # Extract the name and picture URL from the response
    name = user_data["name"]
    picture_url = user_data["picture"]["data"]["url"]

    # Print the name and picture URL
    print("Name:", name)
    print("Picture URL:", picture_url)
else:
    # If the request was unsuccessful, print the error message
    print("Failed to retrieve data. Response code:", response.status_code)

# import requests

# # Replace ACCESS_TOKEN with your Facebook access token
# ACCESS_TOKEN = 'your-access-token'

# Replace EMAIL with the email address of the Facebook user you want to search for
# EMAIL = 'rahul.v@elintdata.com'

# # Define the endpoint for the Facebook Graph API
# endpoint = f'https://graph.facebook.com/v9.0/search?q={EMAIL}&type=user&fields=id,name&access_token={ACCESS_TOKEN}'

# # Make a GET request to the endpoint
# response = requests.get(endpoint)
# print(response)
# # Check if the request was successful
# if response.status_code == 200:
#     # If successful, extract the data from the response
#     data = response.json()
#     users = data['data']
#     if len(users) > 0:
#         user = users[0]
#         user_id = user['id']
#         user_name = user['name']
#         print('User ID:', user_id)
#         print('User Name:', user_name)
#     else:
#         print('No user found with that email address')
# else:
#     # If the request failed, print an error message
#     print('Request failed with status code:', response.status_code)
#     if response.status_code == 400:
#         # If so, extract the error information from the response
#         error = response.json()
#         error_message = error['error']['message']
#         error_code = error['error']['code']
#         print('Error Message:', error_message)
#         print('Error Code:', error_code)

Binary file added final_ppt.pptx
Binary file not shown.
33 changes: 33 additions & 0 deletions get_fbid.py
@@ -0,0 +1,33 @@
import requests

# Replace ACCESS_TOKEN with your access token
ACCESS_TOKEN = 'EAATQZBQ0Xw2UBADgE1rEbVplG4jhzxqrgGktMnq5PSJSjUicx28h7K1foHcRoqI3H7Wnu7eUg6ZCELPfvZBMbodD5QTXtKKzLUzAe75H9vboyb2jZAv1UUrB1GxqW7UQTsogm31zKWiIEimYOMoXEiyOJgAxupXl8u6PFoXE7kkBRD4Ou7NtGvXBSGcv3ugAuUKvZBW45bghrYd4ZARpYw1pkZC4JWOKZCxdHtuSodWohHKtdsPEIiYFVli5B1fSHgwZD'

# Replace EMAIL_ADDRESS with the email address of the user you want to retrieve the Facebook ID for
email_address = "rahul.v@elintdata.com"

# Define the endpoint URL
url = f"https://graph.facebook.com/search?q={email_address}&type=user&access_token={ACCESS_TOKEN}"

# Send a GET request to the endpoint
response = requests.get(url)

# Check if the request was successful
if response.status_code == 200:
    # Parse the response JSON
    search_results = response.json()["data"]

    # Loop through the search results to find the user with the matching email address
    for user in search_results:
        if user["email"] == email_address:
            # If the email address matches, print the Facebook ID
            print("Facebook ID:", user["id"])
            break
else:
    # If the request was unsuccessful, print the error message
    print("Failed to retrieve data. Response code:", response.status_code)
    error = response.json()
    error_message = error['error']['message']
    error_code = error['error']['code']
    print('Error Message:', error_message)
    print('Error Code:', error_code)
21 changes: 21 additions & 0 deletions get_token.py
@@ -0,0 +1,21 @@
import requests

# Replace APP_ID and APP_SECRET with your Facebook app ID and app secret
APP_ID = '586953412840539'
APP_SECRET = 'ca66efcb3b4f8f22b43803e45a51b86f'

# Define the endpoint for getting an access token
endpoint = f'https://graph.facebook.com/v9.0/oauth/access_token?client_id={APP_ID}&client_secret={APP_SECRET}&grant_type=client_credentials'

# Make a GET request to the endpoint
response = requests.get(endpoint)

# Check if the request was successful
if response.status_code == 200:
    # If successful, extract the access token from the response
    data = response.json()
    access_token = data['access_token']
    print('Access Token:', access_token)
else:
    # If the request failed, print an error message
    print('Request failed with status code:', response.status_code)
23 changes: 23 additions & 0 deletions requirements.txt
@@ -0,0 +1,23 @@
appdirs==1.4.4
certifi==2022.9.24
click==8.1.3
Flask==2.2.2
Flask-Cors==3.0.10
importlib-metadata==5.0.0
itsdangerous==2.1.2
Jinja2==3.1.2
MarkupSafe==2.1.1
numpy==1.23.4
opencv-python==4.6.0.66
packaging==21.3
Pillow==9.2.0
pyee==8.2.2
pyparsing==3.0.9
pyppeteer==1.0.2
pytesseract==0.3.10
six==1.16.0
tqdm==4.64.1
urllib3==1.26.12
websockets==10.3
Werkzeug==2.2.2
zipp==3.10.0
85 changes: 85 additions & 0 deletions scrap_insta.py
@@ -0,0 +1,85 @@
import asyncio
from pyppeteer import launch
import time
import json
import os
from captcha import get_captcha

async def get_text(page, selector_path):
    element = await page.querySelector(selector_path)
    query = '(element) => element.innerText.trim()'
    temp = await page.evaluate(query, element)
    return temp


async def main_scrap(email):
    browser = await launch({"headless": False})
    page = await browser.newPage()
    await page.goto(url)

    # # take screenshot of captcha
    # time.sleep(2)

    # type the email / phone number into the form field
    time.sleep(2)
    # search_box = await page.querySelector('#layers > div:nth-child(2) > div > div > div > div > div > div.css-1dbjc4n.r-1awozwy.r-18u37iz.r-1pi2tsx.r-1777fci.r-1xcajam.r-ipm5af.r-g6jmlv > div.css-1dbjc4n.r-1867qdf.r-1wbh5a2.r-kwpbio.r-rsyp9y.r-1pjcn9w.r-1279nm1.r-htvplk.r-1udh08x > div > div > div.css-1dbjc4n.r-14lw9ot.r-6koalj.r-16y2uox.r-1wbh5a2 > div.css-1dbjc4n.r-16y2uox.r-1wbh5a2.r-1jgb5lz.r-1ye8kvj.r-13qz1uu > div.css-1dbjc4n.r-16y2uox.r-1wbh5a2.r-1dqxon3 > div > div.css-1dbjc4n.r-mk0yit.r-1f1sjgu > label > div > div.css-1dbjc4n.r-18u37iz.r-16y2uox.r-1wbh5a2.r-1wzrnnt.r-1udh08x.r-xd6kpl.r-1pn2ns4.r-ttdzmv')
    search_box = await page.querySelector('#mount_0_0_gs > div > div > div > div.x9f619.x1n2onr6.x1ja2u2z > div > div > div > div.x78zum5.xdt5ytf.x10cihs4.x1t2pt76.x1n2onr6.x1ja2u2z > section > main > div._ac06.x78zum5.xdt5ytf > div > div > div > div > div:nth-child(4) > form')
    # print(search_box)
    await search_box.type(email)

    # press enter button

    time.sleep(2)
    # await page.click("#layers > div:nth-child(2) > div > div > div > div > div > div.css-1dbjc4n.r-1awozwy.r-18u37iz.r-1pi2tsx.r-1777fci.r-1xcajam.r-ipm5af.r-g6jmlv > div.css-1dbjc4n.r-1867qdf.r-1wbh5a2.r-kwpbio.r-rsyp9y.r-1pjcn9w.r-1279nm1.r-htvplk.r-1udh08x > div > div > div.css-1dbjc4n.r-14lw9ot.r-6koalj.r-16y2uox.r-1wbh5a2 > div.css-1dbjc4n.r-16y2uox.r-1wbh5a2.r-1jgb5lz.r-1ye8kvj.r-13qz1uu > div.css-1dbjc4n.r-1isdzm1 > div > div > div > div > div")

    await page.click('#layers > div > div > div > div > div > div > div.css-1dbjc4n.r-1awozwy.r-18u37iz.r-1pi2tsx.r-1777fci.r-1xcajam.r-ipm5af.r-g6jmlv > div.css-1dbjc4n.r-1867qdf.r-1wbh5a2.r-kwpbio.r-rsyp9y.r-1pjcn9w.r-1279nm1.r-htvplk.r-1udh08x > div > div > div.css-1dbjc4n.r-14lw9ot.r-6koalj.r-16y2uox.r-1wbh5a2 > div.css-1dbjc4n.r-16y2uox.r-1wbh5a2.r-1jgb5lz.r-1ye8kvj.r-13qz1uu > div > div > div > div:nth-child(6)')
    # page.keypad('Enter')
    # time.sleep(10)
    login_option = await page.querySelector('#layers > div > div > div > div > div > div > div.css-1dbjc4n.r-1awozwy.r-18u37iz.r-1pi2tsx.r-1777fci.r-1xcajam.r-ipm5af.r-g6jmlv > div.css-1dbjc4n.r-1867qdf.r-1wbh5a2.r-kwpbio.r-rsyp9y.r-1pjcn9w.r-1279nm1.r-htvplk.r-1udh08x > div > div > div.css-1dbjc4n.r-14lw9ot.r-6koalj.r-16y2uox.r-1wbh5a2 > div.css-1dbjc4n.r-16y2uox.r-1wbh5a2.r-1jgb5lz.r-1ye8kvj.r-13qz1uu > div > div > div > div:nth-child(1)')
    time.sleep(2)
    # print("login option:", login_option)

    captcha_element = await page.querySelector(
        '#layers > div > div > div > div > div > div > div.css-1dbjc4n.r-1awozwy.r-18u37iz.r-1pi2tsx.r-1777fci.r-1xcajam.r-ipm5af.r-g6jmlv > div.css-1dbjc4n.r-1867qdf.r-1wbh5a2.r-kwpbio.r-rsyp9y.r-1pjcn9w.r-1279nm1.r-htvplk.r-1udh08x > div > div > div.css-1dbjc4n.r-14lw9ot.r-6koalj.r-16y2uox.r-1wbh5a2 > div.css-1dbjc4n.r-16y2uox.r-1wbh5a2.r-1jgb5lz.r-1ye8kvj.r-13qz1uu > div > div > div > div:nth-child(1)'
    )
    await captcha_element.screenshot({'path': 'example.png'})

    # get the captcha text from the screenshot using pytesseract
    captcha = get_captcha('example.png')

    print("captcha:", captcha)

    if "Sign in to Twitter" in captcha:
        print("we did not find a Twitter account with this id")
    else:
        print("we found a Twitter account with this id")

    await browser.close()
    return captcha


def scrape_data(email):
    data = asyncio.new_event_loop().run_until_complete(main_scrap(email))

    try:
        os.makedirs("output_data")
    except FileExistsError:
        # directory already exists
        pass
    with open('output_data/{}.json'.format(name), 'w') as f:
        json.dump(data, f)

    return data


if __name__ == '__main__':
    url = "https://www.instagram.com/accounts/password/reset/"
    name = "sumit jha"
    id = "gopal@elintdata.com"
    id = "rahulverma.upe@gmail.com"
    id = "gopal.kgpian@gmail.com"
    id = "6206609503"

    data = scrape_data(id)
85 changes: 85 additions & 0 deletions scrap_twitter.py
@@ -0,0 +1,85 @@
import asyncio
from pyppeteer import launch
import time
import json
import os
from captcha import get_captcha

async def get_text(page, selector_path):
    element = await page.querySelector(selector_path)
    query = '(element) => element.innerText.trim()'
    temp = await page.evaluate(query, element)
    return temp


async def main_scrap(email):
    browser = await launch({"headless": False})
    page = await browser.newPage()
    await page.goto(url)

    # # take screenshot of captcha
    # time.sleep(2)

    # type the email / phone number into the login field
    time.sleep(5)
    # search_box = await page.querySelector('#layers > div:nth-child(2) > div > div > div > div > div > div.css-1dbjc4n.r-1awozwy.r-18u37iz.r-1pi2tsx.r-1777fci.r-1xcajam.r-ipm5af.r-g6jmlv > div.css-1dbjc4n.r-1867qdf.r-1wbh5a2.r-kwpbio.r-rsyp9y.r-1pjcn9w.r-1279nm1.r-htvplk.r-1udh08x > div > div > div.css-1dbjc4n.r-14lw9ot.r-6koalj.r-16y2uox.r-1wbh5a2 > div.css-1dbjc4n.r-16y2uox.r-1wbh5a2.r-1jgb5lz.r-1ye8kvj.r-13qz1uu > div.css-1dbjc4n.r-16y2uox.r-1wbh5a2.r-1dqxon3 > div > div.css-1dbjc4n.r-mk0yit.r-1f1sjgu > label > div > div.css-1dbjc4n.r-18u37iz.r-16y2uox.r-1wbh5a2.r-1wzrnnt.r-1udh08x.r-xd6kpl.r-1pn2ns4.r-ttdzmv')
    search_box = await page.querySelector('#layers > div > div > div > div > div > div > div.css-1dbjc4n.r-1awozwy.r-18u37iz.r-1pi2tsx.r-1777fci.r-1xcajam.r-ipm5af.r-g6jmlv > div.css-1dbjc4n.r-1867qdf.r-1wbh5a2.r-kwpbio.r-rsyp9y.r-1pjcn9w.r-1279nm1.r-htvplk.r-1udh08x > div > div > div.css-1dbjc4n.r-14lw9ot.r-6koalj.r-16y2uox.r-1wbh5a2 > div.css-1dbjc4n.r-16y2uox.r-1wbh5a2.r-1jgb5lz.r-1ye8kvj.r-13qz1uu > div > div > div > div.css-1dbjc4n.r-mk0yit.r-1f1sjgu.r-13qz1uu > label > div > div.css-1dbjc4n.r-18u37iz.r-16y2uox.r-1wbh5a2.r-1wzrnnt.r-1udh08x.r-xd6kpl.r-1pn2ns4.r-ttdzmv > div > input')
    # print(search_box)
    await search_box.type(email)

    # press enter button

    time.sleep(5)
    # await page.click("#layers > div:nth-child(2) > div > div > div > div > div > div.css-1dbjc4n.r-1awozwy.r-18u37iz.r-1pi2tsx.r-1777fci.r-1xcajam.r-ipm5af.r-g6jmlv > div.css-1dbjc4n.r-1867qdf.r-1wbh5a2.r-kwpbio.r-rsyp9y.r-1pjcn9w.r-1279nm1.r-htvplk.r-1udh08x > div > div > div.css-1dbjc4n.r-14lw9ot.r-6koalj.r-16y2uox.r-1wbh5a2 > div.css-1dbjc4n.r-16y2uox.r-1wbh5a2.r-1jgb5lz.r-1ye8kvj.r-13qz1uu > div.css-1dbjc4n.r-1isdzm1 > div > div > div > div > div")

    await page.click('#layers > div > div > div > div > div > div > div.css-1dbjc4n.r-1awozwy.r-18u37iz.r-1pi2tsx.r-1777fci.r-1xcajam.r-ipm5af.r-g6jmlv > div.css-1dbjc4n.r-1867qdf.r-1wbh5a2.r-kwpbio.r-rsyp9y.r-1pjcn9w.r-1279nm1.r-htvplk.r-1udh08x > div > div > div.css-1dbjc4n.r-14lw9ot.r-6koalj.r-16y2uox.r-1wbh5a2 > div.css-1dbjc4n.r-16y2uox.r-1wbh5a2.r-1jgb5lz.r-1ye8kvj.r-13qz1uu > div > div > div > div:nth-child(6)')
    # page.keypad('Enter')
    # time.sleep(10)
    login_option = await page.querySelector('#layers > div > div > div > div > div > div > div.css-1dbjc4n.r-1awozwy.r-18u37iz.r-1pi2tsx.r-1777fci.r-1xcajam.r-ipm5af.r-g6jmlv > div.css-1dbjc4n.r-1867qdf.r-1wbh5a2.r-kwpbio.r-rsyp9y.r-1pjcn9w.r-1279nm1.r-htvplk.r-1udh08x > div > div > div.css-1dbjc4n.r-14lw9ot.r-6koalj.r-16y2uox.r-1wbh5a2 > div.css-1dbjc4n.r-16y2uox.r-1wbh5a2.r-1jgb5lz.r-1ye8kvj.r-13qz1uu > div > div > div > div:nth-child(1)')
    time.sleep(5)
    # print("login option:", login_option)

    captcha_element = await page.querySelector(
        '#layers > div > div > div > div > div > div > div.css-1dbjc4n.r-1awozwy.r-18u37iz.r-1pi2tsx.r-1777fci.r-1xcajam.r-ipm5af.r-g6jmlv > div.css-1dbjc4n.r-1867qdf.r-1wbh5a2.r-kwpbio.r-rsyp9y.r-1pjcn9w.r-1279nm1.r-htvplk.r-1udh08x > div > div > div.css-1dbjc4n.r-14lw9ot.r-6koalj.r-16y2uox.r-1wbh5a2 > div.css-1dbjc4n.r-16y2uox.r-1wbh5a2.r-1jgb5lz.r-1ye8kvj.r-13qz1uu > div > div > div > div:nth-child(1)'
    )
    await captcha_element.screenshot({'path': 'example.png'})

    # get the captcha text from the screenshot using pytesseract
    captcha = get_captcha('example.png')

    print("captcha:", captcha)

    if "Sign in to Twitter" in captcha:
        print("we did not find a Twitter account with this id")
    else:
        print("we found a Twitter account with this id")

    await browser.close()
    return captcha


def scrape_data(email):
    data = asyncio.new_event_loop().run_until_complete(main_scrap(email))

    try:
        os.makedirs("output_data")
    except FileExistsError:
        # directory already exists
        pass
    with open('output_data/{}.json'.format(name), 'w') as f:
        json.dump(data, f)

    return data


if __name__ == '__main__':
    url = "https://twitter.com/i/flow/login"
    name = "sumit jha"
    id = "gopal@elintdata.com"
    id = "rahulverma.upe@gmail.com"
    id = "gopal.kgpian@gmail.com"
    id = "9845107111"

    data = scrape_data(id)