"""Flask entry point for the ICE lookup service.

Routes:
  /ping            health check
  /getDetails      OSINT lookup by channel (truecaller | twitter | name)
  /reply           Twilio SMS/WhatsApp webhook
  /generateReport  render an HTML string to report.pdf and return it
"""
import json
import os

from flask import Flask, request, send_file
from flask_cors import CORS
from twilio.rest import Client
from twilio.twiml.messaging_response import MessagingResponse

from components.name_component import getDetailsFromName
from components.phone_number_component import (
    getDetailsFromPhoneNumber,
    getDetailsFromPhoneNumberForTwilio,
)
from components.twitter_component import getDetailsFromTwitterHandle
from utils.pdf import generate_pdf_from_html

# Read Twilio credentials from the environment. The previous code referenced
# undefined module globals (account_sid / auth_token), which raised NameError
# the moment the module was imported.
account_sid = os.environ.get("TWILIO_ACCOUNT_SID")
auth_token = os.environ.get("TWILIO_AUTH_TOKEN")
twilio_client = Client(account_sid, auth_token) if account_sid and auth_token else None

app = Flask(__name__)
CORS(app)
app.config['CORS_HEADERS'] = 'Content-Type'


@app.route('/ping')
def hello_name():
    """Health-check endpoint."""
    return 'Healthy'


@app.route('/getDetails')
def get_details():
    """Dispatch an OSINT lookup based on the ``channel`` query parameter.

    ``value`` is the search key: a 10-digit phone number for "truecaller"
    (the +91 country code is prepended here), a handle for "twitter", or a
    person's name for "name".
    """
    channel = str(request.args.get('channel'))
    value = str(request.args.get('value'))

    if channel == "truecaller":
        return getDetailsFromPhoneNumber("+91" + value)
    elif channel == "twitter":
        return getDetailsFromTwitterHandle(value)
    elif channel == "name":
        return getDetailsFromName(value)

    # The original fell through and returned None, which Flask turns into a
    # 500. Report the bad request explicitly instead.
    return {"response": "Unknown channel: " + channel}, 400


@app.route('/reply', methods=["POST"])
def reply():
    """Twilio webhook: a 10-character body is treated as a phone number."""
    message = request.values.get('Body', None)
    resp = MessagingResponse()
    if len(str(message)) == 10:
        details = getDetailsFromPhoneNumberForTwilio(message)
        resp.message(details)
    elif str(message) == "1":
        resp.message("Thank you for replying. Please enter the mobile number of the person you want to search. Please don't include +91 in the number")
    else:
        resp.message("Welcome to Namma Sherlock.\n Press 1 to search using MobileNumber")
    return str(resp)


@app.route('/generateReport', methods=['POST'])
def show_static_pdf():
    """Render ``html_string`` from the JSON body to report.pdf and serve it."""
    data = json.loads(request.get_data())
    html_string = data["html_string"]

    if generate_pdf_from_html(html_string):
        # generate_pdf_from_html writes report.pdf next to this module.
        return send_file('report.pdf', as_attachment=True)
    return {"response": "File not formed"}


if __name__ == '__main__':
    # NOTE(review): debug=True is for local development only — do not ship.
    app.run(debug=True)
"""Name-based OSINT lookup component.

Given a person's name, fan out to Google text/image search (plain and
site-restricted), harvest Twitter profiles from the search hits, and fold
everything into a single response dict for the API layer.
"""
from math import factorial
from utils.vpa import get_validity_for_all_vpaIds
from utils.get_entities import extract_entities, extract_entity_string
from utils.utils import get_url_to_enum_dict
from providers.google import search_image, search_text
from providers.twitter import get_user_information
from providers.truecaller import numsearch
import re
import gender_guesser.detector as gender
from urllib.parse import urlparse
import json
import random

# Image hosts accepted as profile media (Twitter, YouTube, LinkedIn CDNs).
_IMAGE_URL_PREFIXES = (
    "https://pbs.twimg.com",
    "https://yt3.googleusercontent.com",
    "https://media.licdn.com",
)

# Handle = path segment right after twitter.com/, optionally @-prefixed,
# at most 15 word characters (Twitter's username limit).
_TWITTER_HANDLE_RE = re.compile(r'^.*?\btwitter\.com/@?(\w{1,15})(?:[?/,].*)?$')


def serialize_sets(obj):
    """``json.dumps(default=...)`` hook that encodes sets as lists."""
    if isinstance(obj, set):
        return list(obj)
    return obj


d = gender.Detector()


def getDetailsFromName(name: str) -> dict:
    """Build the full OSINT profile for *name*.

    Pipeline:
      1. Google text search: plain, per-platform (site:facebook / twitter /
         instagram / linkedin) and informational (Zaubacorp, IndiaKanoon).
      2. Fetch a Twitter profile for every handle found in the twitter hits.
      3. If a Twitter bio was found, extract an entity string from it and
         re-run the platform searches (text and image) with that context.
    """
    google_ts_res_plain = search_text(name, no_of_results=10)
    google_ts_res_facebook = search_text("site:facebook.com " + name, no_of_results=5)
    google_ts_res_twitter = search_text("site:twitter.com " + name, no_of_results=5)
    google_ts_res_instagram = search_text("site:instagram.com " + name, no_of_results=5)
    google_ts_res_linkedin = search_text("site:linkedin.com " + name, no_of_results=5)

    twitter_res = []
    for username in get_twitter_usernames(google_ts_res_twitter):
        twitter_res.append(get_user_information(username))

    google_ts_res_zabuacorp = search_text(name + " Zaubacorp", no_of_results=5)
    google_ts_res_indiakanoon = search_text(name + " IndiaKanoon", no_of_results=5)

    # Default every entity-context bucket to empty so the data dict below is
    # always well-defined. The original only assigned these inside the
    # `if twitter_res` branch and raised NameError when no profile was found.
    google_ts_entity_res_facebook = []
    google_ts_entity_res_twitter = []
    google_ts_entity_res_instagram = []
    google_ts_entity_res_linkedin = []
    google_is_entity_res_facebook = []
    google_is_entity_res_twitter = []
    google_is_entity_res_instagram = []
    google_is_entity_res_linkedin = []
    google_is_res_entity = []
    # Plain image search does not depend on the entity context, so it is
    # computed unconditionally (also inside the `if` in the original).
    google_is_res_name = search_image(name, no_of_results=5)

    if twitter_res:
        entity_context = extract_entity_string(twitter_res[0]['user_description'])
        google_ts_entity_res_facebook = search_text("site:facebook.com " + name + " " + entity_context, no_of_results=5)
        google_ts_entity_res_twitter = search_text("site:twitter.com " + name + " " + entity_context, no_of_results=5)
        google_ts_entity_res_instagram = search_text("site:instagram.com " + name + " " + entity_context, no_of_results=5)
        google_ts_entity_res_linkedin = search_text("site:linkedin.com " + name + " " + entity_context, no_of_results=5)
        google_is_entity_res_facebook = search_image("site:facebook.com " + name + " " + entity_context, no_of_results=5)
        google_is_entity_res_twitter = search_image("site:twitter.com " + name + " " + entity_context, no_of_results=5)
        google_is_entity_res_instagram = search_image("site:instagram.com " + name + " " + entity_context, no_of_results=5)
        google_is_entity_res_linkedin = search_image("site:linkedin.com " + name + " " + entity_context, no_of_results=5)
        google_is_res_entity = search_image(name + " " + entity_context, no_of_results=5)

    data = {
        "google_ts": {
            "google_ts_res_plain": google_ts_res_plain,
            "google_ts_res_facebook": google_ts_res_facebook,
            "google_ts_res_twitter": google_ts_res_twitter,
            "google_ts_res_instagram": google_ts_res_instagram,
            "google_ts_res_linkedin": google_ts_res_linkedin,
            "google_ts_res_zabuacorp": google_ts_res_zabuacorp,
            "google_ts_res_indiakanoon": google_ts_res_indiakanoon,
            "google_ts_entity_res_facebook": google_ts_entity_res_facebook,
            "google_ts_entity_res_twitter": google_ts_entity_res_twitter,
            "google_ts_entity_res_instagram": google_ts_entity_res_instagram,
            "google_ts_entity_res_linkedin": google_ts_entity_res_linkedin,
        },
        "google_is": {
            "google_is_entity_res_facebook": google_is_entity_res_facebook,
            "google_is_entity_res_twitter": google_is_entity_res_twitter,
            "google_is_entity_res_instagram": google_is_entity_res_instagram,
            "google_is_entity_res_linkedin": google_is_entity_res_linkedin,
            "google_is_res_entity": google_is_res_entity,
            "google_is_res_name": google_is_res_name,
        },
        "twitter": twitter_res,
        "name": name,
    }

    return generate_response(data)


def get_twitter_usernames(search_data):
    """Extract candidate Twitter handles from Google search results."""
    return get_usernames_from_urls({entry['url'] for entry in search_data})


def get_usernames_from_urls(twitter_urls):
    """Return the set of Twitter handles found in *twitter_urls*."""
    usernames = set()
    for url in twitter_urls:
        match = _TWITTER_HANDLE_RE.search(url)
        if match:
            usernames.add(match.group(1))
    return usernames


# Response contract (assembled by generate_response):
#   imageUrls         profile media from trusted CDNs (_IMAGE_URL_PREFIXES)
#   tagsApplicable    PERSON entities from search titles/captions (JSON string)
#   primaryAddress    Twitter profile location, when available
#   additionalAddress GPE entities from the searches (JSON string)
#   relatedPeople     per-platform related-profile details
#   socialFootprint   confidence-scored links per platform
#   informationFootprint  Zaubacorp / IndiaKanoon hits


def _scored_urls(search_results):
    """Attach a decaying pseudo-confidence score to each result URL.

    Rank 1 scores 90 minus up to 10, rank 2 minus up to 20, and so on —
    the same heuristic the per-platform functions previously duplicated.
    """
    return [
        {'url': res['url'], 'confidenceScore': 90 - rank * random.uniform(0, 10)}
        for rank, res in enumerate(search_results, start=1)
    ]


def _collect_scored(data, keys):
    """Concatenate scored URLs for the given google_ts result buckets."""
    scored = []
    for key in keys:
        scored.extend(_scored_urls(data["google_ts"][key]))
    return scored


def get_facebook_data(data):
    """Scored Facebook links (entity-context results first)."""
    return _collect_scored(data, ["google_ts_entity_res_facebook", "google_ts_res_facebook"])


def get_linkedin_data(data):
    """Scored LinkedIn links (entity-context results first)."""
    return _collect_scored(data, ["google_ts_entity_res_linkedin", "google_ts_res_linkedin"])


def get_instagram_data(data):
    """Scored Instagram links (entity-context results first)."""
    return _collect_scored(data, ["google_ts_entity_res_instagram", "google_ts_res_instagram"])


def get_twitter_data(data):
    """Twitter profiles are API-confirmed, so they get a fixed score of 100."""
    return [
        {
            'url': "https://twitter.com/" + profile['user_screen_name'],
            'confidenceScore': 100,
            'profileUrl': profile['user_profile_image'],
        }
        for profile in data["twitter"]
    ]


def get_related_people(data):
    """Related-profile details per platform (only Twitter is populated).

    Guards the empty-Twitter case; the original indexed data['twitter'][0]
    unconditionally and raised IndexError when no profile was found.
    """
    twitter_details = data['twitter'][0]['top_commentors'] if data['twitter'] else {}
    return [
        {"details": twitter_details, "platform": "TWITTER"},
        {"details": {}, "platform": "LINKEDIN"},
        {"details": {}, "platform": "INSTAGRAM"},
        {"details": {}, "platform": "FACEBOOK"},
    ]


def get_available_apps(data):
    """Map result-URL origins to app enums via get_url_to_enum_dict().

    Fixed: the original formatted '{uri.scheme}' against the raw URL string
    (AttributeError — str has no .scheme) and compared a ParseResult against
    the dict's string keys. Parse first, then rebuild the "scheme://host/"
    origin string.
    """
    available_apps = []
    url_to_enum_dict = get_url_to_enum_dict()
    for individual_res in data['google_ts'].values():
        for entry in individual_res:
            parsed = urlparse(entry['url'])
            base_uri = '{0}://{1}/'.format(parsed.scheme, parsed.netloc)
            if base_uri in url_to_enum_dict:
                available_apps.append(url_to_enum_dict[base_uri])
    return available_apps


def _harvest_entities(results_by_query, text_key, org_tags, gpe_tags, person_tags):
    """Run NER over every result's *text_key* field and bucket the entities.

    Everything that is neither ORG nor GPE lands in the PERSON bucket,
    matching the original's else-branch behaviour.
    """
    for search_results in results_by_query.values():
        for result in search_results:
            entities = extract_entities(result[text_key])
            for label in entities:
                if label == "ORG":
                    org_tags.extend(entities[label])
                elif label == "GPE":
                    gpe_tags.extend(entities[label])
                else:
                    person_tags.extend(entities[label])


def get_applicable_tags(data):
    """Deduplicated ORG/PERSON/GPE tags (as JSON strings) plus a location."""
    org_tags, gpe_tags, person_tags = [], [], []
    # Text results carry the entity text in 'title', image results in 'text'.
    _harvest_entities(data['google_ts'], 'title', org_tags, gpe_tags, person_tags)
    _harvest_entities(data['google_is'], 'text', org_tags, gpe_tags, person_tags)

    return {
        'ORG': json.dumps(set(org_tags), default=serialize_sets),
        'PERSON': json.dumps(set(person_tags), default=serialize_sets),
        'GPE': json.dumps(set(gpe_tags), default=serialize_sets),
        'location': data['twitter'][0]['user_location'] if data['twitter'] else "",
    }


def get_informational_data(data):
    """Scored Zaubacorp / IndiaKanoon hits (company filings, court records)."""
    return _collect_scored(data, ["google_ts_res_zabuacorp", "google_ts_res_indiakanoon"])


def generate_response(data):
    """Assemble the API response dict from the raw search/Twitter data."""
    tag_related_data = get_applicable_tags(data)
    result = {}
    result['name'] = data["name"]
    result['imageUrls'] = get_image_urls(data)
    result['socialFootprint'] = {
        'twitter': get_twitter_data(data),
        'facebook': get_facebook_data(data),
        'linkedin': get_linkedin_data(data),
    }
    result['informationFootprint'] = get_informational_data(data)
    result['primaryAddress'] = tag_related_data['location']
    result['tagsApplicable'] = tag_related_data['PERSON']
    result['additionalAddress'] = tag_related_data['GPE']
    result['relatedPeople'] = get_related_people(data)
    return result


def get_image_urls(data):
    """Collect profile media: Twitter banner/avatar plus image-search hits
    hosted on the trusted CDNs in _IMAGE_URL_PREFIXES."""
    images = []
    for res in data['twitter']:
        images.append(res['user_profile_banner'])
        images.append(res['user_profile_image'])
    for individual_res in data['google_is'].values():
        for entry in individual_res:
            # str.startswith accepts a tuple — one call covers all hosts.
            if entry['url'].startswith(_IMAGE_URL_PREFIXES):
                images.append(entry['url'])
    return images
"""Phone-number-based OSINT lookup component.

Resolve a phone number through Truecaller, then fan out to Google
text/image search and Twitter, and fold everything into a single response
dict. Also provides a compact text summary for the Twilio webhook.
"""
from audioop import add  # NOTE(review): unused; audioop is removed in Python 3.13
from math import factorial
from utils.vpa import get_validity_for_all_vpaIds
from utils.get_entities import extract_entities, extract_entity_string
from utils.utils import get_url_to_enum_dict, check_if_whatsapp_exists
from providers.google import search_image, search_text
from providers.twitter import get_user_information
from providers.truecaller import numsearch
import re
import gender_guesser.detector as gender
from urllib.parse import urlparse
import json
import random

# Image hosts accepted as profile media (Twitter, YouTube, LinkedIn CDNs).
_IMAGE_URL_PREFIXES = (
    "https://pbs.twimg.com",
    "https://yt3.googleusercontent.com",
    "https://media.licdn.com",
)

# Handle = path segment right after twitter.com/, optionally @-prefixed,
# at most 15 word characters (Twitter's username limit).
_TWITTER_HANDLE_RE = re.compile(r'^.*?\btwitter\.com/@?(\w{1,15})(?:[?/,].*)?$')


def serialize_sets(obj):
    """``json.dumps(default=...)`` hook that encodes sets as lists."""
    if isinstance(obj, set):
        return list(obj)
    return obj


d = gender.Detector()


def getDetailsFromPhoneNumber(phoneNumber: str) -> dict:
    """Build the full OSINT profile for *phoneNumber* (with +91 prefix).

    Pipeline:
      1. Truecaller lookup for name / email / address.
      2. Google text search on the Truecaller name: plain, by email, by
         address, per-platform and informational (Zaubacorp, IndiaKanoon).
      3. Fetch a Twitter profile for every handle found in the twitter hits;
         if a bio is found, re-run the platform searches with its entity
         context.
    """
    truecaller_res = numsearch(phoneNumber)

    try:
        # NOTE(review): assumes Truecaller returns address as a list of
        # {'address': ...} dicts — confirm against the provider.
        address_details = truecaller_res['address'][0]['address']
    except (KeyError, IndexError, TypeError):
        address_details = ""

    google_ts_res_plain = search_text(truecaller_res['name'], no_of_results=10)
    google_ts_res_email = search_text(truecaller_res['email'], no_of_results=10)
    google_ts_res_email_name = search_text(truecaller_res['email'].split('@')[0], no_of_results=10)
    google_ts_res_address = search_text(truecaller_res['name'] + address_details, no_of_results=10)
    google_ts_res_facebook = search_text("site:facebook.com " + truecaller_res['name'], no_of_results=5)
    google_ts_res_twitter = search_text("site:twitter.com " + truecaller_res['name'], no_of_results=5)
    google_ts_res_instagram = search_text("site:instagram.com " + truecaller_res['name'], no_of_results=5)
    google_ts_res_linkedin = search_text("site:linkedin.com " + truecaller_res['name'], no_of_results=5)

    twitter_res = []
    for username in get_twitter_usernames(google_ts_res_twitter):
        twitter_res.append(get_user_information(username))

    google_ts_res_zabuacorp = search_text(truecaller_res['name'] + " Zaubacorp", no_of_results=5)
    google_ts_res_indiakanoon = search_text(truecaller_res['name'] + " IndiaKanoon", no_of_results=5)

    # Default every entity-context bucket to empty so the data dict below is
    # always well-defined. The original only assigned these inside the
    # `if twitter_res` branch and raised NameError when no profile was found.
    google_ts_entity_res_facebook = []
    google_ts_entity_res_twitter = []
    google_ts_entity_res_instagram = []
    google_ts_entity_res_linkedin = []
    google_is_entity_res_facebook = []
    google_is_entity_res_twitter = []
    google_is_entity_res_instagram = []
    google_is_entity_res_linkedin = []
    google_is_res_entity = []
    # Plain image search does not depend on the entity context.
    google_is_res_name = search_image(truecaller_res['name'], no_of_results=5)

    if twitter_res:
        entity_context = extract_entity_string(twitter_res[0]['user_description'])
        subject = truecaller_res['name'] + " " + entity_context
        google_ts_entity_res_facebook = search_text("site:facebook.com " + subject, no_of_results=5)
        google_ts_entity_res_twitter = search_text("site:twitter.com " + subject, no_of_results=5)
        google_ts_entity_res_instagram = search_text("site:instagram.com " + subject, no_of_results=5)
        google_ts_entity_res_linkedin = search_text("site:linkedin.com " + subject, no_of_results=5)
        google_is_entity_res_facebook = search_image("site:facebook.com " + subject, no_of_results=5)
        google_is_entity_res_twitter = search_image("site:twitter.com " + subject, no_of_results=5)
        google_is_entity_res_instagram = search_image("site:instagram.com " + subject, no_of_results=5)
        google_is_entity_res_linkedin = search_image("site:linkedin.com " + subject, no_of_results=5)
        google_is_res_entity = search_image(subject, no_of_results=5)

    data = {
        "truecaller": truecaller_res,
        "google_ts": {
            "google_ts_res_plain": google_ts_res_plain,
            "google_ts_res_email": google_ts_res_email,
            "google_ts_res_email_name": google_ts_res_email_name,
            "google_ts_res_address": google_ts_res_address,
            "google_ts_res_facebook": google_ts_res_facebook,
            "google_ts_res_twitter": google_ts_res_twitter,
            "google_ts_res_instagram": google_ts_res_instagram,
            "google_ts_res_linkedin": google_ts_res_linkedin,
            "google_ts_res_zabuacorp": google_ts_res_zabuacorp,
            "google_ts_res_indiakanoon": google_ts_res_indiakanoon,
            "google_ts_entity_res_facebook": google_ts_entity_res_facebook,
            "google_ts_entity_res_twitter": google_ts_entity_res_twitter,
            "google_ts_entity_res_instagram": google_ts_entity_res_instagram,
            "google_ts_entity_res_linkedin": google_ts_entity_res_linkedin,
        },
        "google_is": {
            "google_is_entity_res_facebook": google_is_entity_res_facebook,
            "google_is_entity_res_twitter": google_is_entity_res_twitter,
            "google_is_entity_res_instagram": google_is_entity_res_instagram,
            "google_is_entity_res_linkedin": google_is_entity_res_linkedin,
            "google_is_res_entity": google_is_res_entity,
            "google_is_res_name": google_is_res_name,
        },
        "twitter": twitter_res,
        "phoneNumber": phoneNumber,
    }

    return generate_response(data)


def get_twitter_usernames(search_data):
    """Extract candidate Twitter handles from Google search results."""
    return get_usernames_from_urls({entry['url'] for entry in search_data})


def get_usernames_from_urls(twitter_urls):
    """Return the set of Twitter handles found in *twitter_urls*."""
    usernames = set()
    for url in twitter_urls:
        match = _TWITTER_HANDLE_RE.search(url)
        if match:
            usernames.add(match.group(1))
    return usernames


# Response contract (assembled by generate_response):
#   name/gender/email  Truecaller data (gender guessed from first name if unknown)
#   imageUrls          profile media from trusted CDNs (_IMAGE_URL_PREFIXES)
#   tagsApplicable     PERSON entities from search titles/captions (JSON string)
#   primaryAddress     Twitter profile location, when available
#   additionalAddress  GPE entities from the searches (JSON string)
#   relatedPeople      per-platform related-profile details
#   socialFootprint    confidence-scored links per platform
#   financialFootprint VPA (UPI) availability per provider
#   whatsappDetails    wa.me link + availability flag


def _scored_urls(search_results):
    """Attach a decaying pseudo-confidence score to each result URL.

    Rank 1 scores 90 minus up to 10, rank 2 minus up to 20, and so on —
    the same heuristic the per-platform functions previously duplicated.
    """
    return [
        {'url': res['url'], 'confidenceScore': 90 - rank * random.uniform(0, 10)}
        for rank, res in enumerate(search_results, start=1)
    ]


def _collect_scored(data, keys):
    """Concatenate scored URLs for the given google_ts result buckets."""
    scored = []
    for key in keys:
        scored.extend(_scored_urls(data["google_ts"][key]))
    return scored


def get_facebook_data(data):
    """Scored Facebook links (entity-context results first)."""
    return _collect_scored(data, ["google_ts_entity_res_facebook", "google_ts_res_facebook"])


def get_linkedin_data(data):
    """Scored LinkedIn links (entity-context results first)."""
    return _collect_scored(data, ["google_ts_entity_res_linkedin", "google_ts_res_linkedin"])


def get_instagram_data(data):
    """Scored Instagram links (entity-context results first)."""
    return _collect_scored(data, ["google_ts_entity_res_instagram", "google_ts_res_instagram"])


def get_twitter_data(data):
    """Twitter profiles are API-confirmed, so they get a fixed score of 100."""
    return [
        {
            'url': "https://twitter.com/" + profile['user_screen_name'],
            'confidenceScore': 100,
            'profileUrl': profile['user_profile_image'],
        }
        for profile in data["twitter"]
    ]


def get_related_people(data):
    """Related-profile details per platform (only Twitter is populated).

    Guards the empty-Twitter case; the original indexed data['twitter'][0]
    unconditionally and raised IndexError when no profile was found.
    """
    twitter_details = data['twitter'][0]['top_commentors'] if data['twitter'] else {}
    return [
        {"details": twitter_details, "platform": "TWITTER"},
        {"details": {}, "platform": "LINKEDIN"},
        {"details": {}, "platform": "INSTAGRAM"},
        {"details": {}, "platform": "FACEBOOK"},
    ]


def get_whatsapp_details(data):
    """wa.me link plus availability flag for the subject's number.

    The original's two branches differed only in the flag; collapsed into a
    single return.
    """
    phone_number = data["phoneNumber"]
    return {
        'isAvailable': bool(check_if_whatsapp_exists(phone_number)),
        'link': "https://wa.me/" + phone_number,
    }


def get_available_apps(data):
    """Map result-URL origins to app enums via get_url_to_enum_dict().

    Fixed: the original formatted '{uri.scheme}' against the raw URL string
    (AttributeError — str has no .scheme) and compared a ParseResult against
    the dict's string keys. Parse first, then rebuild the "scheme://host/"
    origin string.
    """
    available_apps = []
    url_to_enum_dict = get_url_to_enum_dict()
    for individual_res in data['google_ts'].values():
        for entry in individual_res:
            parsed = urlparse(entry['url'])
            base_uri = '{0}://{1}/'.format(parsed.scheme, parsed.netloc)
            if base_uri in url_to_enum_dict:
                available_apps.append(url_to_enum_dict[base_uri])
    return available_apps


def _harvest_entities(results_by_query, text_key, org_tags, gpe_tags, person_tags):
    """Run NER over every result's *text_key* field and bucket the entities.

    Everything that is neither ORG nor GPE lands in the PERSON bucket,
    matching the original's else-branch behaviour.
    """
    for search_results in results_by_query.values():
        for result in search_results:
            entities = extract_entities(result[text_key])
            for label in entities:
                if label == "ORG":
                    org_tags.extend(entities[label])
                elif label == "GPE":
                    gpe_tags.extend(entities[label])
                else:
                    person_tags.extend(entities[label])


def get_applicable_tags(data):
    """Deduplicated ORG/PERSON/GPE tags (as JSON strings) plus a location."""
    org_tags, gpe_tags, person_tags = [], [], []
    # Text results carry the entity text in 'title', image results in 'text'.
    _harvest_entities(data['google_ts'], 'title', org_tags, gpe_tags, person_tags)
    _harvest_entities(data['google_is'], 'text', org_tags, gpe_tags, person_tags)

    return {
        'ORG': json.dumps(set(org_tags), default=serialize_sets),
        'PERSON': json.dumps(set(person_tags), default=serialize_sets),
        'GPE': json.dumps(set(gpe_tags), default=serialize_sets),
        'location': data['twitter'][0]['user_location'] if data['twitter'] else "",
    }


def get_informational_data(data):
    """Scored Zaubacorp / IndiaKanoon hits (company filings, court records)."""
    return _collect_scored(data, ["google_ts_res_zabuacorp", "google_ts_res_indiakanoon"])


def generate_response(data):
    """Assemble the API response dict from the raw Truecaller/search data."""
    tag_related_data = get_applicable_tags(data)
    result = {}
    result['name'] = data['truecaller']['name']

    tc_gender = data['truecaller']['gender']
    if tc_gender == "UNKNOWN":
        # Guess from the subject's first name. The original called
        # d.get_gender(u"Bob") — a leftover hard-coded stub.
        first_name = result['name'].split()[0] if result['name'] else ""
        tc_gender = d.get_gender(first_name)
    result['gender'] = tc_gender

    result['email'] = data['truecaller']['email']
    result['imageUrls'] = get_image_urls(data)
    result['socialFootprint'] = {
        'twitter': get_twitter_data(data),
        'facebook': get_facebook_data(data),
        'linkedin': get_linkedin_data(data),
    }
    result['informationFootprint'] = get_informational_data(data)
    result['financialFootprint'] = get_validity_for_all_vpaIds(data["phoneNumber"])
    result['whatsappDetails'] = get_whatsapp_details(data)
    result['primaryAddress'] = tag_related_data['location']
    result['tagsApplicable'] = tag_related_data['PERSON']
    result['additionalAddress'] = tag_related_data['GPE']
    result['relatedPeople'] = get_related_people(data)
    return result


def get_image_urls(data):
    """Collect profile media: Twitter banner/avatar plus image-search hits
    hosted on the trusted CDNs in _IMAGE_URL_PREFIXES."""
    images = []
    for res in data['twitter']:
        images.append(res['user_profile_banner'])
        images.append(res['user_profile_image'])
    for individual_res in data['google_is'].values():
        for entry in individual_res:
            # str.startswith accepts a tuple — one call covers all hosts.
            if entry['url'].startswith(_IMAGE_URL_PREFIXES):
                images.append(entry['url'])
    return images


def getDetailsFromPhoneNumberForTwilio(phoneNumber: str) -> str:
    """Compact, message-friendly summary for the Twilio webhook.

    *phoneNumber* is the bare 10-digit Indian number (the +91 prefix is
    added here). Returns a multi-line Markdown-ish string — the original's
    ``-> dict`` annotation was wrong.
    """
    phone_with_country_code = "+91" + phoneNumber

    truecaller_res = numsearch(phone_with_country_code)
    google_ts_res_twitter = search_text("site:twitter.com " + truecaller_res['name'], no_of_results=1)

    # NOTE(review): .pop() raises KeyError when no handle was found, and the
    # [0] indexing below assumes non-empty search results — the happy path
    # matches the original; confirm whether a guarded fallback is wanted.
    filtered_username = get_twitter_usernames(google_ts_res_twitter).pop()
    twitter_res = get_user_information(filtered_username)

    google_ts_res_facebook = search_text("site:facebook.com " + truecaller_res['name'], no_of_results=5)
    google_ts_res_linkedin = search_text("site:linkedin.com " + truecaller_res['name'], no_of_results=5)
    vpa_data = get_validity_for_all_vpaIds(phoneNumber)

    details = {
        'name': truecaller_res['name'],
        # The original listed 'email' twice; the duplicate is removed.
        'email': truecaller_res['email'],
        'carrier': truecaller_res['carrier'],
        'facebook': google_ts_res_facebook[0]['url'],
        'linkedin': google_ts_res_linkedin[0]['url'],
        'twitter': "https://www.twitter.com/" + twitter_res['user_screen_name'],
        'location': twitter_res['user_location'],
        'availableOnPhonePe': "Yes" if vpa_data['phonepe']['isAvailable'] else "No",
        'availableOnPaytm': "Yes" if vpa_data['paytm']['isAvailable'] else "No",
    }

    # Build the reply without shadowing the builtin `str` (the original did).
    reply = "*Found the below details:* \n\n"
    for key, value in details.items():
        reply += '*' + key.capitalize() + '*' + ": " + value + '\n'
    return reply
from math import factorial
from utils.vpa import get_validity_for_all_vpaIds
from utils.get_entities import extract_entities
from utils.utils import get_url_to_enum_dict
from utils.utils import check_if_whatsapp_exists
from providers.google import search_image
from utils.get_entities import extract_entity_string
from providers.twitter import get_user_information
from providers.google import search_text
from providers.truecaller import numsearch
import re
import random
import gender_guesser.detector as gender
from urllib.parse import urlparse
import json


def serialize_sets(obj):
    """json.dumps default= hook: render sets as lists, pass everything else through."""
    return list(obj) if isinstance(obj, set) else obj


d = gender.Detector()


def getDetailsFromTwitterHandle(username: str) -> dict:
    """Collect the full lookup bundle for a Twitter handle.

    Combines the Twitter profile with plain, per-platform, entity-augmented
    and image Google searches, then folds everything into the standard
    response shape via generate_response().
    """
    twitter_info = get_user_information(username)
    display_name = twitter_info['user_name']

    # Entities extracted from the bio add context to disambiguate common names.
    entity_context = extract_entity_string(twitter_info['user_description'])
    contextual_name = display_name + " " + entity_context

    google_ts = {
        "google_ts_res_plain": search_text(display_name, no_of_results=10),
        "google_ts_res_facebook": search_text("site:facebook.com " + display_name, no_of_results=5),
        "google_ts_res_twitter": search_text("site:twitter.com " + display_name, no_of_results=5),
        "google_ts_res_instagram": search_text("site:instagram.com " + display_name, no_of_results=5),
        "google_ts_res_linkedin": search_text("site:linkedin.com " + display_name, no_of_results=5),
        "google_ts_res_zabuacorp": search_text(display_name + " Zaubacorp", no_of_results=5),
        "google_ts_res_indiakanoon": search_text(display_name + " IndiaKanoon", no_of_results=5),
        "google_ts_entity_res_facebook": search_text("site:facebook.com " + contextual_name, no_of_results=5),
        "google_ts_entity_res_twitter": search_text("site:twitter.com " + contextual_name, no_of_results=5),
        "google_ts_entity_res_instagram": search_text("site:instagram.com " + contextual_name, no_of_results=5),
        "google_ts_entity_res_linkedin": search_text("site:linkedin.com " + contextual_name, no_of_results=5),
    }
    google_is = {
        "google_is_entity_res_facebook": search_image("site:facebook.com " + contextual_name, no_of_results=5),
        "google_is_entity_res_twitter": search_image("site:twitter.com " + contextual_name, no_of_results=5),
        "google_is_entity_res_instagram": search_image("site:instagram.com " + contextual_name, no_of_results=5),
        "google_is_entity_res_linkedin": search_image("site:linkedin.com " + contextual_name, no_of_results=5),
        "google_is_res_entity": search_image(contextual_name, no_of_results=5),
        "google_is_res_name": search_image(display_name, no_of_results=5),
    }

    return generate_response({
        "google_ts": google_ts,
        "google_is": google_is,
        "twitter": [twitter_info],
    })


def get_twitter_usernames(search_data):
    """Extract candidate Twitter usernames from a list of search-result dicts."""
    return get_usernames_from_urls({entry['url'] for entry in search_data})


def get_usernames_from_urls(twitter_urls):
    """Pull the handle out of each twitter.com URL; non-matching URLs are skipped."""
    handle_pattern = re.compile(r'^.*?\btwitter\.com/@?(\w{1,15})(?:[?/,].*)?$')
    usernames = set()
    for url in twitter_urls:
        found = handle_pattern.search(url)
        if found:
            usernames.add(found.group(1))
    return usernames


'''
name: Truecaller Data
gender: by gender guesser if not in truecaller response.
email: Truecaller Data
imageUrls: fetch all images that
    start with: https://pbs.twimg.com and https://media.licdn.com,
    https://yt3.googleusercontent.com
tagsApplicable: we will run anirudhs function on Top 3 results from
    * Google
    * Facebook
    * Instagram
    * Linkedin
    * Twitter
    * Twitter Header
availableApps: fetchAllUrls and then check if present in the map
primaryAddress: Get it from twitter API and GLE in all the entities etc.
additionalAddress: Get it from twitter API and GLE in all the entities etc.
relatedPeople:
    TWITTER : URLs and images
    LINKEDIN etc: channel and links.
socialFootprint:
    return Top 3 links from each =>
    twitter, instagram, facebook, linkedin.
'''
'''
Shape of the aggregated payload handed to generate_response():
{
    "truecaller_res": truecaller_res,
    "google_ts_res_plain": google_ts_res_plain,
    "google_ts_res_email": google_ts_res_email,
    "google_ts_res_email_name": google_ts_res_email_name,
    "google_ts_res_address": google_ts_res_address,
    "google_ts_res_facebook": google_ts_res_facebook,
    "google_ts_res_twitter": google_ts_res_twitter,
    "google_ts_res_instagram": google_ts_res_instagram,
    "google_ts_res_linkedin": google_ts_res_linkedin,
    "twitter_res": twitter_res,
    "google_is_res_entity": google_is_res_entity,
    "google_is_res_name": google_is_res_name,
    "google_ts_res_zabuacorp": google_ts_res_zabuacorp,
    "google_ts_res_indiakanoon": google_ts_res_indiakanoon,
    "google_ts_entity_res_facebook": google_ts_entity_res_facebook,
    "google_ts_entity_res_twitter": google_ts_entity_res_twitter,
    "google_ts_entity_res_instagram": google_ts_entity_res_instagram,
    "google_ts_entity_res_linkedin": google_ts_entity_res_linkedin,
    "google_is_entity_res_facebook": google_is_entity_res_facebook,
    "google_is_entity_res_twitter": google_is_entity_res_twitter,
    "google_is_entity_res_instagram": google_is_entity_res_instagram,
    "google_is_entity_res_linkedin": google_is_entity_res_linkedin
}
'''

# Add similarity later.
def _ranked_urls(data, primary_key, secondary_key):
    """Shared ranking helper used by the per-platform getters.

    Flattens two google_ts result lists into [{'url', 'confidenceScore'}],
    entity-augmented (primary) results first. The score decays with rank and
    carries deliberate noise: 90 - (rank + 1) * U(0, 10).
    """
    ranked = []
    for key in (primary_key, secondary_key):
        for rank, res in enumerate(data["google_ts"][key]):
            ranked.append({
                'url': res['url'],
                'confidenceScore': 90 - (rank + 1) * random.uniform(0, 10),
            })
    return ranked


def get_facebook_data(data):
    """Ranked Facebook profile candidates (entity-augmented results first)."""
    return _ranked_urls(data, "google_ts_entity_res_facebook", "google_ts_res_facebook")


def get_linkedin_data(data):
    """Ranked LinkedIn profile candidates (entity-augmented results first)."""
    return _ranked_urls(data, "google_ts_entity_res_linkedin", "google_ts_res_linkedin")


def get_instagram_data(data):
    """Ranked Instagram profile candidates (entity-augmented results first)."""
    return _ranked_urls(data, "google_ts_entity_res_instagram", "google_ts_res_instagram")


def get_twitter_data(data):
    """Twitter profiles come from the API directly, so confidence is 100."""
    return [
        {
            'url': "https://twitter.com/" + profile['user_screen_name'],
            'confidenceScore': 100,
            'profileUrl': profile['user_profile_image'],
        }
        for profile in data["twitter"]
    ]


def get_related_people(data):
    """People who recently interacted with the subject, grouped by platform.

    Only Twitter is populated today; the other platforms are placeholders.
    """
    related = [{"details": data['twitter'][0]['top_commentors'], "platform": "TWITTER"}]
    for platform in ("LINKEDIN", "INSTAGRAM", "FACEBOOK"):
        related.append({"details": {}, "platform": platform})
    return related


def get_available_apps(data):
    """Map search-result URLs to known app enums (see utils.get_url_to_enum_dict).

    Bug fix: the original formatted '{uri.scheme}://{uri.netloc}/' against the
    raw URL *string* (which has no .scheme/.netloc attributes) and then
    compared a ParseResult against the dict's string keys, so no app was ever
    detected. We now parse the URL and look up its hostname, ignoring a
    leading 'www.'.
    """
    available_apps = []
    url_to_enum_dict = get_url_to_enum_dict()
    for search in data['google_ts']:
        for entry in data['google_ts'][search]:
            host = urlparse(entry['url']).netloc.lower()
            if host.startswith("www."):
                host = host[len("www."):]
            if host in url_to_enum_dict:
                available_apps.append(url_to_enum_dict[host])
    return available_apps


def get_applicable_tags(data):
    """Run NER over every search-result title/snippet and bucket the entities.

    Returns JSON-encoded, de-duplicated ORG / PERSON / GPE entity lists plus
    the subject's Twitter location ("" when no profile was found).

    NOTE(review): every label other than ORG and GPE (DATE, CARDINAL, ...)
    is bucketed under PERSON, mirroring the original behaviour — confirm
    whether only label == "PERSON" was intended.
    """
    org_tags = []
    gpe_tags = []
    person_tags = []

    def _bucket(text):
        # extract_entities returns {label: [entity_text, ...]}.
        entities = extract_entities(text)
        for label in entities:
            if label == "ORG":
                org_tags.extend(entities[label])
            elif label == "GPE":
                gpe_tags.extend(entities[label])
            else:
                person_tags.extend(entities[label])

    for search in data['google_ts']:
        for result in data['google_ts'][search]:
            _bucket(result['title'])
    for search in data['google_is']:
        for result in data['google_is'][search]:
            _bucket(result['text'])

    return {
        'ORG': json.dumps(set(org_tags), default=serialize_sets),
        'PERSON': json.dumps(set(person_tags), default=serialize_sets),
        'GPE': json.dumps(set(gpe_tags), default=serialize_sets),
        'location': data['twitter'][0]['user_location'] if data['twitter'] else "",
    }


def get_informational_data(data):
    """Ranked Zaubacorp and IndianKanoon hits — the 'information footprint'."""
    return _ranked_urls(data, "google_ts_res_zabuacorp", "google_ts_res_indiakanoon")


def generate_response(data):
    """Fold the aggregated Twitter-handle payload into the API response schema."""
    tag_related_data = get_applicable_tags(data)
    return {
        'name': data['twitter'][0]['user_name'],
        'imageUrls': get_image_urls(data),
        'socialFootprint': {
            'twitter': get_twitter_data(data),
            'facebook': get_facebook_data(data),
            'linkedin': get_linkedin_data(data),
        },
        'informationFootprint': get_informational_data(data),
        'primaryAddress': tag_related_data['location'],
        'tagsApplicable': tag_related_data['PERSON'],
        'additionalAddress': tag_related_data['GPE'],
        'relatedPeople': get_related_people(data),
    }


def get_image_urls(data):
    """Collect candidate profile images from the Twitter profile and Google image results.

    Only images hosted on known profile-image CDNs are kept.
    """
    images = []
    for res in data['twitter']:
        images.append(res['user_profile_banner'])
        images.append(res['user_profile_image'])

    allowed_prefixes = (
        "https://pbs.twimg.com",
        "https://yt3.googleusercontent.com",
        "https://media.licdn.com",
    )
    for search in data['google_is']:
        for entry in data['google_is'][search]:
            if entry['url'].startswith(allowed_prefixes):
                images.append(entry['url'])
    return images
import json
import requests


def search_text(query, no_of_results):
    """Google Custom Search (text).

    Args:
        query (str): text to search; "" short-circuits to [].
        no_of_results (int): how many results are wanted (10 per API page).

    Returns:
        list[dict]: [{'title': str, 'url': str, 'text': str}, ...]
    """
    if query == "":
        return []
    base_url = f"https://customsearch.googleapis.com/customsearch/v1?q={query}&key={API_KEY}&cx="

    results = []
    for page in range((no_of_results - 1) // 10 + 1):
        # Bug fix: the original appended '&start=...' to the *same* url string
        # each iteration, accumulating duplicate start parameters from the
        # third page onwards.
        page_url = base_url if page == 0 else f"{base_url}&start={page * 10 + 1}"
        response = requests.get(page_url)
        payload = json.loads(response.text)
        for item in payload.get('items', []):
            results.append({
                'title': item.get('title', ''),
                'url': item.get('link', ''),
                'text': item.get('snippet', ''),
            })
    return results[:no_of_results]


def search_image(query, no_of_results):
    """Google Custom Search (image); same pagination fix and guards as search_text.

    Returns:
        list[dict]: [{'url': str, 'text': str}, ...]
    """
    if query == "":
        return []
    base_url = (f"https://customsearch.googleapis.com/customsearch/v1"
                f"?q={query}&key={API_KEY}&cx=&searchType=image")

    results = []
    for page in range((no_of_results - 1) // 10 + 1):
        page_url = base_url if page == 0 else f"{base_url}&start={page * 10 + 1}"
        response = requests.get(page_url)
        payload = json.loads(response.text)
        for item in payload.get('items', []):
            results.append({
                'url': item.get('link', ''),
                'text': item.get('snippet', ''),
            })
    return results[:no_of_results]


def numsearch(num: str) -> Dict:
    """Search the number in the TrueCaller directory and return the user's details.

    Args:
        num: number to search, e.g. "+919845xxxxxx".

    Returns:
        Dict with keys name/gender/address/image/email (defaulting to "" or [])
        plus 'carrier' only when Truecaller reports one, e.g.
        {'name': 'Raghav Maheshwari', 'gender': 'UNKNOWN',
         'address': [{'address': 'IN', 'city': 'Uttar Pradesh West', ...}],
         'email': 'raghav.ddps2@gmail.com'}
    """
    # NOTE(review): unused here; presumably consumed via the module-level
    # `headers` — confirm. (The original fetched it before the docstring,
    # which prevented the docstring from being recognised as one.)
    authkey = creds['auth']

    params = {'q': num, 'countryCode': '', 'type': '4', 'locAddr': '',
              'placement': 'SEARCHRESULTS,HISTORY,DETAILS', 'encoding': 'json'}
    resp = requests.get('https://search5-noneu.truecaller.com/v2/search',
                        headers=headers, params=params)
    record = resp.json()['data'][0]

    output = {"name": "", "gender": "", "address": [], "image": "", "email": ""}
    if record.get('name'):
        output["name"] = record["name"]
    if record.get('gender'):
        output["gender"] = record["gender"]
    if record.get('addresses'):
        output["address"] = record["addresses"]
    if record.get('image'):
        output["image"] = record["image"]
    if record.get('phones') and "carrier" in record["phones"][0]:
        output["carrier"] = record["phones"][0]["carrier"]
    if record.get('internetAddresses') and "id" in record["internetAddresses"][0]:
        output["email"] = record["internetAddresses"][0]["id"]
    return output


import tweepy
import time
import datetime

auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_token, access_token_secret)
api = tweepy.API(auth)


def filter_tweets(tweets):
    """Project tweepy Status objects onto {'tweet', 'tweet_date'} dicts.

    Bug fix: the original called tweet.created_at.now(), which returns the
    *current* time, not the tweet's timestamp.
    """
    return [
        {'tweet': tweet.text,
         'tweet_date': tweet.created_at.strftime("%Y-%m-%d %H:%M:%S")}
        for tweet in tweets
    ]


def get_user_information(username):
    """Fetch profile, top tweets and recent repliers for a Twitter handle.

    Returns a flat dict used across the components; JSON-typed fields
    (entities, top tweets) are pre-serialised with json.dumps.
    """
    user = api.get_user(screen_name=username)
    tweets = api.user_timeline(screen_name=username, count=200)
    sorted_tweets = sorted(tweets, key=lambda t: t.favorite_count, reverse=True)[:10]

    # People who replied to the user's recent tweets — a proxy for
    # "related people", not a true interaction ranking.
    replies = list(tweepy.Cursor(api.search_tweets, q='to:' + username,
                                 result_type='recent').items(10))
    top_commentors = {}
    for reply in replies:
        commentor = api.get_user(screen_name=reply.user.screen_name)
        top_commentors[reply.user.screen_name] = {
            'twitter_url': 'https://twitter.com/' + reply.user.screen_name,
            'twitter_profile_url': commentor.profile_image_url,
        }

    return {
        "user_id": user.id,
        "user_name": user.name,
        "user_screen_name": user.screen_name,
        "user_entities": json.dumps(user.entities),
        # NOTE(review): json.dumps of a plain string yields a *quoted* value
        # (e.g. '"India"'); downstream code treats user_location as a display
        # string — confirm the dumps() is intentional. Kept for compatibility.
        "user_location": json.dumps(user.location),
        "user_description": user.description,
        "user_followers_count": user.followers_count,
        "user_friends_count": user.friends_count,
        "user_created_at": user.created_at,
        # NOTE(review): banner and image both use profile_image_url_https;
        # the banner probably should come from user.profile_banner_url.
        "user_profile_banner": user.profile_image_url_https,
        "user_profile_image": user.profile_image_url_https,
        "user_top_tweets": json.dumps(filter_tweets(sorted_tweets)),
        "top_commentors": top_commentors,
    }
+korean-lunar-calendar==0.2.1 +LunarCalendar==0.0.9 +MarkupSafe==2.1.1 +matplotlib==3.5.2 +numpy==1.22.4 +oauthlib==3.2.2 +packaging==21.3 +pandas==1.4.2 +Pillow==9.1.1 +platformdirs==2.5.2 +plotly==5.8.2 +prophet==1.0.1 +proto-plus==1.20.5 +protobuf==3.20.1 +pyasn1==0.4.8 +pyasn1-modules==0.2.8 +PyJWT==2.4.0 +PyMeeus==0.5.11 +pyparsing==3.0.9 +pystan==2.19.1.1 +python-dateutil==2.8.2 +pytz==2022.1 +requests==2.28.0 +requests-oauthlib==1.3.1 +rsa==4.8 +six==1.16.0 +tenacity==8.0.1 +tqdm==4.64.0 +tweepy==4.12.1 +twilio==6.38.0 +ujson==5.3.0 +urllib3==1.26.9 +virtualenv==20.14.1 +Werkzeug==2.1.2 +zipp==3.8.0 +requests diff --git a/ICE/utils/__init__.py b/ICE/utils/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/ICE/utils/__pycache__/__init__.cpython-38.pyc b/ICE/utils/__pycache__/__init__.cpython-38.pyc new file mode 100644 index 00000000..54ec0e88 Binary files /dev/null and b/ICE/utils/__pycache__/__init__.cpython-38.pyc differ diff --git a/ICE/utils/__pycache__/get_entities.cpython-38.pyc b/ICE/utils/__pycache__/get_entities.cpython-38.pyc new file mode 100644 index 00000000..9464c76d Binary files /dev/null and b/ICE/utils/__pycache__/get_entities.cpython-38.pyc differ diff --git a/ICE/utils/__pycache__/pdf.cpython-38.pyc b/ICE/utils/__pycache__/pdf.cpython-38.pyc new file mode 100644 index 00000000..44041c01 Binary files /dev/null and b/ICE/utils/__pycache__/pdf.cpython-38.pyc differ diff --git a/ICE/utils/__pycache__/utils.cpython-38.pyc b/ICE/utils/__pycache__/utils.cpython-38.pyc new file mode 100644 index 00000000..7b464d22 Binary files /dev/null and b/ICE/utils/__pycache__/utils.cpython-38.pyc differ diff --git a/ICE/utils/__pycache__/vpa.cpython-38.pyc b/ICE/utils/__pycache__/vpa.cpython-38.pyc new file mode 100644 index 00000000..631e7132 Binary files /dev/null and b/ICE/utils/__pycache__/vpa.cpython-38.pyc differ diff --git a/ICE/utils/get_entities.py b/ICE/utils/get_entities.py new file mode 100644 index 00000000..ac66da97 --- 
import spacy
from collections import defaultdict
from typing import Dict, List

# Loaded once at import time; spacy model loading is expensive.
nlp = spacy.load("en_core_web_sm")


def extract_entities(text: str) -> Dict:
    """Group the named entities in *text* by entity label.

    Args:
        text: e.g. "Raghav works in Microsoft"

    Returns:
        Dict mapping label -> list of entity strings,
        e.g. {"PERSON": ["Raghav"], "ORG": ["Microsoft"]}.
    """
    doc = nlp(text)
    out = defaultdict(list)
    for ent in doc.ents:
        out[ent.label_].append(ent.text)
    return out


def extract_entity_string(text: str) -> str:
    """Return the entities of *text* joined into one space-separated string.

    Falls back to the full input text when no entities are found.

    Bug fix: the original discarded the result of " ".join(...) inside its
    loop and never updated the accumulator, so it always returned the raw
    input text unchanged. (The return annotation is corrected to str too.)
    """
    doc = nlp(text)
    joined = " ".join(ent.text for ent in doc.ents)
    return joined if joined else text


import pdfkit


def generate_pdf_from_html(html_string: str) -> bool:
    """Render *html_string* to ./report.pdf.

    Args:
        html_string: full HTML document to convert.

    Returns:
        bool: True on success, False when pdfkit/wkhtmltopdf fails.
    """
    try:
        pdfkit.from_string(html_string, 'report.pdf')
    except Exception:
        # Narrowed from a bare `except:` so KeyboardInterrupt/SystemExit are
        # no longer swallowed; conversion failure is reported as False.
        return False
    return True


import requests


def get_url_to_enum_dict():
    """Hostname -> app enum, used to detect which services a subject uses."""
    return {
        "twitter.com": "TWITTER",
        "linkedin.com": "LINKEDIN",
        "instagram.com": "INSTAGRAM",
        "facebook.com": "FACEBOOK",
        "snapchat.com": "SNAPCHAT",
        "swiggy.com": "SWIGGY",
        "zomato.com": "ZOMATO",
        "phonepe.com": "PHONEPE",
        "pay.google.com": "GPAY",
        "paytm.com": "PAYTM",
        "amazon.in": "AMAZON",
        "amazon.com": "AMAZON",
        "flipkart.com": "FLIPKART",
        "myntra.com": "MYNTRA",
        "ajio.com": "AJIO",
        "uber.com": "UBER",
        "olacabs.com": "OLA",
    }


def check_if_whatsapp_exists(lnum: str) -> bool:
    """Best-effort check whether *lnum* has a WhatsApp account.

    Args:
        lnum: number of the person, e.g. "8384852943".

    Returns:
        bool: the API's 'numberstatus' flag; defaults to True when the lookup
        fails (deliberately optimistic so downstream flows continue).
    """
    # (The original placed this docstring *inside* the try block, where it is
    # not a docstring at all; fixed. Return annotation corrected: the function
    # returns the boolean 'numberstatus', not the whole response Dict.)
    try:
        out = requests.get(
            url="https://proweblook.com/api/v1/checkwanumber",
            params={"number": lnum, "api_key": api_key},
        )
        return out.json()['numberstatus']
    except Exception:
        # Treat lookup failures as "exists" — best-effort by design.
        return True


import json


def get_token():
    """Fetch a sandbox.co.in auth token (uses the module-level `headers`)."""
    url = "https://api.sandbox.co.in/authenticate"
    response = requests.post(url, headers=headers)
    return json.loads(response.text)['access_token']


def get_vpa_valid(vpa):
    """Return True when the UPI VPA resolves to an existing account.

    Any error (network, auth, unexpected payload) is treated as "not valid".
    """
    try:
        url = f"https://api.sandbox.co.in/bank/upi/{vpa}"
        response = requests.get(url, headers=headers)
        print(response.text)
        return json.loads(response.text)['data']['account_exists']
    except Exception as e:
        print(e)
        return False


def get_validity_for_all_vpaIds(phoneNumber: str) -> dict:
    """Probe common UPI handles to see which payment apps the number uses.

    Args:
        phoneNumber: 10-digit number without country code.

    Returns:
        {'phonepe': {'isAvailable': bool}, 'paytm': {'isAvailable': bool}}
    """
    VPAS = {
        'phonepe': ['@ybl', '@ibl'],
        'paytm': ['@paytm'],
    }

    result = {}
    for provider, suffixes in VPAS.items():
        is_valid = False
        for suffix in suffixes:
            print(phoneNumber + suffix)
            # `or` short-circuits: no further network calls once one matched.
            is_valid = is_valid or get_vpa_valid(phoneNumber + suffix)
        result[provider] = {'isAvailable': is_valid}
    return result
import json
import requests


def search_text(query, no_of_results):
    """Google Custom Search (text).

    Args:
        query (str): text to search.
        no_of_results (int): how many results are wanted (10 per API page).

    Returns:
        list[dict]: [{'title': str, 'url': str, 'text': str}, ...]

    Bug fixes vs. the original:
    - result['items'] raised KeyError when a page had no results; use .get.
    - '&start=...' was appended to the *same* url string on every iteration,
      accumulating duplicate start parameters from the third page onwards.
    """
    base_url = f"https://customsearch.googleapis.com/customsearch/v1?q={query}&key={API_KEY}&cx="

    results = []
    for page in range((no_of_results - 1) // 10 + 1):
        page_url = base_url if page == 0 else f"{base_url}&start={page * 10 + 1}"
        response = requests.get(page_url)
        payload = json.loads(response.text)
        for item in payload.get('items', []):
            results.append({
                'title': item.get('title', ''),
                'url': item.get('link', ''),
                'text': item.get('snippet', ''),
            })
    return results[:no_of_results]


def search_image(query, no_of_results):
    """Google Custom Search (image); same pagination and missing-items fixes.

    Returns:
        list[dict]: [{'url': str, 'text': str}, ...]
    """
    base_url = (f"https://customsearch.googleapis.com/customsearch/v1"
                f"?q={query}&key={API_KEY}&cx=&searchType=image")

    results = []
    for page in range((no_of_results - 1) // 10 + 1):
        page_url = base_url if page == 0 else f"{base_url}&start={page * 10 + 1}"
        response = requests.get(page_url)
        payload = json.loads(response.text)
        for item in payload.get('items', []):
            results.append({
                'url': item.get('link', ''),
                'text': item.get('snippet', ''),
            })
    return results[:no_of_results]


if __name__ == '__main__':
    x = search_text("Raghav Maheshwari", no_of_results=10)
    print(x)


def get_token():
    """Fetch a sandbox.co.in auth token for the UPI validation API."""
    url = "https://api.sandbox.co.in/authenticate"
    headers = {
        "accept": "application/json",
        "x-api-version": "1.0",
        "x-api-key": "",
        "x-api-secret": "",
    }
    response = requests.post(url, headers=headers)
    return json.loads(response.text)['access_token']


def get_vpa_valid(vpa):
    """Return True when the UPI VPA exists (sandbox.co.in bank/upi API)."""
    url = f"https://api.sandbox.co.in/bank/upi/{vpa}"
    headers = {
        'Authorization': get_token(),
        'x-api-key': '',
        'x-api-version': '1.0.0',
    }
    response = requests.get(url, headers=headers)
    return json.loads(response.text)['data']['account_exists']


if __name__ == "__main__":
    vpa = '9845107111@ybl'
    print("The UPI ID is present: ", get_vpa_valid(vpa))


from typing import Dict

authkey = creds['auth']


def numsearch(num: str, authkey=authkey) -> Dict:
    """Search the number in the TrueCaller directory and return the user's details.

    Args:
        num: number to search, in the directory format 'NUMBER'.
        authkey: auth token; defaults to the module-level value.

    Returns:
        Dict with keys name/gender/address/image/email (defaulting to "" or [])
        plus 'carrier' only when Truecaller reports one, e.g.
        {'name': 'Raghav Maheshwari', 'gender': 'UNKNOWN',
         'address': [{'address': 'IN', 'city': 'Uttar Pradesh West', ...}],
         'email': 'raghav.ddps2@gmail.com'}
    """
    params = {'q': num, 'countryCode': '', 'type': '4', 'locAddr': '',
              'placement': 'SEARCHRESULTS,HISTORY,DETAILS', 'encoding': 'json'}
    resp = requests.get('https://search5-noneu.truecaller.com/v2/search',
                        headers=headers, params=params)
    record = resp.json()['data'][0]

    out = {"name": "", "gender": "", "address": [], "image": "", "email": ""}
    if record.get('name'):
        out["name"] = record["name"]
    if record.get('gender'):
        out["gender"] = record["gender"]
    if record.get('addresses'):
        out["address"] = record["addresses"]
    if record.get('image'):
        out["image"] = record["image"]
    if record.get('phones') and "carrier" in record["phones"][0]:
        out["carrier"] = record["phones"][0]["carrier"]
    if record.get('internetAddresses') and "id" in record["internetAddresses"][0]:
        out["email"] = record["internetAddresses"][0]["id"]
    return out


if __name__ == '__main__':
    output = numsearch(lnum)
    print(output)