From cd31afe545b2a74af4982ad5eb1711d2f17a5364 Mon Sep 17 00:00:00 2001 From: Rohit Nagraj Date: Sun, 5 Feb 2023 12:36:18 +0530 Subject: [PATCH] Added code --- ICE/__init__.py | 0 ICE/app.py | 74 +++ ICE/components/__init__.py | 0 .../__pycache__/__init__.cpython-38.pyc | Bin 0 -> 163 bytes .../__pycache__/name_component.cpython-38.pyc | Bin 0 -> 8105 bytes .../phone_number_component.cpython-38.pyc | Bin 0 -> 9085 bytes .../twitter_component.cpython-38.pyc | Bin 0 -> 7300 bytes ICE/components/name_component.py | 362 ++++++++++++++ ICE/components/phone_number_component.py | 442 ++++++++++++++++++ ICE/components/twitter_component.py | 336 +++++++++++++ ICE/providers/__init__.py | 0 ICE/providers/google.py | 57 +++ ICE/providers/truecaller.py | 37 ++ ICE/providers/twitter.py | 49 ++ ICE/requirements.txt | 65 +++ ICE/utils/__init__.py | 0 ICE/utils/__pycache__/__init__.cpython-38.pyc | Bin 0 -> 158 bytes .../__pycache__/get_entities.cpython-38.pyc | Bin 0 -> 972 bytes ICE/utils/__pycache__/pdf.cpython-38.pyc | Bin 0 -> 628 bytes ICE/utils/__pycache__/utils.cpython-38.pyc | Bin 0 -> 1172 bytes ICE/utils/__pycache__/vpa.cpython-38.pyc | Bin 0 -> 1457 bytes ICE/utils/get_entities.py | 37 ++ ICE/utils/pdf.py | 17 + ICE/utils/utils.py | 39 ++ ICE/utils/vpa.py | 41 ++ LICENSE | 2 +- README.md | 6 +- requirements.txt | 1 + search_api/search.py | 60 +++ search_api/upi_validation.py | 36 ++ truecaller_api/truecaller.py | 43 ++ 31 files changed, 1698 insertions(+), 6 deletions(-) create mode 100644 ICE/__init__.py create mode 100644 ICE/app.py create mode 100644 ICE/components/__init__.py create mode 100644 ICE/components/__pycache__/__init__.cpython-38.pyc create mode 100644 ICE/components/__pycache__/name_component.cpython-38.pyc create mode 100644 ICE/components/__pycache__/phone_number_component.cpython-38.pyc create mode 100644 ICE/components/__pycache__/twitter_component.cpython-38.pyc create mode 100644 ICE/components/name_component.py create mode 100644 ICE/components/phone_number_component.py create mode 100644 ICE/components/twitter_component.py create mode 100644 ICE/providers/__init__.py create mode 100644 ICE/providers/google.py create mode 100644 ICE/providers/truecaller.py create mode 100644 ICE/providers/twitter.py create mode 100644 ICE/requirements.txt create mode 100644 ICE/utils/__init__.py create mode 100644 ICE/utils/__pycache__/__init__.cpython-38.pyc create mode 100644 ICE/utils/__pycache__/get_entities.cpython-38.pyc create mode 100644 ICE/utils/__pycache__/pdf.cpython-38.pyc create mode 100644 ICE/utils/__pycache__/utils.cpython-38.pyc create mode 100644 ICE/utils/__pycache__/vpa.cpython-38.pyc create mode 100644 ICE/utils/get_entities.py create mode 100644 ICE/utils/pdf.py create mode 100644 ICE/utils/utils.py create mode 100644 ICE/utils/vpa.py create mode 100644 requirements.txt create mode 100644 search_api/search.py create mode 100644 search_api/upi_validation.py create mode 100644 truecaller_api/truecaller.py diff --git a/ICE/__init__.py b/ICE/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/ICE/app.py b/ICE/app.py new file mode 100644 index 00000000..69671f00 --- /dev/null +++ b/ICE/app.py @@ -0,0 +1,74 @@ +from curses.ascii import isdigit +from flask import Flask, make_response, jsonify, send_file +from components.name_component import getDetailsFromName +from components.twitter_component import getDetailsFromTwitterHandle +from utils.pdf import generate_pdf_from_html +from components.phone_number_component import getDetailsFromPhoneNumberForTwilio +from components.phone_number_component import getDetailsFromPhoneNumber +from flask import request +from flask_cors import CORS +from twilio.twiml.messaging_response import MessagingResponse +from flask import Flask,request +from twilio.rest import Client +import json +twilio_client = Client(account_sid, auth_token) + +app = Flask(__name__) +CORS(app) +app.config['CORS_HEADERS'] = 'Content-Type' + +@app.route('/ping') +def hello_name(): + return 'Healthy' + +@app.route('/getDetails') +def get_details(): + channel = str(request.args.get('channel')) + value = str(request.args.get('value')) + + if channel == "truecaller": + data = getDetailsFromPhoneNumber("+91"+value) + return data + elif channel == "twitter": + data = getDetailsFromTwitterHandle(value) + return data + elif channel == "name": + data = getDetailsFromName(value) + return data + + print(value, channel) + + + +@app.route('/reply',methods=["POST"]) +def reply(): + message = request.values.get('Body', None) + resp = MessagingResponse() + if(len(str(message)) == 10): + test = getDetailsFromPhoneNumberForTwilio(message) + print(test) + resp.message(test) + elif(str(message) == "1"): + resp.message("Thank you for replying. Please enter the mobile number of the person you want to search. Please don't include +91 in the number") + else: + resp.message("Welcome to Namma Sherlock.\n Press 1 to search using MobileNumber") + return str(resp) + +@app.route('/generateReport',methods = ['POST']) +def show_static_pdf(): + if request.method == "POST": + # print(f"HI {request.data}") + data = json.loads(request.get_data()) + # print(request.form) + + html_string = data["html_string"] + + status = generate_pdf_from_html(html_string) + if status: + # with open('GfG.pdf', 'rb') as static_file: + return send_file('report.pdf', as_attachment=True) + else: + return {"response":"File not formed"} + +if __name__ == '__main__': + app.run(debug=True) diff --git a/ICE/components/__init__.py b/ICE/components/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/ICE/components/__pycache__/__init__.cpython-38.pyc b/ICE/components/__pycache__/__init__.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..7e3e3c469e98b7dc7bbc01a30c90ba2d06048b7d GIT binary patch literal 163 zcmWIL<>g`k0`sDK$sqbMh(HF6K#l_t7qb9~6oz01O-8?!3`HPe1o6v5KeRZts93)! zF+C%(EH^PDwK$_Zu_#mDCAB!aB)>r4J2)WTBQZHU-pIhnSl`pxRX;gDw;(?+HLs*t gKR!M)FS8^*Uaz3?7Kcr4eoARhsvXFN&p^xo01TEVX8-^I literal 0 HcmV?d00001 diff --git a/ICE/components/__pycache__/name_component.cpython-38.pyc b/ICE/components/__pycache__/name_component.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..6b1ffc13290de4a6cf2716510e60793456abda01 GIT binary patch literal 8105 zcmds6OK;rP6(%_x4reqPjb4_OI0@r8w#RX$*l8Yh-Plh2i0VW(Y^O~b$5U!9HKH{` zjxQ-EQaIhX+W=h@T^WVFDG+quMK=8l1qyV*O&0|Uc#}=hO;8m5&LM{uC&~jzSCKH6 z_ndoqALpLOcMsp4oUAGMTlw+dJGWj@lz$Mh`>zDzEPkP-DvD5q+EeHV@@=DOZ9`W3sv^>WYbSM4g-4fLtmHLh2B6aBhf=emhLllCOntGz~l%AVqS zt=F`hn5Wj8?$6k>%$w-V_2=#RtbL!okJ}gQ1yL81zgF!1?(Bobhl>4#`=on7H12C+ zN;H41+fRvUF@yG}#jKda^BFNO_Tg!X1+gE`XT_p;0?&iuNpS$r=fqRuX*>^!XM~04 z^Ws@?5YNNnIdKTjBjR~+7|)~Ph&YO8OVq9_t=1!wT}w@>>rN*Mr04WNJlS=l_Fbpv z2`}1iuLrX2^m^^P17}5qXl}Y6MH1cHt{-`k=Mry@UfGhpb`*fU)o%;06MeOr>D$ z_NR($;{7jHm*2(8!e!}nH=MhDXTuFQK6IqFeBKQ=qhPRn`TEuNTTW-Qee%VVFDaY@5&MP0x49(bhP>S@}yr({dG2OYvGn)A*sU5m=GaZeSsP5n2j{B3NA+M*&R z7M0;dWW*B~xt_1HzM#bTW$T^H*IVQ_@^u&aQ~COf{AN_ix7-d0S6b(;u2@lE!S=XCIFXaT za&p%z-5}`pTq`x4uYHT=+huV_4_;%Hx8BKh{@`)T58AA`z>1e%| zc*0ZGDm3^j8!z-7ulJR$^s4oO^{qNNLSQxF`$sx(SVunirZI1P=tU94_kPev1^&l! zeY|LGxYD(p(DEYdL$8NzM%Ee_gUIrHs~?1s)p0^M{KoMwIvsZ{2sXcI9V_H`y3*-e z-_-MM&);-~=jZ)i+I704?T|KX!JBnax{{nnXsyej&${8P1$SL5J+q~J7&4?(ZCo@GZnP5v#s`*Cqsuw1eY$bA*)Uhh7c*q6{ zoG7qV*4oZZIWe5U!1aYRsZXir`m#c=5N=4OyRPp_Cvs^uz#XC6+Rp<^f(mkxiYKUe zl8OT;l8Mm_c*CAq_j>S%uGlr7IhtBNMK3*#!fpWUU}-KENLp(*vSaTAJ^?iO4E40A zc$SKTR6Iw;Au67y;xH9Qs5puu(J5##%1VoAV$MJU=iLb5A-o6&MxlWA5#B2 z;an0&QXicSHOiBCe@sI1IC$&$g`^FNN`h!=%q=WlM{|Sxp%$qRu=H4a zs0~YTN!G9#JRbnSJWOF8H~=H1)t%$FmX4jdxt6W#CV98zvuBRp{P5!w$4|UGY#lvw z>isjz$3Iv));e?O5jnS3Io-G>fnn45AzubRDR*VCHE8MbRiXfI2}antI%3O78SuZe zF}`P9_mn<WnI>rLb?33Rws5`%kE(`zDCo)rPnPh|;2hW78KEKvKZv zj4m^+3w>CQOE}=yh6X@kS(F!Wfq<-ZQF*A%Df<-wW0I92OpH?z)%%c`s!-2Ym0|hL zD?Eb!9Y*+wC0|=jO^OY;g?(_`kguWk$R={03Od+NHGU*hFvWFGxVX<;N7!*c)l;Wp zNXHjJKdEf_9^J3x59lQ`Rj$*h+mnbh5jC|p-R;mWlO~=>A14#lOG-E?c@{m#&V`&$ z)E+tgHof*o^a{ygq^CsYsk3p;skO&7RbE1m@9Vj-?;}qui{~a^CtiMT@(hvZs5nmr z-38NvJt50>6m?j(e@2H?wyD+cEn7{#g?`^x*>Z5EFK1qtooP;5^Ppl#2%p7Z_sal$R8@z%BQdr2N(TgD9cOf33 z;Rk`hyO!^39>kG7aZzX5q-R%0B&w#ONzpTd5d@+>Ku935*c(4zxpCvdHKa}|B1I30 zIdW>Y`S!}{h&AvE?qnKj%{8%_vVE+u3Wjy8*Hh2#ac!qz*X>$z()f& z81&rFX}bidlu|?0{;QXP9*X6^+v9@W*!yJgTxRN1|!*`H*fT+r=?09w#e7q&SrLeQ% z_T3?riAmp$dO>Hn%nCxuF7$ya5YCYkC=w&h!KFHqMhUy&+yx?Y)}UX2q9HVKEZ>*Z zGNen9;vx;x@Nnh1>xnI=N60iOlK@1aT*O;6V72U1+hK_d<6RD z@I&PP8TbVu)9Q0Wyoq-+(E_lMo&oa^d_xipB~o7~%drX}-yRxB*cj-Mro7%8nvoit zByb438duThjbSaW34K$O>p)2wrFAl){}lW(_`izvF@Cv7-+ATEVRi;I9}e_O^MSL+ ze1998@3U;a&&THbEH2LX*_}V!`CU}Hts@}z5|)bEo$PNqsGk3s558A*g5vZz>q6M1k_XM`IXQFI>BRWmW!y2y@ilGEk4$&EE&*dLnB=_`RW1$k6Kfk zEA7QguRMqTBRhD%pz+@$teamnzY}KKm&WzfgCUaz-$NnlRT4jg(ys?j6iy2QE`dQm zzD)cc70PwPNL7y_J9}=r0iMPcf%vFvsB8D=f>A;Z*9+#d?*^^!qRXI>jn3N*(8vZe zT{*49Dqk=(QRO;}4`WVnz0UPX^oO}}{|5LPjB;v~ul{zGKe`~`hxSw$bnwC9Vh}_F zd~iuE(Kbh*-8M%M)1KS)`Q>rnZl6oDt?~nAl4hUF6`c*&p2lStUxrBZ zS!a7P3!qmye!`$hkCO%;6@44xdoZ#A_-w?$f$s(kVkI32M^oIT39Q&n%$WydI$Ac$ zF%y)_=z5vX`~WHn3{8ib)$xTTH6^O2|J<{1;uJvgNc~$794IJ+JVtX?ispde5LWo| zN0|Udb{Uofpn-eWW4Z>eqBo*C=rpFEhWiK1SyXVTG#;wquX3tVpv;G=yh^AsZRe4% z-Z{$=J_AGcYytYMIX+TB@-kM;&in{V4%WgY0JnaZ(B#guBldRm(o*UyX)sRb z_z`zm7s-%Zxu=a(La2|sJd5vnPxk|{)pK9RK`0@m l8Ae9(_({=$*!D{;0EDXW5bNSQ?Au zP(Z#$v5+y&b6IRp;_RxjdZbDEI^aRlGv;u~TC-`B(|;wZi+#H->MzIE|UaU9>1;x%ys-&3M4PU3r7 zoD!$;eSJ9Y>tYe(E{f7Et-kb}WL?*l=?Z~Q;l8^P1=4rh$Q)etqSiyV?F&EJZru-L z%Wbz?4}0!a5u&)_J&Gi%x4drTN4`hA8a3IJ?N$_kz1e9Ae5mOrjf_}(tlvT&^3fZ0Qx*Nt z?}m}vUGbC|dXci)m^%74gXIU$G&zO#FMfUL4rUfENq2S4eb{l=yl`#HmHyHtFI}{(udc}75J6;Pz zhgF)e4Wj8q-7(}eQguU(aR&`o3fi(@DbFj;KvL8D`#rp#rga&pNq`MdYVHW;yJqpkCD{12W?|xcF;Z_nWB*N2wsu^JeNv8RACAW3!*m$y(+#DmJdBj|!cfxj zRaV#Sc+bguJk!;9H)sX-TPWRZN1@7j9oKKa&@a5GhJNIog<12KgJ69TXKUek4sBjc zazV7^N0BF)zca-wKI^N}?JzU#pzU|pJ>hqgip7PdySePH1hThNTDaO3zI)B>20>R< zHpxB-FI#I++V)yeNK$VzH@WaG4eaYzA;?dEwrC%I9TRs>a4lvnh6w^j0t5?-5m$tK>Q^iPb zdMog$o>Yb5qK8R8RQYsPl3WzjDA$84jN~|}f8p|@6^~U+mMCw+PpVwlYx|L$B*rxP z;k2u>joMyU(h-sc%1*9&T~E4^N0Wzd3cdOf9$S_7=1OJFiB$cd z>Jd&_sB}*rr+!`|a)L;m$Vnonh@2+!I*1w@ObQ*T>V3bB1L%o;Q*;Kim5bD9i5ivR zL$MkUYogPo%Bf^8%43Rg8yh-HSMBXCuHj(cTd56fmAj%SMjy=@eMZFDxLMnjnZMs-sXD3HJd8V)*Pw}GCt?qm4`Adcdjb6HN{=~hlAD=mW=BlLH&Fj4_=R*B zHCBR>Zs}EBQb}grB$YgVyzak2B3ZWp_F6TtX^g+HZ9r4N6Xr0VwRB%K+?D1JiTXDVybNEc16%p`OU6R$X=3Y zo;itf-9=iB!(~QP)LQqp;onIU&m^BWl%OshBx5YI`zEvdCVxm(J|^-TL|!J_f$L-0{yl0XvQ19? zm&tY&^}epMW&gw8?yJbQN_snl9W2~`{eKE~ZB)2qXom*pEHVQ_OP&D+$ABp~3PS^L zA&l6hs|>Cx7E&-d!hT}1_o(ur&o|_+pog8>S`_udvr9{<)n%ib!e4-2&+9U3eJ)U0 zRT&lcWB_OHr5AmZO#+`;RZNTSz$F=M64(O346}P-3s4!jowP2{QJNZ6%L<8ZP@7Kt ze`5|Ip`64QIr@Sj1LQIqNN!>1jR;M^?m|4n`>-W)PYfU8NWb~?>(F=Af+s2@qDgKi zj91`@I>0G`#AI*Zy?Xoh<(s&y=LeX{85lL?{MyyVwab^THdNti@x_nJd~v2RB>4-bsk=F7eHDdYgxnvacj zTOMZ@Ys3g=@aZ_dF~jF0*I<*nbLhvL(q9Y__v(0k923rVy{H|m^p{xmDS(7JP;#1` zEQ2UJiEAa=pZGsjb_b$s0lJ4cV0pB>@v8)_`IXNK3G`h95A^WAHOVrrX5O#N0#MRJ4GiNhFB{ZZfr) zA{GQ8Kj_ zk^dBtCC=wY^42h)hpd3u5LGIyo1f|Oc9ItbSRt&qYltyXdP0_FJW(=v4pLV+L|<{N z%3;cL877U?)~-oDDH_kYlHE4(*|TzeC;FH!k12b-f{fmom>uvBGi!+o1I>mTp%b6DKCN4$M{66@!@@z6>yMEuEoj;4d54BW=#vA(2?!4*y8(yqeTX(=HWYW>9@XV3GKe@RZGVN# z4fnT5%1pvyYhl_@XxkulcKCjV>j_d7qh`L0NB6sme(ZnbAESLn>Ayxmet6OTN`P=C z0WHJeJj@UI-;YTAjJW@ARQM;O0w#wvbV~n)429`&PIaGP%g{gLu4J4W_-l$?TkEEL zCoM<53!8%bQErzm!{)Fi&OGw=If`AQSETbuTaStBAeBqGIFq7)yuB}9L>}jrTT%HK z@=lsBAzi@e<8+qMR~h+YS~h{vb%8iB_>-Kja(ar>HBL{X{#aUn27I{N`fNiIaFgWL zm5nfsK+q_Er*W-u{cfXaz8@@060j=pD++K_s3yW-1uq9zf*?x5MugjxGq?-kjHfY8 zRonNTvR9pAPECYox_J25-%oR5;EdBiiTnfVxQszMuH3$u#%O<^nRK)-4i95uTqY4a z!h=!Jb=zrWHJ&1v8w_CTB|d9P7jSJj&FON>tX=d<#Bm7(aPYXqK#uQfaup*=(jiHJ zmw^D|97H(?WvB`!GlWW{muyi+?1#LJs%=u(V@PPQ5*AMXN&HH9aY<}JiE>5>9QxxH z8M{D^aELhm5E*uO!q;<()iCx>(Kgf=Zpw=3qTWDlppzjsIR`fe zAUdx>t?gYs{A8HRXL8Oi;tz!DlXC9)#>NFkDk)r5QyFl+>~;|XlK!sqTHtg_%BIyQx(l#Fy#5wB}9 z3K5H6tLKx=vRRQs`8}#cpho^Ck)IIxEh5>wsQfv2zfIdlJ`FgZ&3hR}Ju&GHwh*|5 zYGt;HqBVqF81~lC9uX&8eh456BYhtu*k?ope>o-t2%NlInXnu1?oX{nJb6Ed zdH{&Db*Sg?yOj-IVT6}Q|3&VftxDfX%0b8229!W;vLK?2RDO^W6Nuu#df+uFCnmWj z)prD}h?tt! zzv4>iL#ZI%Bp$RgH_(c$tu4~0!5hqy^r_D8f#7;a!B?PyN7SMD14NyTMt~T&yB&4v z#bg3x^yFU>`5BQvBSLnLZ4wXPtPX+g^)APAZ+OkhejfF=bNJ+yV6!V0qBU<}*=q+| z3+YYc?8${1T$c6lU# zeI<~$xBRvr{EUtXMJa%W_;}P{%N&YV6Cy(BlKzVPN2#h_{ZG3@A+qDT{A;wTFE#V- zrtkwSnIp#*BBE=O9JMNJMtBWa#AFjeRFgbe?g4O(Zcb`vMqXG&zuscc| z(__jKqD(v%c^$S+Ea82d%4Ux4Vahqz3B+dGdykG$cmZVKfwEu1wDjtLy|o>q(sJ_7 Vdw8*^;m^UZ?bX+OUXY0E0M{FmmTUoJXG+>=FE6$2qYssZ% zS9YW<(*n+|4N#y(Pd?a}6#fI+ga3yf+rwOXXfN)em$sLnDEfPE$(5FIfB?CL1mDcO znVp?^^FDs_cyn~Lq~Wu2_OH!9ysBw`qsIP^iN<;S!c)4Y2~Fs}*4FE~PIbdK+GgG4 zy6Ic(Ts_Bi%g?t9^#a#(sM~d$>v_M}F4aq1FJOGRUgo;(kF-bYqg*dyOr>7oddVMa zSL;=-m;LejINn+IC+ZViPPV7&)7&@WA860iXZqu2>$751R6f<}bMEwmXC7+xXWfJD zAu)E}5LGe$sZ~ELCd4HApA%DJ8s)q=AZAb=5wl_rw#=IzTddhb(Tbk?yCDyBr&|iQ@!o{9jFyenbR8MGO(;eBp*pF{+xUeCKq4*HBK@8rCn6)(?&&ws zhG6vNnyE@&=yk%#=`>wshHj)RA4|vFX0UeqktWCS{1@*n-o(nnMd`FQoI7o2!wom? zI?`Lbwuu^F_xK?l+c7o)pg6k%g2YoWWNDwq%_ z;qAH&yfviPgn1B6FX^@+CsC^z*dT^=oVKg-(v7xdC$mx7+X#NV(Z6Hcw9$KdPmlCi z7y2A5Lztf#JtNYEm2hS>kz<|-UVeyYfmayf<-oItc=>exVmelm5Q;=Qru%zCnYBbjw&`$jYC%l1_wbKkzRePig`+kdvNioQKLWc$X^w2j$7FXl(cp{#Rr$psGR@lNH3m{*Mzo?c7jH*-jHs%?MLD6MCe8C8Q8nK76h9M(7gF4 zIgH#L=Z?NcGJkiHS!@I?>9iAOGJ)@PHeKO$5?*P(?rg6)%|Lc{N9LD0!gH=VognC_ zDz8VlVN-ftGTblp`C3^`wt}GLyNxKMg*Cdq<8{=5EOW3%HJ#=6m#Aj4>~xvK^9+_c z{J^fWw#`dcbHg)X{hP`95AAiazuV#Urb|2c)b7Cw4?necZ=p}0e6ZeHUJV9#Vd8DS z;_cb);kRddzx?)W|KRQL>B$kHn~o^iy_(2!Nzvdci|Izwsu!w4eLdLJ6Ni70o;XD0Fp=ko%o9067)WCf@s9rz3oK4gwtjHvJF8bMLx0NmKUQTQ_Y2Fm{*j-P zuVLPdgQqA)X3Gwq_7iFMw4NSom$VNidPb~2&|`xf{X+nutrFa<&Mv}ORwhDY&6E{9 zq)bN$*ab!JemDsm>)09v^AHC+h_NIMl^zRqNXEq`5JqCh|>;zlmQ+5m+;5&m+2}Pw3o}**8gr zjDh$4M^uu13tbp3MCVu&=A4FuwB|HKOEYcfxNQlmmy1mt`FDGH#H^gi%>d3oR+cC{ zG^Vs!t%qm@S>=U|c?zO<9}?3w8u_Ny%Wb{EGg#kXhL2eCrRBuN*i65JeMl0m{65H& zI@M-~ybkin;6jbTHrGAj0$tre+;AURatz&&Gt4PytHO52BebXTK#gukJhBswJ4b^? z5ila^ZEU(bp~{hZo=%EalCs8jQhAxk5|O9Lb`U;Tw!g=yM7D|Hf0b-k zFz(wbTMjrJv%ZOJCrEG8(9Obq{56I9-mq}V*QP#)Jn_KSQn-N6G2ji3!q*^#2%u5& zK)~%5YVbNDf8S(}JHd8eSeA>JVRvdHin`&M#lek+{ca!p0s6UaC&8owgVh9M!mbR~ zJwU+Z{%5jRV4GE?e%DRJDTBR&d>7Bb6LZXA4^SPr;r>{ljec!dEvx%u5?TD8FjvSa z1=dPipEu5r@U}8DIxuP$4!p^YS>yA|o#J zVaf5W1FS{4Wp+Hf2S47H-a?3^LEG&?nYh_;BR^>N7Fj{)_Kh)61p+^k0<6j>Nr6O1 zk|?1n&K;mMXASy=@DV~2hmsSj)Q7ATDF~pbiic~<9ZzgKJ^@md!%@i{Wv@A*3pnHr z>lKSS`x;88goI!k#to z9xi`z9wQ)FwBPU9xMJHRa0t5?7s0#OE5#*YZ5nbNKna)ZxYQT=55doY|4D2O@pIYs z)*D;L*%{FLaG+m$A2?5W-(QB__qhMQe-6Fxah!eMOyTNGiV*m1`YWifJ}>~SWKZCU!%um}A$JMmcLWmO)R z4`3H%-41aV&WY`IH)OQQSsZ2hZl_jeLsp~d6^gZQ&`fS$$_`|!5jm|;*{v?F%~UIQ zk$4I^+>k$@*CBJnDYXQuhLSy{&37T>>988g*zF7}@+3J(Sc&g7*5x@Q!W^eb4JiGg!S*cNNJfwXPONJL_2kPVGgd}1 zB@N!cMT27pMU_2(H=h7JfI37Pz7kT#fWclL-T~vl<6ALZ4wo?+fgbH7%%4R>0P>mB zaMjE|)Wc8GRw2`}AL??2fMn9oGp}r&=b+!mM`|*I2G^Vpl0tG7t7cElQGa)O057|1 z;R29fyF~zV_sD>~6TP~S_)nUQvp$`Ohs+Fyw2`fJ7U1Iq=PYp;blqgopi0Q{%P2%T zXQNg~4k^!y2PZqgnFE|4U3zStNQ=mLN+?Ane+hDtj%^kd*k3eAoTQ3rE3THCN3S1tguy7Y&UveQ~V z8KOpjw0v)Ef&YAAfF>iFXOiZzkv&Tu)CK-iiKP4$^}R-fZX%o@*=IZN5s$Dr8xSKX zw!Fls$V6C_`woGRgOhdk*>)hdefKS#h8EK71B=;T(y>t6Htam|cCXt7`z?FQp2XKi KS+-B2HvWG=!0#CV literal 0 HcmV?d00001 diff --git a/ICE/components/name_component.py b/ICE/components/name_component.py new file mode 100644 index 00000000..0bdbc55a --- /dev/null +++ b/ICE/components/name_component.py @@ -0,0 +1,362 @@ +from math import factorial +from utils.vpa import get_validity_for_all_vpaIds +from utils.get_entities import extract_entities +from utils.utils import get_url_to_enum_dict +from providers.google import search_image +from utils.get_entities import extract_entity_string +from providers.twitter import get_user_information +from providers.google import search_text +from providers.truecaller import numsearch +import re +import gender_guesser.detector as gender +from urllib.parse import urlparse +import json +import random + +def serialize_sets(obj): + if isinstance(obj, set): + return list(obj) + + return obj + +d = gender.Detector() + +def getDetailsFromName(name: str) -> dict: + ''' + 1. Call the truecaller API to get the data + 2. Call the google search API to get the data + search_text("Raghav Maheshwari", no_of_results=10) + Google: Name + Google: Email + Google: Name + Email + Google: Name + address['city'] + Google: Name + Twitter [omitting enity here as it wil not be apt in most cases] + Google: Name + Facebook + Google: Name + Instagram + Google: Name + Linkedin + 3. Call the Twitter API using usernames from the above result. + getEntityForDescription + search_text("Raghav Maheshwari", no_of_results=10) + Google: Name + entity + Facebook + Google: Name + entity + Instagram + Google: Name + entity + Linkedin + ''' + + google_ts_res_plain = search_text(name, no_of_results=10) + google_ts_res_facebook = search_text("site:facebook.com " + name, no_of_results=5) + google_ts_res_twitter = search_text("site:twitter.com " + name, no_of_results=5) + google_ts_res_instagram = search_text("site:instagram.com " + name, no_of_results=5) + google_ts_res_linkedin = search_text("site:linkedin.com " + name, no_of_results=5) + + twitter_res = list() + filtered_usernames = get_twitter_usernames(google_ts_res_twitter) + for username in filtered_usernames: + twitter_res.append(get_user_information(username)) + + google_ts_res_zabuacorp = search_text(name + " Zaubacorp", no_of_results=5) + google_ts_res_indiakanoon = search_text(name + " IndiaKanoon", no_of_results=5) + + if(len(twitter_res) > 0): + top_tweet_desc = twitter_res[0]['user_description'] + entity_context = extract_entity_string(top_tweet_desc) + google_ts_entity_res_facebook = search_text("site:facebook.com " + name + " " + entity_context, no_of_results=5) + google_ts_entity_res_twitter = search_text("site:twitter.com " + name + " " + entity_context, no_of_results=5) + google_ts_entity_res_instagram = search_text("site:instagram.com " + name + " " +entity_context, no_of_results=5) + google_ts_entity_res_linkedin = search_text("site:linkedin.com " + name + " " + entity_context, no_of_results=5) + google_is_entity_res_facebook = search_image("site:facebook.com " + name + " " + entity_context, no_of_results=5) + google_is_entity_res_twitter = search_image("site:twitter.com " + name + " " + entity_context, no_of_results=5) + google_is_entity_res_instagram = search_image("site:instagram.com " + name + " " +entity_context, no_of_results=5) + google_is_entity_res_linkedin = search_image("site:linkedin.com " + name + " " + entity_context, no_of_results=5) + google_is_res_entity = search_image(name + " " + entity_context, no_of_results=5) + google_is_res_name = search_image(name, no_of_results=5) + + data = { + "google_ts":{ + "google_ts_res_plain": google_ts_res_plain, + "google_ts_res_facebook": google_ts_res_facebook, + "google_ts_res_twitter": google_ts_res_twitter, + "google_ts_res_instagram": google_ts_res_instagram, + "google_ts_res_linkedin": google_ts_res_linkedin, + "google_ts_res_zabuacorp": google_ts_res_zabuacorp, + "google_ts_res_indiakanoon": google_ts_res_indiakanoon, + "google_ts_entity_res_facebook": google_ts_entity_res_facebook, + "google_ts_entity_res_twitter": google_ts_entity_res_twitter, + "google_ts_entity_res_instagram": google_ts_entity_res_instagram, + "google_ts_entity_res_linkedin": google_ts_entity_res_linkedin + }, + "google_is": { + "google_is_entity_res_facebook": google_is_entity_res_facebook, + "google_is_entity_res_twitter": google_is_entity_res_twitter, + "google_is_entity_res_instagram": google_is_entity_res_instagram, + "google_is_entity_res_linkedin": google_is_entity_res_linkedin, + "google_is_res_entity": google_is_res_entity, + "google_is_res_name": google_is_res_name, + }, + "twitter": twitter_res, + "name": name + } + + return generate_response(data) + + +def get_twitter_usernames(search_data): + twitter_urls = set() + for data in search_data: + twitter_urls.add((data['url'])) + + return get_usernames_from_urls(twitter_urls) + +def get_usernames_from_urls(twitter_urls): + + result = set() + for twitter_url in twitter_urls: + match = re.search(r'^.*?\btwitter\.com/@?(\w{1,15})(?:[?/,].*)?$',twitter_url) + if match: + result.add(match.group(1)) + return result + + +''' +name: Truecaller Data +gender: by gender guesser if not in truecaller response. +email: Truecaller Data +imageUrls: fetch all images that + start with: https://pbs.twimg.com and https://media.licdn.com, + https://yt3.googleusercontent.com +tagsApplicable: we will run anirudhs function on Top 3 results from + * Google + * Facebook + * Instagram + * Linkedin + * Twitter + * Twitter Header +availableApps: fetchAllUrls and then check if present in the map +primaryAddress: Get it from twitter API and GLE in all the entities etc. +additionalAddress: Get it from twitter API and GLE in all the entities etc. +relatedPeople: + TWITTER : URL's and images + LINKEDIN etc: channel and links. +socialFootprint: + return Top 3 links from each => + twitter, instagram, facebook, linkedin. +''' +''' +{ + "truecaller_res": truecaller_res, + "google_ts_res_plain": google_ts_res_plain, + "google_ts_res_email": google_ts_res_email, + "google_ts_res_email_name": google_ts_res_email_name, + "google_ts_res_address": google_ts_res_address, + "google_ts_res_facebook": google_ts_res_facebook, + "google_ts_res_twitter": google_ts_res_twitter, + "google_ts_res_instagram": google_ts_res_instagram, + "google_ts_res_linkedin": google_ts_res_linkedin, + "twitter_res": twitter_res, + "google_is_res_entity": google_is_res_entity, + "google_is_res_name": google_is_res_name, + "google_ts_res_zabuacorp": google_ts_res_zabuacorp, + "google_ts_res_indiakanoon": google_ts_res_indiakanoon, + "google_ts_entity_res_facebook": google_ts_entity_res_facebook, + "google_ts_entity_res_twitter": google_ts_entity_res_twitter, + "google_ts_entity_res_instagram": google_ts_entity_res_instagram, + "google_ts_entity_res_linkedin": google_ts_entity_res_linkedin, + "google_is_entity_res_facebook": google_is_entity_res_facebook, + "google_is_entity_res_twitter": google_is_entity_res_twitter, + "google_is_entity_res_instagram": google_is_entity_res_instagram, + "google_is_entity_res_linkedin": google_is_entity_res_linkedin +} +''' + +# Add similarity later. +def get_facebook_data(data): + + result = list() + platform_keys = ["google_ts_entity_res_facebook", "google_ts_res_facebook"] + data1 = data["google_ts"][platform_keys[0]] + data2 = data["google_ts"][platform_keys[1]] + + i = 0 + for res in data1: + confidenceScore = 90 - (i+1)*random.uniform(0,10) + result.append({'url': res['url'], 'confidenceScore': confidenceScore}) + i += 1 + + i = 0 + for res in data2: + confidenceScore = 90 - (i+1)*random.uniform(0,10) + result.append({'url': res['url'], 'confidenceScore': confidenceScore}) + i += 1 + return result + +def get_linkedin_data(data): + result = list() + platform_keys = ["google_ts_entity_res_linkedin", "google_ts_res_linkedin"] + data1 = data["google_ts"][platform_keys[0]] + data2 = data["google_ts"][platform_keys[1]] + + i = 0 + for res in data1: + confidenceScore = 90 - (i+1)*random.uniform(0,10) + result.append({'url': res['url'], 'confidenceScore': confidenceScore}) + i += 1 + + i = 0 + for res in data2: + confidenceScore = 90 - (i+1)*random.uniform(0,10) + result.append({'url': res['url'], 'confidenceScore': confidenceScore}) + i += 1 + + return result + +def get_instagram_data(data): + result = list() + platform_keys = ["google_ts_entity_res_instagram", "google_ts_res_instagram"] + data1 = data["google_ts"][platform_keys[0]] + data2 = data["google_ts"][platform_keys[1]] + + i = 0 + for res in data1: + confidenceScore = 90 - (i+1)*random.uniform(0,10) + result.append({'url': res['url'], 'confidenceScore': confidenceScore}) + i += 1 + + i = 0 + for res in data2: + confidenceScore = 90 - (i+1)*random.uniform(0,10) + result.append({'url': res['url'], 'confidenceScore': confidenceScore}) + i += 1 + return result + +def get_twitter_data(data): + result = list() + twitter_profiles = data["twitter"] + for profile in twitter_profiles: + result.append({'url': "https://twitter.com/"+profile['user_screen_name'], 'confidenceScore': 100, 'profileUrl': profile['user_profile_image']}) + + return result + +def get_related_people(data): + return [ + { + "details": data['twitter'][0]['top_commentors'], + "platform":"TWITTER" + }, + { + "details": {}, + "platform":"LINKEDIN" + }, + { + "details": {}, + "platform":"INSTAGRAM" + }, + { + "details": {}, + "platform":"FACEBOOK" + } + ] + +def get_available_apps(data): + available_apps = [] + url_to_enum_dict = get_url_to_enum_dict() + google_res = data['google_ts'] + for res in google_res: + individual_res = google_res[res] + for entry in individual_res: + base_uri = urlparse('{uri.scheme}://{uri.netloc}/'.format(uri=entry['url'])) + if base_uri in url_to_enum_dict: + available_apps.append(url_to_enum_dict[base_uri]) + return available_apps +def get_applicable_tags(data): + google_ts_res = data['google_ts'] + google_is_res = data['google_is'] + + # ORG, GPE, PERSON + org_tags = list() + gpe_tags = list() + person_tags = list() + + for search in google_ts_res: + search_results = google_ts_res[search] + for result in search_results: + entities = extract_entities(result['title']) + for entity in entities: + if entity == "ORG": + org_tags.extend(entities[entity]) + elif entity == "GPE": + gpe_tags.extend(entities[entity]) + else: + person_tags.extend(entities[entity]) + + + for search in google_is_res: + search_results = google_is_res[search] + for result in search_results: + entities = extract_entities(result['text']) + for entity in entities: + if entity == "ORG": + org_tags.extend(entities[entity]) + elif entity == "GPE": + gpe_tags.extend(entities[entity]) + else: + person_tags.extend(entities[entity]) + + return { + 'ORG': json.dumps(set(org_tags),default=serialize_sets), + 'PERSON': json.dumps(set(person_tags),default=serialize_sets), + 'GPE': json.dumps(set(gpe_tags),default=serialize_sets), + 'location': data['twitter'][0]['user_location'] if len(data['twitter']) != 0 else "" + } + + +def get_informational_data(data): + result = list() + platform_keys = ["google_ts_res_zabuacorp", "google_ts_res_indiakanoon"] + data1 = data["google_ts"][platform_keys[0]] + data2 = data["google_ts"][platform_keys[1]] + + i = 0 + for res in data1: + confidenceScore = 90 - (i+1)*random.uniform(0,10) + result.append({'url': res['url'], 'confidenceScore': confidenceScore}) + i += 1 + + i = 0 + for res in data2: + confidenceScore = 90 - (i+1)*random.uniform(0,10) + result.append({'url': res['url'], 'confidenceScore': confidenceScore}) + i += 1 + + return result + +def generate_response(data): + + tag_related_data = get_applicable_tags(data) + result = {} + result['name'] = data["name"] + result['imageUrls'] = get_image_urls(data) + result['socialFootprint'] = dict() + result['socialFootprint']['twitter'] = get_twitter_data(data) + result['socialFootprint']['facebook'] = get_facebook_data(data) + result['socialFootprint']['linkedin'] = get_linkedin_data(data) + result['informationFootprint'] = get_informational_data(data) + result['primaryAddress'] = tag_related_data['location'] + result['tagsApplicable'] = tag_related_data['PERSON'] + result['additionalAddress'] = tag_related_data['GPE'] + result['relatedPeople'] = get_related_people(data) + return result + +def get_image_urls(data): + images = list() + + twitter_res = data['twitter'] + for res in twitter_res: + images.append(res['user_profile_banner']) + images.append(res['user_profile_image']) + + google_res = data['google_is'] + for res in google_res: + individual_res = google_res[res] + for entry in individual_res: + if entry['url'].startswith("https://pbs.twimg.com") or entry['url'].startswith("https://yt3.googleusercontent.com") or entry['url'].startswith("https://media.licdn.com"): + images.append(entry['url']) + return images diff --git a/ICE/components/phone_number_component.py b/ICE/components/phone_number_component.py new file mode 100644 index 00000000..81d693c7 --- /dev/null +++ b/ICE/components/phone_number_component.py @@ -0,0 +1,442 @@ +from audioop import add +from math import factorial +from utils.vpa import get_validity_for_all_vpaIds +from utils.get_entities import extract_entities +from utils.utils import get_url_to_enum_dict +from utils.utils import check_if_whatsapp_exists +from providers.google import search_image +from utils.get_entities import extract_entity_string +from providers.twitter import get_user_information +from providers.google import search_text +from providers.truecaller import numsearch +import re +import gender_guesser.detector as gender +from urllib.parse import urlparse +import json +import random + +def serialize_sets(obj): + if isinstance(obj, set): + return list(obj) + + return obj + +d = gender.Detector() + +def getDetailsFromPhoneNumber(phoneNumber: str) -> dict: + print(phoneNumber) + ''' + 1. Call the truecaller API to get the data + 2. Call the google search API to get the data + search_text("Raghav Maheshwari", no_of_results=10) + Google: Name + Google: Email + Google: Name + Email + Google: Name + address['city'] + Google: Name + Twitter [omitting enity here as it wil not be apt in most cases] + Google: Name + Facebook + Google: Name + Instagram + Google: Name + Linkedin + 3. Call the Twitter API using usernames from the above result. + getEntityForDescription + search_text("Raghav Maheshwari", no_of_results=10) + Google: Name + entity + Facebook + Google: Name + entity + Instagram + Google: Name + entity + Linkedin + ''' + + truecaller_res = numsearch(phoneNumber) + + address_details = "" + try: + address_details = truecaller_res['address'][0]['address'] + except Exception as e: + address_details = "" + + + google_ts_res_plain = search_text(truecaller_res['name'], no_of_results=10) + google_ts_res_email = search_text(truecaller_res['email'], no_of_results=10) + google_ts_res_email_name = search_text(str.split(truecaller_res['email'], '@')[0], no_of_results=10) + google_ts_res_address = search_text(truecaller_res['name'] + address_details, no_of_results=10) + google_ts_res_facebook = search_text("site:facebook.com " + truecaller_res['name'], no_of_results=5) + google_ts_res_twitter = search_text("site:twitter.com " + truecaller_res['name'], no_of_results=5) + google_ts_res_instagram = search_text("site:instagram.com " + truecaller_res['name'], no_of_results=5) + google_ts_res_linkedin = search_text("site:linkedin.com " + truecaller_res['name'], no_of_results=5) + + twitter_res = list() + filtered_usernames = get_twitter_usernames(google_ts_res_twitter) + for username in filtered_usernames: + twitter_res.append(get_user_information(username)) + + google_ts_res_zabuacorp = search_text(truecaller_res['name'] + " Zaubacorp", no_of_results=5) + google_ts_res_indiakanoon = search_text(truecaller_res['name'] + " IndiaKanoon", no_of_results=5) + + if(len(twitter_res) > 0): + top_tweet_desc = twitter_res[0]['user_description'] + entity_context = extract_entity_string(top_tweet_desc) + google_ts_entity_res_facebook = search_text("site:facebook.com " + truecaller_res['name'] + " " + entity_context, no_of_results=5) + google_ts_entity_res_twitter = search_text("site:twitter.com " + truecaller_res['name'] + " " + entity_context, no_of_results=5) + google_ts_entity_res_instagram = search_text("site:instagram.com " + truecaller_res['name'] + " " +entity_context, no_of_results=5) + google_ts_entity_res_linkedin = search_text("site:linkedin.com " + truecaller_res['name'] + " " + entity_context, no_of_results=5) + google_is_entity_res_facebook = search_image("site:facebook.com " + truecaller_res['name'] + " " + entity_context, no_of_results=5) + google_is_entity_res_twitter = search_image("site:twitter.com " + truecaller_res['name'] + " " + entity_context, no_of_results=5) + google_is_entity_res_instagram = search_image("site:instagram.com " + truecaller_res['name'] + " " +entity_context, no_of_results=5) + google_is_entity_res_linkedin = search_image("site:linkedin.com " + truecaller_res['name'] + " " + entity_context, no_of_results=5) + google_is_res_entity = search_image(truecaller_res['name'] + " " + entity_context, no_of_results=5) + google_is_res_name = search_image(truecaller_res['name'], no_of_results=5) + + data = { + "truecaller": truecaller_res, + "google_ts":{ + "google_ts_res_plain": google_ts_res_plain, + "google_ts_res_email": google_ts_res_email, + "google_ts_res_email_name": google_ts_res_email_name, + "google_ts_res_address": google_ts_res_address, + "google_ts_res_facebook": google_ts_res_facebook, + "google_ts_res_twitter": google_ts_res_twitter, + "google_ts_res_instagram": google_ts_res_instagram, + "google_ts_res_linkedin": google_ts_res_linkedin, + "google_ts_res_zabuacorp": google_ts_res_zabuacorp, + "google_ts_res_indiakanoon": google_ts_res_indiakanoon, + "google_ts_entity_res_facebook": google_ts_entity_res_facebook, + "google_ts_entity_res_twitter": google_ts_entity_res_twitter, + "google_ts_entity_res_instagram": google_ts_entity_res_instagram, + "google_ts_entity_res_linkedin": google_ts_entity_res_linkedin + }, + "google_is": { + "google_is_entity_res_facebook": google_is_entity_res_facebook, + "google_is_entity_res_twitter": google_is_entity_res_twitter, + "google_is_entity_res_instagram": google_is_entity_res_instagram, + "google_is_entity_res_linkedin": google_is_entity_res_linkedin, + "google_is_res_entity": google_is_res_entity, + "google_is_res_name": google_is_res_name, + }, + "twitter": twitter_res, + "phoneNumber": phoneNumber + } + + return generate_response(data) + + +def get_twitter_usernames(search_data): + twitter_urls = set() + for data in search_data: + twitter_urls.add((data['url'])) + + return get_usernames_from_urls(twitter_urls) + +def get_usernames_from_urls(twitter_urls): + + result = set() + for twitter_url in twitter_urls: + match = re.search(r'^.*?\btwitter\.com/@?(\w{1,15})(?:[?/,].*)?$',twitter_url) + if match: + result.add(match.group(1)) + return result + + +''' +name: Truecaller Data +gender: by gender guesser if not in truecaller response. +email: Truecaller Data +imageUrls: fetch all images that + start with: https://pbs.twimg.com and https://media.licdn.com, + https://yt3.googleusercontent.com +tagsApplicable: we will run anirudhs function on Top 3 results from + * Google + * Facebook + * Instagram + * Linkedin + * Twitter + * Twitter Header +availableApps: fetchAllUrls and then check if present in the map +primaryAddress: Get it from twitter API and GLE in all the entities etc. +additionalAddress: Get it from twitter API and GLE in all the entities etc. +relatedPeople: + TWITTER : URL's and images + LINKEDIN etc: channel and links. +socialFootprint: + return Top 3 links from each => + twitter, instagram, facebook, linkedin. +''' +''' +{ + "truecaller_res": truecaller_res, + "google_ts_res_plain": google_ts_res_plain, + "google_ts_res_email": google_ts_res_email, + "google_ts_res_email_name": google_ts_res_email_name, + "google_ts_res_address": google_ts_res_address, + "google_ts_res_facebook": google_ts_res_facebook, + "google_ts_res_twitter": google_ts_res_twitter, + "google_ts_res_instagram": google_ts_res_instagram, + "google_ts_res_linkedin": google_ts_res_linkedin, + "twitter_res": twitter_res, + "google_is_res_entity": google_is_res_entity, + "google_is_res_name": google_is_res_name, + "google_ts_res_zabuacorp": google_ts_res_zabuacorp, + "google_ts_res_indiakanoon": google_ts_res_indiakanoon, + "google_ts_entity_res_facebook": google_ts_entity_res_facebook, + "google_ts_entity_res_twitter": google_ts_entity_res_twitter, + "google_ts_entity_res_instagram": google_ts_entity_res_instagram, + "google_ts_entity_res_linkedin": google_ts_entity_res_linkedin, + "google_is_entity_res_facebook": google_is_entity_res_facebook, + "google_is_entity_res_twitter": google_is_entity_res_twitter, + "google_is_entity_res_instagram": google_is_entity_res_instagram, + "google_is_entity_res_linkedin": google_is_entity_res_linkedin +} +''' + +# Add similarity later. +def get_facebook_data(data): + + result = list() + platform_keys = ["google_ts_entity_res_facebook", "google_ts_res_facebook"] + data1 = data["google_ts"][platform_keys[0]] + data2 = data["google_ts"][platform_keys[1]] + + i = 0 + for res in data1: + confidenceScore = 90 - (i+1)*random.uniform(0,10) + result.append({'url': res['url'], 'confidenceScore': confidenceScore}) + i += 1 + + i = 0 + for res in data2: + confidenceScore = 90 - (i+1)*random.uniform(0,10) + result.append({'url': res['url'], 'confidenceScore': confidenceScore}) + i += 1 + + return result + +def get_linkedin_data(data): + result = list() + platform_keys = ["google_ts_entity_res_linkedin", "google_ts_res_linkedin"] + data1 = data["google_ts"][platform_keys[0]] + data2 = data["google_ts"][platform_keys[1]] + + i = 0 + for res in data1: + confidenceScore = 90 - (i+1)*random.uniform(0,10) + result.append({'url': res['url'], 'confidenceScore': confidenceScore}) + i += 1 + + i = 0 + for res in data2: + confidenceScore = 90 - (i+1)*random.uniform(0,10) + result.append({'url': res['url'], 'confidenceScore': confidenceScore}) + i += 1 + + return result + +def get_instagram_data(data): + result = list() + platform_keys = ["google_ts_entity_res_instagram", "google_ts_res_instagram"] + data1 = data["google_ts"][platform_keys[0]] + data2 = data["google_ts"][platform_keys[1]] + + i = 0 + for res in data1: + confidenceScore = 90 - (i+1)*random.uniform(0,10) + result.append({'url': res['url'], 'confidenceScore': confidenceScore}) + i += 1 + + i = 0 + for res in data2: + confidenceScore = 90 - (i+1)*random.uniform(0,10) + result.append({'url': res['url'], 'confidenceScore': confidenceScore}) + i += 1 + + return result + +def get_twitter_data(data): + result = list() + twitter_profiles = data["twitter"] + for profile in twitter_profiles: + result.append({'url': "https://twitter.com/"+profile['user_screen_name'], 'confidenceScore': 100, 'profileUrl': profile['user_profile_image']}) + + return result + +def get_related_people(data): + return [ + { + "details": data['twitter'][0]['top_commentors'], + "platform":"TWITTER" + }, + { + "details": {}, + "platform":"LINKEDIN" + }, + { + "details": {}, + "platform":"INSTAGRAM" + }, + { + "details": {}, + "platform":"FACEBOOK" + } + ] + +def get_whatsapp_details(data): + phone_number = data["phoneNumber"] + if(check_if_whatsapp_exists(phone_number)): + return { + 'isAvailable': True, + 'link': "https://wa.me/" + phone_number + } + else: + return { + 'isAvailable': False, + 'link': "https://wa.me/" + phone_number + } +def get_available_apps(data): + available_apps = [] + url_to_enum_dict = get_url_to_enum_dict() + google_res = data['google_ts'] + for res in google_res: + individual_res = google_res[res] + for entry in individual_res: + base_uri = urlparse('{uri.scheme}://{uri.netloc}/'.format(uri=entry['url'])) + if base_uri in url_to_enum_dict: + available_apps.append(url_to_enum_dict[base_uri]) + return available_apps +def get_applicable_tags(data): + google_ts_res = data['google_ts'] + google_is_res = data['google_is'] + + # ORG, GPE, PERSON + org_tags = list() + gpe_tags = list() + person_tags = list() + + for search in google_ts_res: + search_results = google_ts_res[search] + for result in search_results: + entities = extract_entities(result['title']) + for entity in entities: + if entity == "ORG": + org_tags.extend(entities[entity]) + elif entity == "GPE": + gpe_tags.extend(entities[entity]) + else: + person_tags.extend(entities[entity]) + + + for search in google_is_res: + search_results = google_is_res[search] + for result in search_results: + entities = extract_entities(result['text']) + for entity in entities: + if entity == "ORG": + org_tags.extend(entities[entity]) + elif entity == "GPE": + gpe_tags.extend(entities[entity]) + else: + person_tags.extend(entities[entity]) + + return { + 'ORG': json.dumps(set(org_tags),default=serialize_sets), + 'PERSON': json.dumps(set(person_tags),default=serialize_sets), + 'GPE': json.dumps(set(gpe_tags),default=serialize_sets), + 'location': data['twitter'][0]['user_location'] if len(data['twitter']) != 0 else "" + } + + +def get_informational_data(data): + result = list() + platform_keys = ["google_ts_res_zabuacorp", "google_ts_res_indiakanoon"] + data1 = data["google_ts"][platform_keys[0]] + data2 = data["google_ts"][platform_keys[1]] + + i = 0 + for res in data1: + confidenceScore = 90 - (i+1)*random.uniform(0,10) + result.append({'url': res['url'], 'confidenceScore': confidenceScore}) + i += 1 + + i = 0 + for res in data2: + confidenceScore = 90 - (i+1)*random.uniform(0,10) + result.append({'url': res['url'], 'confidenceScore': confidenceScore}) + i += 1 + + return result + +def generate_response(data): + + tag_related_data = get_applicable_tags(data) + result = {} + result['name'] = data['truecaller']['name'] + result['gender'] = data['truecaller']['gender'] if data['truecaller']['gender'] != "UNKNOWN" else d.get_gender(u"Bob") + result['email'] = data['truecaller']['email'] + result['imageUrls'] = get_image_urls(data) + result['socialFootprint'] = dict() + result['socialFootprint']['twitter'] = get_twitter_data(data) + result['socialFootprint']['facebook'] = get_facebook_data(data) + result['socialFootprint']['linkedin'] = get_linkedin_data(data) + result['informationFootprint'] = get_informational_data(data) + result['financialFootprint'] = get_validity_for_all_vpaIds(data["phoneNumber"]) + result['whatsappDetails'] = get_whatsapp_details(data) + # result['availableApps'] = get_available_apps(data) + result['primaryAddress'] = tag_related_data['location'] + result['tagsApplicable'] = tag_related_data['PERSON'] + result['additionalAddress'] = tag_related_data['GPE'] + result['relatedPeople'] = get_related_people(data) + return result + +def get_image_urls(data): + images = list() + + twitter_res = data['twitter'] + for res in twitter_res: + images.append(res['user_profile_banner']) + images.append(res['user_profile_image']) + + google_res = data['google_is'] + for res in google_res: + individual_res = google_res[res] + for entry in individual_res: + if entry['url'].startswith("https://pbs.twimg.com") or entry['url'].startswith("https://yt3.googleusercontent.com") or entry['url'].startswith("https://media.licdn.com"): + images.append(entry['url']) + return images + +''' + name + twitter + instagram + facebook + linkedin + Location + carrier + email + address +''' +def getDetailsFromPhoneNumberForTwilio(phoneNumber: str) -> dict: + phoneNumberWith91 = "+91" + phoneNumber + + print("test1") + truecaller_res = numsearch(phoneNumberWith91) + google_ts_res_twitter = search_text("site:twitter.com " + truecaller_res['name'], no_of_results=1) + + filtered_username = get_twitter_usernames(google_ts_res_twitter).pop() + twitter_res = get_user_information(filtered_username) + + google_ts_entity_res_facebook = search_text("site:facebook.com " + truecaller_res['name'] , no_of_results=5) + google_ts_entity_res_linkedin = search_text("site:linkedin.com " + truecaller_res['name'] , no_of_results=5) + print("test4") + vpa_data = get_validity_for_all_vpaIds(phoneNumber) + data = { + 'name':truecaller_res['name'], + 'email':truecaller_res['email'], + 'carrier':truecaller_res['carrier'], + 'facebook':google_ts_entity_res_facebook[0]['url'], + 'linkedin':google_ts_entity_res_linkedin[0]['url'], + 'twitter':"https://www.twitter.com/" + twitter_res['user_screen_name'], + 'location':twitter_res['user_location'], + 'email':truecaller_res['email'], + 'availableOnPhonePe': "Yes" if vpa_data['phonepe']['isAvailable'] else "No", + 'availableOnPaytm' : "Yes" if vpa_data['paytm']['isAvailable'] else "No" + } + str = "*Found the below details:* \n\n" + for key in data: + str += '*' + key.capitalize() + '*' + ": " + data[key] + '\n' + return str diff --git a/ICE/components/twitter_component.py b/ICE/components/twitter_component.py new file mode 100644 index 00000000..ce9c1c8e --- /dev/null +++ b/ICE/components/twitter_component.py @@ -0,0 +1,336 @@ +from math import factorial +from utils.vpa import get_validity_for_all_vpaIds +from utils.get_entities import extract_entities +from utils.utils import get_url_to_enum_dict +from utils.utils import check_if_whatsapp_exists +from providers.google import search_image +from utils.get_entities import extract_entity_string +from providers.twitter import get_user_information +from providers.google import search_text +from providers.truecaller import numsearch +import re +import random +import gender_guesser.detector as gender +from urllib.parse import urlparse +import json + +def serialize_sets(obj): + if isinstance(obj, set): + return list(obj) + + return obj + +d = gender.Detector() + +def getDetailsFromTwitterHandle(username: str) -> dict: + twitter_info = get_user_information(username) + + google_ts_res_plain = search_text(twitter_info['user_name'], no_of_results=10) + google_ts_res_facebook = search_text("site:facebook.com " + twitter_info['user_name'], no_of_results=5) + google_ts_res_twitter = search_text("site:twitter.com " + twitter_info['user_name'], no_of_results=5) + google_ts_res_instagram = search_text("site:instagram.com " + twitter_info['user_name'], no_of_results=5) + google_ts_res_linkedin = search_text("site:linkedin.com " + twitter_info['user_name'], no_of_results=5) + + google_ts_res_zabuacorp = search_text(twitter_info['user_name'] + " Zaubacorp", no_of_results=5) + google_ts_res_indiakanoon = search_text(twitter_info['user_name'] + " IndiaKanoon", no_of_results=5) + + top_tweet_desc = twitter_info['user_description'] + entity_context = extract_entity_string(top_tweet_desc) + google_ts_entity_res_facebook = search_text("site:facebook.com " + twitter_info['user_name'] + " " + entity_context, no_of_results=5) + google_ts_entity_res_twitter = search_text("site:twitter.com " + twitter_info['user_name'] + " " + entity_context, no_of_results=5) + google_ts_entity_res_instagram = search_text("site:instagram.com " + twitter_info['user_name'] + " " +entity_context, no_of_results=5) + google_ts_entity_res_linkedin = search_text("site:linkedin.com " + twitter_info['user_name'] + " " + entity_context, no_of_results=5) + google_is_entity_res_facebook = search_image("site:facebook.com " + twitter_info['user_name'] + " " + entity_context, no_of_results=5) + google_is_entity_res_twitter = search_image("site:twitter.com " + twitter_info['user_name'] + " " + entity_context, no_of_results=5) + google_is_entity_res_instagram = search_image("site:instagram.com " + twitter_info['user_name'] + " " +entity_context, no_of_results=5) + google_is_entity_res_linkedin = search_image("site:linkedin.com " + twitter_info['user_name'] + " " + entity_context, no_of_results=5) + google_is_res_entity = search_image(twitter_info['user_name'] + " " + entity_context, no_of_results=5) + google_is_res_name = search_image(twitter_info['user_name'], no_of_results=5) + + data = { + "google_ts":{ + "google_ts_res_plain": google_ts_res_plain, + "google_ts_res_facebook": google_ts_res_facebook, + "google_ts_res_twitter": google_ts_res_twitter, + "google_ts_res_instagram": google_ts_res_instagram, + "google_ts_res_linkedin": google_ts_res_linkedin, + "google_ts_res_zabuacorp": google_ts_res_zabuacorp, + "google_ts_res_indiakanoon": google_ts_res_indiakanoon, + "google_ts_entity_res_facebook": google_ts_entity_res_facebook, + "google_ts_entity_res_twitter": google_ts_entity_res_twitter, + "google_ts_entity_res_instagram": google_ts_entity_res_instagram, + "google_ts_entity_res_linkedin": google_ts_entity_res_linkedin + }, + "google_is": { + "google_is_entity_res_facebook": google_is_entity_res_facebook, + "google_is_entity_res_twitter": google_is_entity_res_twitter, + "google_is_entity_res_instagram": google_is_entity_res_instagram, + "google_is_entity_res_linkedin": google_is_entity_res_linkedin, + "google_is_res_entity": google_is_res_entity, + "google_is_res_name": google_is_res_name, + }, + "twitter": [twitter_info] + } + + return generate_response(data) + + +def get_twitter_usernames(search_data): + twitter_urls = set() + for data in search_data: + twitter_urls.add((data['url'])) + + return get_usernames_from_urls(twitter_urls) + +def get_usernames_from_urls(twitter_urls): + + result = set() + for twitter_url in twitter_urls: + match = re.search(r'^.*?\btwitter\.com/@?(\w{1,15})(?:[?/,].*)?$',twitter_url) + if match: + result.add(match.group(1)) + return result + + +''' +name: Truecaller Data +gender: by gender guesser if not in truecaller response. +email: Truecaller Data +imageUrls: fetch all images that + start with: https://pbs.twimg.com and https://media.licdn.com, + https://yt3.googleusercontent.com +tagsApplicable: we will run anirudhs function on Top 3 results from + * Google + * Facebook + * Instagram + * Linkedin + * Twitter + * Twitter Header +availableApps: fetchAllUrls and then check if present in the map +primaryAddress: Get it from twitter API and GLE in all the entities etc. +additionalAddress: Get it from twitter API and GLE in all the entities etc. +relatedPeople: + TWITTER : URL's and images + LINKEDIN etc: channel and links. +socialFootprint: + return Top 3 links from each => + twitter, instagram, facebook, linkedin. +''' +''' +{ + "truecaller_res": truecaller_res, + "google_ts_res_plain": google_ts_res_plain, + "google_ts_res_email": google_ts_res_email, + "google_ts_res_email_name": google_ts_res_email_name, + "google_ts_res_address": google_ts_res_address, + "google_ts_res_facebook": google_ts_res_facebook, + "google_ts_res_twitter": google_ts_res_twitter, + "google_ts_res_instagram": google_ts_res_instagram, + "google_ts_res_linkedin": google_ts_res_linkedin, + "twitter_res": twitter_res, + "google_is_res_entity": google_is_res_entity, + "google_is_res_name": google_is_res_name, + "google_ts_res_zabuacorp": google_ts_res_zabuacorp, + "google_ts_res_indiakanoon": google_ts_res_indiakanoon, + "google_ts_entity_res_facebook": google_ts_entity_res_facebook, + "google_ts_entity_res_twitter": google_ts_entity_res_twitter, + "google_ts_entity_res_instagram": google_ts_entity_res_instagram, + "google_ts_entity_res_linkedin": google_ts_entity_res_linkedin, + "google_is_entity_res_facebook": google_is_entity_res_facebook, + "google_is_entity_res_twitter": google_is_entity_res_twitter, + "google_is_entity_res_instagram": google_is_entity_res_instagram, + "google_is_entity_res_linkedin": google_is_entity_res_linkedin +} +''' + +# Add similarity later. +def get_facebook_data(data): + + result = list() + platform_keys = ["google_ts_entity_res_facebook", "google_ts_res_facebook"] + data1 = data["google_ts"][platform_keys[0]] + data2 = data["google_ts"][platform_keys[1]] + + i = 0 + for res in data1: + confidenceScore = 90 - (i+1)*random.uniform(0,10) + result.append({'url': res['url'], 'confidenceScore': confidenceScore}) + i += 1 + + i = 0 + for res in data2: + confidenceScore = 90 - (i+1)*random.uniform(0,10) + result.append({'url': res['url'], 'confidenceScore': confidenceScore}) + i += 1 + return result + +def get_linkedin_data(data): + result = list() + platform_keys = ["google_ts_entity_res_linkedin", "google_ts_res_linkedin"] + data1 = data["google_ts"][platform_keys[0]] + data2 = data["google_ts"][platform_keys[1]] + + i = 0 + for res in data1: + confidenceScore = 90 - (i+1)*random.uniform(0,10) + result.append({'url': res['url'], 'confidenceScore': confidenceScore}) + i += 1 + + i = 0 + for res in data2: + confidenceScore = 90 - (i+1)*random.uniform(0,10) + result.append({'url': res['url'], 'confidenceScore': confidenceScore}) + i += 1 + return result + +def get_instagram_data(data): + result = list() + platform_keys = ["google_ts_entity_res_instagram", "google_ts_res_instagram"] + data1 = data["google_ts"][platform_keys[0]] + data2 = data["google_ts"][platform_keys[1]] + + i = 0 + for res in data1: + confidenceScore = 90 - (i+1)*random.uniform(0,10) + result.append({'url': res['url'], 'confidenceScore': confidenceScore}) + i += 1 + + i = 0 + for res in data2: + confidenceScore = 90 - (i+1)*random.uniform(0,10) + result.append({'url': res['url'], 'confidenceScore': confidenceScore}) + i += 1 + return result + +def get_twitter_data(data): + result = list() + twitter_profiles = data["twitter"] + for profile in twitter_profiles: + result.append({'url': "https://twitter.com/"+profile['user_screen_name'], 'confidenceScore': 100, 'profileUrl': profile['user_profile_image']}) + + return result + +def get_related_people(data): + return [ + { + "details": data['twitter'][0]['top_commentors'], + "platform":"TWITTER" + }, + { + "details": {}, + "platform":"LINKEDIN" + }, + { + "details": {}, + "platform":"INSTAGRAM" + }, + { + "details": {}, + "platform":"FACEBOOK" + } + ] + +def get_available_apps(data): + available_apps = [] + url_to_enum_dict = get_url_to_enum_dict() + google_res = data['google_ts'] + for res in google_res: + individual_res = google_res[res] + for entry in individual_res: + base_uri = urlparse('{uri.scheme}://{uri.netloc}/'.format(uri=entry['url'])) + if base_uri in url_to_enum_dict: + available_apps.append(url_to_enum_dict[base_uri]) + return available_apps +def get_applicable_tags(data): + google_ts_res = data['google_ts'] + google_is_res = data['google_is'] + + # ORG, GPE, PERSON + org_tags = list() + gpe_tags = list() + person_tags = list() + + for search in google_ts_res: + search_results = google_ts_res[search] + for result in search_results: + entities = extract_entities(result['title']) + for entity in entities: + if entity == "ORG": + org_tags.extend(entities[entity]) + elif entity == "GPE": + gpe_tags.extend(entities[entity]) + else: + person_tags.extend(entities[entity]) + + + for search in google_is_res: + search_results = google_is_res[search] + for result in search_results: + entities = extract_entities(result['text']) + for entity in entities: + if entity == "ORG": + org_tags.extend(entities[entity]) + elif entity == "GPE": + gpe_tags.extend(entities[entity]) + else: + person_tags.extend(entities[entity]) + + return { + 'ORG': json.dumps(set(org_tags),default=serialize_sets), + 'PERSON': json.dumps(set(person_tags),default=serialize_sets), + 'GPE': json.dumps(set(gpe_tags),default=serialize_sets), + 'location': data['twitter'][0]['user_location'] if len(data['twitter']) != 0 else "" + } + + +def get_informational_data(data): + result = list() + platform_keys = ["google_ts_res_zabuacorp", "google_ts_res_indiakanoon"] + data1 = data["google_ts"][platform_keys[0]] + data2 = data["google_ts"][platform_keys[1]] + + i = 0 + for res in data1: + confidenceScore = 90 - (i+1)*random.uniform(0,10) + result.append({'url': res['url'], 'confidenceScore': confidenceScore}) + i += 1 + + i = 0 + for res in data2: + confidenceScore = 90 - (i+1)*random.uniform(0,10) + result.append({'url': res['url'], 'confidenceScore': confidenceScore}) + i += 1 + return result + +def generate_response(data): + + tag_related_data = get_applicable_tags(data) + result = {} + result['name'] = data['twitter'][0]['user_name'] + result['imageUrls'] = get_image_urls(data) + result['socialFootprint'] = dict() + result['socialFootprint']['twitter'] = get_twitter_data(data) + result['socialFootprint']['facebook'] = get_facebook_data(data) + result['socialFootprint']['linkedin'] = get_linkedin_data(data) + result['informationFootprint'] = get_informational_data(data) + result['primaryAddress'] = tag_related_data['location'] + result['tagsApplicable'] = tag_related_data['PERSON'] + result['additionalAddress'] = tag_related_data['GPE'] + result['relatedPeople'] = get_related_people(data) + return result + +def get_image_urls(data): + images = list() + + twitter_res = data['twitter'] + for res in twitter_res: + images.append(res['user_profile_banner']) + images.append(res['user_profile_image']) + + google_res = data['google_is'] + for res in google_res: + individual_res = google_res[res] + for entry in individual_res: + if entry['url'].startswith("https://pbs.twimg.com") or entry['url'].startswith("https://yt3.googleusercontent.com") or entry['url'].startswith("https://media.licdn.com"): + images.append(entry['url']) + return images diff --git a/ICE/providers/__init__.py b/ICE/providers/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/ICE/providers/google.py b/ICE/providers/google.py new file mode 100644 index 00000000..5f9c405c --- /dev/null +++ b/ICE/providers/google.py @@ -0,0 +1,57 @@ +import json +import requests + + +def search_text(query, no_of_results): + """ + Args: + query (str): text to search + no_of_results (int): How many results do you want + + Returns: + result (list({'title': str, 'url': str, 'text': str})): list of search results + """ + if(query == ""): + return [] + url = f"https://customsearch.googleapis.com/customsearch/v1?q={query}&key={API_KEY}&cx=" + + results = [] + for i in range(((no_of_results-1)//10) + 1): + if i != 0: + url = f"{url}&start={i*10 + 1}" + response = requests.get(url) + result = json.loads(response.text) + for item in result.get('items', list()): + results.append({ + 'title': item.get('title', ''), + 'url': item.get('link', ''), + 'text': item.get('snippet', '') + }) + return results[:no_of_results] + + +def search_image(query, no_of_results): + """ + Args: + query (str): text to search + no_of_results (int): How many results do you want + + Returns: + result (list({'url': str, 'text': str})): list of image search results + """ + url = f"https://customsearch.googleapis.com/customsearch/v1?q={query}&key={API_KEY}&cx=" + url = f"{url}&searchType=image" + + results = [] + for i in range(((no_of_results-1)//10) + 1): + if i != 0: + url = f"{url}&start={i*10 + 1}" + response = requests.get(url) + result = json.loads(response.text) + for item in result.get('items', list()): + results.append({ + 'url': item['link'], + 'text': item['snippet'] if 'snippet' in item else "" + }) + return results[:no_of_results] + diff --git a/ICE/providers/truecaller.py b/ICE/providers/truecaller.py new file mode 100644 index 00000000..22be0ff6 --- /dev/null +++ b/ICE/providers/truecaller.py @@ -0,0 +1,37 @@ +import requests +from typing import Dict + +def numsearch(num:str) -> Dict: + authkey = creds['auth'] + """Searches the given number through TrueCaller Directory and returs the details of the given user + + Args: + num (str): "Number to be searched in the directory format 'NUMBER'" + authkey (_type_, optional): Defaults to authkey. + + Returns: + OUTPUT (Dict): {'name': 'Raghav Maheshwari', 'gender': 'UNKNOWN', + 'address': [{'address': 'IN', 'city': 'Uttar Pradesh West', 'countryCode': 'IN', 'timeZone': '+05:30', 'type': 'address'}], + 'email': 'raghav.ddps2@gmail.com'} + """ + params = {'q':num, 'countryCode':'', 'type':'4', 'locAddr':'', 'placement':'SEARCHRESULTS,HISTORY,DETAILS', 'encoding':'json'} + resp = requests.get('https://search5-noneu.truecaller.com/v2/search', headers=headers, params=params) + resp = resp.json() + + output = {"name":"","gender":"","address":[],"image":"","email":""} + if resp['data'][0].get('name'): + output["name"] = resp["data"][0]["name"] + if resp['data'][0].get('gender'): + output["gender"] = resp["data"][0]["gender"] + if resp['data'][0].get('addresses'): + output["address"] = resp["data"][0]["addresses"] + if resp['data'][0].get('image'): + output["image"] = resp["data"][0]["image"] + if resp['data'][0].get('phones'): + if "carrier" in resp['data'][0]["phones"][0]: + output["carrier"] = resp["data"][0]["phones"][0]["carrier"] + if resp['data'][0].get('internetAddresses'): + if "id" in resp["data"][0]["internetAddresses"][0]: + output["email"] = resp["data"][0]["internetAddresses"][0]["id"] + + return output \ No newline at end of file diff --git a/ICE/providers/twitter.py b/ICE/providers/twitter.py new file mode 100644 index 00000000..0dfa1c1f --- /dev/null +++ b/ICE/providers/twitter.py @@ -0,0 +1,49 @@ +import tweepy +import json +import time +import datetime + +auth = tweepy.OAuthHandler(consumer_key, consumer_secret) +auth.set_access_token(access_token, access_token_secret) +api = tweepy.API(auth) + +def filter_tweets(tweets): + filteredTweets = list() + for tweet in tweets: + filteredTweets.append({'tweet': tweet.text, 'tweet_date': tweet.created_at.now().strftime("%Y-%m-%d %H:%M:%S")}) + return filteredTweets + +def get_user_information(username): + user = api.get_user(screen_name=username) + tweets = api.user_timeline(screen_name=username, count=200) + sorted_tweets = sorted(tweets, key=lambda x: x.favorite_count, reverse=True)[:10] + + replies = list() + top_commentors = dict() + + # Fetching list of people who interacted in last 20 tweets + # Not the people most interacted with. + for tweet in tweepy.Cursor(api.search_tweets,q='to:'+username, result_type='recent').items(10): + replies.append(tweet) + for reply in replies: + commentor = api.get_user(screen_name=reply.user.screen_name) + top_commentors[reply.user.screen_name] = { + 'twitter_url':'https://twitter.com/' + reply.user.screen_name, + 'twitter_profile_url': commentor.profile_image_url + } + data = { + "user_id": user.id, + "user_name": user.name, + "user_screen_name": user.screen_name, + "user_entities": json.dumps(user.entities), + "user_location": json.dumps(user.location), + "user_description": user.description, + "user_followers_count": user.followers_count, + "user_friends_count": user.friends_count, + "user_created_at": user.created_at, + "user_profile_banner": user.profile_image_url_https, + "user_profile_image": user.profile_image_url_https, + "user_top_tweets": json.dumps(filter_tweets(sorted_tweets)), + "top_commentors": top_commentors + } + return data \ No newline at end of file diff --git a/ICE/requirements.txt b/ICE/requirements.txt new file mode 100644 index 00000000..f8168bdf --- /dev/null +++ b/ICE/requirements.txt @@ -0,0 +1,65 @@ +cachetools==5.2.0 +certifi==2022.5.18.1 +charset-normalizer==2.0.12 +click==8.1.3 +cmdstanpy==0.9.5 +convertdate==2.4.0 +cycler==0.11.0 +Cython==0.29.30 +distlib==0.3.4 +ephem==4.1.3 +fbprophet==0.7.1 +filelock==3.7.1 +Flask==2.1.2 +Flask-Cors==3.0.10 +fonttools==4.33.3 +google-api-core==2.8.1 +google-auth==2.7.0 +google-cloud-core==2.3.1 +google-cloud-firestore==2.5.2 +googleapis-common-protos==1.56.2 +grpcio==1.46.3 +grpcio-status==1.46.3 +hijri-converter==2.2.4 +holidays==0.14.2 +idna==3.3 +importlib-metadata==4.11.4 +itsdangerous==2.1.2 +Jinja2==3.1.2 +kiwisolver==1.4.2 +korean-lunar-calendar==0.2.1 +LunarCalendar==0.0.9 +MarkupSafe==2.1.1 +matplotlib==3.5.2 +numpy==1.22.4 +oauthlib==3.2.2 +packaging==21.3 +pandas==1.4.2 +Pillow==9.1.1 +platformdirs==2.5.2 +plotly==5.8.2 +prophet==1.0.1 +proto-plus==1.20.5 +protobuf==3.20.1 +pyasn1==0.4.8 +pyasn1-modules==0.2.8 +PyJWT==2.4.0 +PyMeeus==0.5.11 +pyparsing==3.0.9 +pystan==2.19.1.1 +python-dateutil==2.8.2 +pytz==2022.1 +requests==2.28.0 +requests-oauthlib==1.3.1 +rsa==4.8 +six==1.16.0 +tenacity==8.0.1 +tqdm==4.64.0 +tweepy==4.12.1 +twilio==6.38.0 +ujson==5.3.0 +urllib3==1.26.9 +virtualenv==20.14.1 +Werkzeug==2.1.2 +zipp==3.8.0 +requests diff --git a/ICE/utils/__init__.py b/ICE/utils/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/ICE/utils/__pycache__/__init__.cpython-38.pyc b/ICE/utils/__pycache__/__init__.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..54ec0e8833875f92f520e233f1065ab20c9af0ba GIT binary patch literal 158 zcmWIL<>g`k0-K_H$sqbMh(HF6K#l_t7qb9~6oz01O-8?!3`HPe1o6vRKeRZts93)! zF+C%(EH^PDwK$_Zu_#mDCAB!aB)>r4J2)WTBQZHU-pIhnSl`pxRll?(GpATTK0Y%q bvm`!Vub}c4hfQvNN@-529mw#{K+FIDJ9i~g literal 0 HcmV?d00001 diff --git a/ICE/utils/__pycache__/get_entities.cpython-38.pyc b/ICE/utils/__pycache__/get_entities.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..9464c76d06d052925c49d2808a6983655d0a22e4 GIT binary patch literal 972 zcmZ`&&2AGh5VpO}W}E&fR8WbVl@pN=L4{L<5U8yR0@^B3FN>?6Je`x?hx z)U5piT(sB3cA1M_I+2FwnQ0PBx4~QbHrFOz6Gf7hN+f6EIME}QHR7YOp%P}IiVQo9 zKSsg3u?0hK!er14ea3Taup!{d44(ws=xAde%*ZS_q4&{w1$=mcG5WAEBR~pYec#Pb zMZr?$?qR*W;4GDrnSo%UFuBQvX2+=(oRx)-n(0`uvbV@iYc&S;QuTG)uiVZfJF)0}+V2|+gvQas#(rnl z1#Tc3M{|E?vucAvjun9#SSq28mI8MjhcZ1DGO=Mg9*csj3Vd)$y|V!?GaHl>V*~j1 z5~(^QuXbC9S}5I8p7tmm2t7DURo>bbdT7dV>rM9{d6j0vWb^Unlh*64oz}$UQn&iT zB-izNG@jZzR8ytd-(U#v+IQemgd<$TcZphu*#9Mc3fXH(`9e+%a@OYoC+AMkzzexP zBODKjdOSm2eTg;|rT2-eD7Z0Sjv7yk8X#Rc7q7tT$_S2Wy(Dr8vwR{ gy@$C)#8dKYqGzL$Po#M6THY5f!YjCrYouQP4UGBj00000 literal 0 HcmV?d00001 diff --git a/ICE/utils/__pycache__/pdf.cpython-38.pyc b/ICE/utils/__pycache__/pdf.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..44041c012cee126f6199e6b4220e4ea47783af9d GIT binary patch literal 628 zcmYjPJ8u**5VrSqF9oGQsa>SVHdz!JbV3nP-~dHJh)V=)vodGz-E!XbG4>|n6bhs? z`41FO@|WCDQ1KV27;jGKW;IXE;~URj?CcB)($|AuvoDm8-`?4#5S=H;<`D`(1XU!z z(=?~(2P&w;JglQUs^dJSL?*H)!f%0yM0^wFeL<%rP5xjtd6kAPo+xgXZeV01in3DD z^+0}VrLk$?Mhjc3!dR%9xl2IW7MdBN`cK|YkWZ1#K8p3OCEw{a;`$L#@|HCBDDmHW zL^dVwhzN0dw7z{V8wuP>!&ahXCFq*@S?q2W8x9%L3Czv7v%I&3g&kmCI^Lwc>2Pgj ze$ZNt+0=5|8m3D&)6Jy>Q|X4CLo3-en{Z`h*Lghzxzb<{5y#{lcTUh)R@RL?sLk#v zg&3FnNnn8L?{RivuwMpzzTlTNUr4jK;!tHzrCD0N%Fd>*ix+&hERG%=JG8Gp7uj<`?%0sEz$Z~y=R literal 0 HcmV?d00001 diff --git a/ICE/utils/__pycache__/utils.cpython-38.pyc b/ICE/utils/__pycache__/utils.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..7b464d22dc75d82a013daa8fab4be03fac353d0f GIT binary patch literal 1172 zcmah}L2uhO6ecA_mYq0FilW$h9Cny7AWEG!b&F!C;<$;MST<}0X?PK6k+Bual19>X zZ8Sagx_{8aw%h)acGzL3opv8EU{A7v<~9<1dLQ{7-{a#)T~wqFZ zyG;l13Wj_Ff+2<@MDni|;i*P6jF?VzX0VMYs~ztu@bXB8N_6GfNJHxz!SMO zoEgVqph#T|Q+m)m^5m1Jjpxtk;URy@8+?Dr_gTGhK%dhFJ$yzF*x`Qfv>x0vCXa)B zhr8`QfF=z29SDY*#)=%6nMY=2R$-gaw^)^JvwLiZ?S9^cOzyi`$-8WOUaOy3=WX|_ zb=-Eda@!qv&Pm_tWY%k^**fZW&xkc}onG_Q@rXINY@eK55tDQ~j@Kon-f7ou^;*O@ z={Z+q1BBNhrqgjq*Cp2L^LFpd>4RVA%JupVF`PH;E-@~ST79B-&z;)ta#PJirMTD& zlIe0Q3gZc9VO*eU7)wP*f=*Y9*EHb6B$=#`jA2?8#X87z5i3E9y)1eYu0mL| z;X3=xB%%Qw%G}^LBua1{U&KGJ>ANW%b2*+<5!%OGPE?ZGXM>)9N`r}C z->)Co?PklKsW6f&xtA`o2P3ZhnTULq_&lCXeFhW)o+9}z2&8HFfBN5|40bgwGa_)$ zOu^Mm!~xE|*Da@b{s;;}^*~vEqtFdz_ybZJ(}2$UPuea1fPdD1LCp9XU7`UpkTNC} z@dl$?cm&JJCAvbt!ZR4a#a|$U!Wg%O0rDY{p_a}S-Q#zC(GgAuOkYsDM4qQ<)>D4weOYk3OTq>FAP%=@4 zV`RpSOyaEEdLQsqg-Kk~MSjjqPi88^{L4iNE1&+VNWu@V{W)+<(lqt?`w+H#1go<6 zO<=A3K-KV LW#KJsVC&(33Dz}o literal 0 HcmV?d00001 diff --git a/ICE/utils/__pycache__/vpa.cpython-38.pyc b/ICE/utils/__pycache__/vpa.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..631e7132ca7a7f156bdb16416c6c970e47539818 GIT binary patch literal 1457 zcmZ`&OK;mo5Z+xbAF?gibCzuU0K?a^vL%MWK##FekSk4T4ZH-x%|3rniKrm}#bf;rXroZt0 zAdBicNW@WkS`X8Dl=wm4bYx;p64u-gf(N@Vv$gCkR8Cnak?wU8wEuKq?JqUsfh6oI9GCY$thZv~W8z5a8CF z!BDMRChf_jW*454Cw-}nE_jw|Q*gXMVaI78z+~j9sab{9SFv&cMs(L%nrQh8QL|vU zT=Dlbk1?gWG5pd)qX`#?qhBdL2P&HN|3&sy6-*lR5>F5mNa);IjViDK;!vY?&~ zi%J{rT^7TUt2q#aa*NUO`NMH%nL=Yo@z2%5Qn0N`de31qY6u|n2l6A!X+(fd^74xA z3oEw_?J{AH=@=*$&MTgCV~u&2zoe8H`?Z66Al_XdNd5uP&xvt+Q|gS8+@@qS5B@fK zACMR1XD%wc(4x70W*>r{utU(GWu_XV5B|Spj)J7;_p``X3=6iQMe2)yZFLQLDOJ&^ z>>q_)s}PF9+S>TK#wM_H5tu-|1Lo!|5SaE8(~_qV(6(9gN_>=yG42(cg`KG=F=`sK zRZt*YMP=g@`v!WY<`fn%pk~UbEEpo;@6kExQH8&2teDO^X(Ai_<0GjG2im-^lK-&z@34LgK>`N94a3@| zf?oim$Y##Q^bz@HCAWl~bL4V?fPOo-4UVyWgCIX+o^2cUM$h@l#{$@m=$MVI2jt}D zm;q02W5id7M={618S%L@;YjH`4ovmAZiuI4#%Uyj8-9zaukjeV3|M5XXq`j?f>1 z>jYfpQ2v)=I{+uAGw{q?jIh6_xOyLaYHrE;gkT(!vaG_=MycDVqSLj5%Mwa5y$8ps T=*Myer%HPuJnAuT)|>waP!Vbf literal 0 HcmV?d00001 diff --git a/ICE/utils/get_entities.py b/ICE/utils/get_entities.py new file mode 100644 index 00000000..ac66da97 --- /dev/null +++ b/ICE/utils/get_entities.py @@ -0,0 +1,37 @@ +import spacy +from collections import defaultdict +from typing import Dict, List +nlp = spacy.load("en_core_web_sm") + +def extract_entities(text:str)->Dict: + """Given a text find all the entities based on the type of entity + + Args: + text (str): Raghav works in Microsoft + + Returns: + Dict: {"PERSON":["Raghav"],"ORG":["Microsoft"]} + """ + doc = nlp(text) + out = defaultdict(list) + for ent in doc.ents: + out[ent.label_].append(ent.text) + + return out + +def extract_entity_string(text:str)->Dict: + """Given a text find all the entities based on the type of entity + + Args: + text (str): Raghav works in Microsoft + + Returns: + Dict: {"PERSON":["Raghav"],"ORG":["Microsoft"]} + """ + doc = nlp(text) + out = "" + for ent in doc.ents: + " ".join((ent.text)) + if out == "": + return text + return out \ No newline at end of file diff --git a/ICE/utils/pdf.py b/ICE/utils/pdf.py new file mode 100644 index 00000000..adf4f26c --- /dev/null +++ b/ICE/utils/pdf.py @@ -0,0 +1,17 @@ +import pdfkit +import json +from flask import Flask, send_file, request +def generate_pdf_from_html(html_string:str) -> bool: + """Generates pdf from html string + + Args: + html_string (str): + + Returns: + bool: Status of conversion True | False + """ + try: + pdfkit.from_string(html_string,'report.pdf') + except: + return False + return True \ No newline at end of file diff --git a/ICE/utils/utils.py b/ICE/utils/utils.py new file mode 100644 index 00000000..41719e42 --- /dev/null +++ b/ICE/utils/utils.py @@ -0,0 +1,39 @@ +import requests +from typing import Dict +def get_url_to_enum_dict(): + return { + "twitter.com": "TWITTER", + "linkedin.com": "LINKEDIN", + "instagram.com": "INSTAGRAM", + "facebook.com": "FACEBOOK", + "snapchat.com": "SNAPCHAT", + "swiggy.com": "SWIGGY", + "zomato.com": "ZOMATO", + "phonepe.com": "PHONEPE", + "pay.google.com": "GPAY", + "paytm.com": "PAYTM", + "amazon.in": "AMAZON", + "amazon.com": "AMAZON", + "flipkart.com": "FLIPKART", + "myntra.com": "MYNTRA", + "ajio.com": "AJIO", + "uber.com": "UBER", + "olacabs.com": "OLA" + } + + +def check_if_whatsapp_exists(lnum:str)-> Dict: + try: + + """Checks if the number has a whatsapp account + + Args: + lnum (str): given number of the person ("8384852943") + Returns: + out (Dict): {'balance': 8, 'status': True, 'numberstatus': True, 'businessnumber': False} + """ + out = requests.get(url="https://proweblook.com/api/v1/checkwanumber",params= {"number":lnum,"api_key":api_key}) + + return out.json()['numberstatus'] + except Exception as e: + return True \ No newline at end of file diff --git a/ICE/utils/vpa.py b/ICE/utils/vpa.py new file mode 100644 index 00000000..d8086359 --- /dev/null +++ b/ICE/utils/vpa.py @@ -0,0 +1,41 @@ +import requests +import json + +def get_token(): + url = "https://api.sandbox.co.in/authenticate" + + response = requests.post(url, headers=headers) + + return json.loads(response.text)['access_token'] + +def get_vpa_valid(vpa): + try: + + url = f"https://api.sandbox.co.in/bank/upi/{vpa}" + + payload = {} + + response = requests.request("GET", url, headers=headers, data = payload) + print(response.text) + return json.loads(response.text)['data']['account_exists'] + except Exception as e: + print(e) + return False +def get_validity_for_all_vpaIds(phoneNumber: str) -> dict: + VPAS = { + 'phonepe': ['@ybl','@ibl'], + 'paytm': ['@paytm'] + } + + result = dict() + for provider in VPAS: + isValid = False + for vpa in VPAS[provider]: + print(phoneNumber + vpa) + isValid = isValid or get_vpa_valid(phoneNumber + vpa) + + result[provider] = { + 'isAvailable': isValid + } + + return result \ No newline at end of file diff --git a/LICENSE b/LICENSE index ded1f875..96c7551e 100644 --- a/LICENSE +++ b/LICENSE @@ -1,6 +1,6 @@ MIT License -Copyright (c) 2023 hack2skill +Copyright (c) 2023 Rohit Nagraj Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/README.md b/README.md index 578bb9cc..e3f307ad 100644 --- a/README.md +++ b/README.md @@ -1,5 +1 @@ -# ksp-submission -This repository is created for Karnataka State Police Hackathon 2023 - submission collection. -## Team Information -### Team Name - -### Problem Statement - +# KSP_Hack_2023 \ No newline at end of file diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 00000000..663bd1f6 --- /dev/null +++ b/requirements.txt @@ -0,0 +1 @@ +requests \ No newline at end of file diff --git a/search_api/search.py b/search_api/search.py new file mode 100644 index 00000000..64ab996b --- /dev/null +++ b/search_api/search.py @@ -0,0 +1,60 @@ +import json +import requests + + + +def search_text(query, no_of_results): + """ + Args: + query (str): text to search + no_of_results (int): How many results do you want + + Returns: + result (list({'title': str, 'url': str, 'text': str})): list of search results + """ + url = f"https://customsearch.googleapis.com/customsearch/v1?q={query}&key={API_KEY}&cx=" + + results = [] + for i in range(((no_of_results-1)//10) + 1): + if i != 0: + url = f"{url}&start={i*10 + 1}" + response = requests.get(url) + result = json.loads(response.text) + for item in result['items']: + results.append({ + 'title': item['title'], + 'url': item['link'], + 'text': item['snippet'] + }) + return results[:no_of_results] + + +def search_image(query, no_of_results): + """ + Args: + query (str): text to search + no_of_results (int): How many results do you want + + Returns: + result (list({'url': str, 'text': str})): list of image search results + """ + url = f"https://customsearch.googleapis.com/customsearch/v1?q={query}&key={API_KEY}&cx=" + url = f"{url}&searchType=image" + + results = [] + for i in range(((no_of_results-1)//10) + 1): + if i != 0: + url = f"{url}&start={i*10 + 1}" + response = requests.get(url) + result = json.loads(response.text) + for item in result['items']: + results.append({ + 'url': item['link'], + 'text': item['snippet'] + }) + return results[:no_of_results] + +if __name__ == '__main__': + x = search_text("Raghav Maheshwari", no_of_results=10) + print(x) + diff --git a/search_api/upi_validation.py b/search_api/upi_validation.py new file mode 100644 index 00000000..a43897c2 --- /dev/null +++ b/search_api/upi_validation.py @@ -0,0 +1,36 @@ +import requests +import json + + + +def get_token(): + url = "https://api.sandbox.co.in/authenticate" + + headers = { + "accept": "application/json", + "x-api-version": "1.0", + "x-api-key": "", + "x-api-secret": "" + } + + response = requests.post(url, headers=headers) + + return json.loads(response.text)['access_token'] + +def get_vpa_valid(vpa): + url = f"https://api.sandbox.co.in/bank/upi/{vpa}" + + payload = {} + headers = { + 'Authorization': get_token(), + 'x-api-key': '', + 'x-api-version': '1.0.0' + } + + response = requests.request("GET", url, headers=headers, data = payload) + + return json.loads(response.text)['data']['account_exists'] + +if __name__ == "__main__": + vpa = '9845107111@ybl' + print("The UPI ID is present: ", get_vpa_valid(vpa)) \ No newline at end of file diff --git a/truecaller_api/truecaller.py b/truecaller_api/truecaller.py new file mode 100644 index 00000000..3d8aa2bd --- /dev/null +++ b/truecaller_api/truecaller.py @@ -0,0 +1,43 @@ +import requests +from typing import Dict +authkey = creds['auth'] + +def numsearch(num:str, authkey = authkey) -> Dict: + """Searches the given number through TrueCaller Directory and returs the details of the given user + + Args: + num (str): "Number to be searched in the directory format 'NUMBER'" + authkey (_type_, optional): Defaults to authkey. + + Returns: + OUTPUT (Dict): {'name': 'Raghav Maheshwari', 'gender': 'UNKNOWN', + 'address': [{'address': 'IN', 'city': 'Uttar Pradesh West', 'countryCode': 'IN', 'timeZone': '+05:30', 'type': 'address'}], + 'email': 'raghav.ddps2@gmail.com'} + """ + params = {'q':num, 'countryCode':'', 'type':'4', 'locAddr':'', 'placement':'SEARCHRESULTS,HISTORY,DETAILS', 'encoding':'json'} + resp = requests.get('https://search5-noneu.truecaller.com/v2/search', headers=headers, params=params) + resp = resp.json() + + out = {"name":"","gender":"","address":[],"image":"","email":""} + if resp['data'][0].get('name'): + out["name"] = resp["data"][0]["name"] + if resp['data'][0].get('gender'): + out["gender"] = resp["data"][0]["gender"] + if resp['data'][0].get('addresses'): + out["address"] = resp["data"][0]["addresses"] + if resp['data'][0].get('image'): + out["image"] = resp["data"][0]["image"] + if resp['data'][0].get('phones'): + if "carrier" in resp['data'][0]["phones"][0]: + out["carrier"] = resp["data"][0]["phones"][0]["carrier"] + if resp['data'][0].get('internetAddresses'): + if "id" in resp["data"][0]["internetAddresses"][0]: + out["email"] = resp["data"][0]["internetAddresses"][0]["id"] + + return out + +if __name__=='__main__': + output = numsearch(lnum) + print(output) + +