From 41491d1baade29a49331fa19a14ac399f7b7f6c5 Mon Sep 17 00:00:00 2001 From: Mofostupiud <47454e45@gmail.com> Date: Sat, 8 Oct 2022 21:23:50 +0700 Subject: [PATCH 1/2] add Main --- TransformData.py | 0 1 file changed, 0 insertions(+), 0 deletions(-) create mode 100644 TransformData.py diff --git a/TransformData.py b/TransformData.py new file mode 100644 index 0000000..e69de29 From 9f2d19ee07f222798b103b4de0e22d9ebd8a971b Mon Sep 17 00:00:00 2001 From: Mofostupiud <47454e45@gmail.com> Date: Sun, 9 Oct 2022 19:59:13 +0700 Subject: [PATCH 2/2] finish --- Hackaton.db | Bin 0 -> 8192 bytes TransformData.py | 157 +++++++++++++++++++++++++++++++++++++++++++++++ empClubData | 1 + model.py | 41 +++++++++++++ 4 files changed, 199 insertions(+) create mode 100644 Hackaton.db create mode 100644 empClubData create mode 100644 model.py diff --git a/Hackaton.db b/Hackaton.db new file mode 100644 index 0000000000000000000000000000000000000000..f3a4e60f22cd8a8ad29315cac035e9612eb53737 GIT binary patch literal 8192 zcmeI1QE%E-6vrWL*RV9{qzW+aL8v_Omj%}b+tPK-THZ#F5de!11CEe0Y-okU<4Qe zMt~7u1Q-EEfDvE>7=izsz;}H1exX##exFbAyMU0B<#ZOT-d$d|bjMIVgY#6aXKnf&u!Ip?SbQQBh}H{s`Eq?gs0D&rsH~j)iSP!d+Ke`meFq*&L>69 zbi8&$om_vkuX^U7ul7vu#ushVF&fv`HVpelJ@&vg>GRjuxt{6`-A{=fqjhu1Q~KL{ zXXp3K!=D~%d%x}du=nNe#qM7y(9r5%`}Hc(b#c zJt!7(e35^#pDQBE(Iu(X?x5~i$H6y#=tOw75aqH^DU0=b-6CiHED~#FK`J9b6q|wn zevxu&nv!si!x?c?a*E@*v5KQ5nSKuHh5a1A&sPh%VoA5UG8(E4Y1DP1Qylh46vAS$ zS{4+LS1K(6|2c-|NZIHM71fVtF+NH;3;QADpebxWh|1pd8%?tr9>fu4{ff zeATN+=3p!jM(rq!Vw^@6K075rAV7^0EgUst+BnQ$qxmm2Z}JR2gFN}-HeXuTe5W%s zTmAQUJBdb%xB>Hd+KWg3A)>=Si_zl1U?QOLJ~sp)0x)L@Hhqs5^9?&RYo_I zKl*+{YO)3D)Gh`71)qELmDY4?$TB99{>ml63D#E00>Pf5!&Q)(;^15q%5Z{k?JryE z%8Wx-#aWQf7jwV`q>IzR(3ZsUz(6KAF~(y*m_=u3ZEo1FvI|Y)WfT*-WohBJY}qUQ HP4#~O<7$q3 literal 0 HcmV?d00001 diff --git a/TransformData.py b/TransformData.py index e69de29..82c121e 100644 --- a/TransformData.py +++ b/TransformData.py @@ -0,0 +1,157 @@ +from lxml import etree as ET +from model import 
import json
import sys
import time
from collections import Counter


class Execute(object):
    """Extract, filter and publish the employee club data set.

    The steps are meant to be called in this order (see ``main``):
    ``setup`` -> ``readfile`` -> ``transform`` -> ``loadDb`` ->
    ``generateJson`` -> ``generateGraph``.

    Rows are handled as 12-item tuples in the order given by ``_FIELDS``
    (the tuple produced by ``ClubData.toSet()``).
    """

    # A record qualifies only when the employee was hired strictly more than
    # this many years ago.
    YEAR_EXP = 3

    # Selects active records (STATUS 1) in one of the three crew positions
    # whose EMPID differs from PASSPORT.
    _RECORD_FILTER = (
        "/records/record[STATUS/text()='1' and "
        "(POSITION/text()='Steward' or POSITION/text()='Pilot' "
        "or POSITION/text()='Airhostess') and "
        "EMPID/text()!=PASSPORT/text()]"
    )

    # Key names of the JSON report, in row-tuple order.
    _FIELDS = (
        "emp_id", "passport", "firstname", "lastname", "gender", "birthday",
        "nationality", "hired", "dept", "position", "status", "region",
    )
    _POSITION_INDEX = 9

    # Bar order of the summary chart.
    _POSITIONS = ("Pilot", "Airhostess", "Steward")

    def __init__(self):
        # State lives on the instance: the previous class-level list/dict
        # objects were shared by every Execute instance, so rows appended by
        # one run leaked into the next.
        self.config = {}
        self.total = 0
        self.valid = 0
        self.invalid = 0
        self.rawData = []
        self.ListAllresult = []
        self.results = set()

    def setup(self, config):
        """Store the run configuration.

        Keys read by the other steps: ``datasource``, ``dbName`` and
        ``clubDataReport``.
        """
        self.config = config

    def readfile(self):
        """Parse the XML data source and select the candidate records.

        Sets ``total`` to the number of ``/records/record`` elements and
        ``rawData`` to the elements matching ``_RECORD_FILTER``.
        """
        # Function-scope import: only this step needs lxml.
        from lxml import etree as ET

        print("readfile..")
        xmlData = self.config['datasource']
        print(xmlData)
        parser = ET.XMLParser(remove_comments=False)
        xml = ET.parse(xmlData, parser=parser)
        # XPath count() yields a float; keep the total as an int.
        self.total = int(xml.xpath("count(/records/record)"))
        print("Raw Data Total:", self.total)
        self.rawData = xml.xpath(self._RECORD_FILTER)

    def transform(self):
        """Keep long-serving employees, one row per ``emp_id``.

        Rebuilds ``ListAllresult`` and ``results`` from ``rawData`` and
        updates ``valid`` and ``invalid``. The first row seen for an
        ``emp_id`` wins; later rows with the same id are counted as
        duplicates.
        """
        from model import ClubData, DevMountainData
        from Util import DateUtility

        print("transform Data..")
        dateUtil = DateUtility()
        print(dateUtil)
        print("rawData in condition ..", len(self.rawData))
        currentDate = dateUtil.currentDate()

        self.ListAllresult = []
        seen_ids = set()  # O(1) membership instead of rescanning the list
        duplicates = 0
        for element in self.rawData:
            clubData = ClubData(DevMountainData(element))
            diffYear = dateUtil.diffYear(
                dateUtil.toDate(clubData.hired), currentDate)
            if diffYear <= self.YEAR_EXP:
                continue
            if clubData.emp_id in seen_ids:
                duplicates += 1
                continue
            seen_ids.add(clubData.emp_id)
            self.ListAllresult.append(clubData.toSet())

        self.results = set(self.ListAllresult)
        self.valid = len(self.results)
        # Duplicates used to be folded into the invalid count while the
        # duplicate figure itself was always 0; report them separately so
        # valid + duplicate + invalid == total.
        self.invalid = self.total - self.valid - duplicates

        print("Raw Data :", self.valid)
        print("Raw Data Duplicate:", str(duplicates))
        print("Raw Data InValid:", str(self.invalid))

    @classmethod
    def _to_record(cls, row):
        """Return *row* as a dict keyed by ``_FIELDS``."""
        return dict(zip(cls._FIELDS, row))

    @classmethod
    def _count_positions(cls, rows):
        """Return the number of rows per position, in ``_POSITIONS`` order."""
        counts = Counter(row[cls._POSITION_INDEX] for row in rows)
        return [counts[name] for name in cls._POSITIONS]

    def generateJson(self):
        """Write ``results`` as a JSON array to ``config['clubDataReport']``."""
        from Util import FileUtility

        print("Generate Json..")
        # results is a set, whose iteration order differs between runs;
        # order by emp_id so the report is reproducible.
        ordered = sorted(self.results, key=lambda row: row[0])
        records = [self._to_record(row) for row in ordered]
        FileUtility().write(self.config['clubDataReport'], json.dumps(records))

    def loadDb(self):
        """Save ``results`` through ``DataUtility`` into ``config['dbName']``."""
        from Util import DataUtility

        print("loadDb..")
        dataUtility = DataUtility(self.config['dbName'])
        dataUtility.dbSetup()
        dataUtility.save(self.results)

    def generateGraph(self):
        """Show a bar chart with the number of employees per position."""
        # Function-scope import: only the chart step needs matplotlib.
        import matplotlib.pyplot as plt

        print("generate Graph..")
        left = [1, 2, 3]
        height = self._count_positions(self.results)
        tick_label = list(self._POSITIONS)

        plt.bar(left, height, tick_label=tick_label,
                width=0.8, color=['red', 'green', 'blue'])
        plt.title('Summary Position Chart')
        plt.show()


def main():
    """Run the whole pipeline once and print the elapsed time."""
    print("#####start#####")

    config = {
        "datasource": "data-devclub-1.xml",
        "dbName": "Hackaton.db",
        "clubDataReport": "empClubData"
    }

    startTime = time.perf_counter()

    exe = Execute()
    exe.setup(config)
    exe.readfile()
    exe.transform()
    exe.loadDb()
    exe.generateJson()
    exe.generateGraph()

    endTime = time.perf_counter()
    totalTime = endTime - startTime
    print(f'##Total used time {totalTime:.4f} seconds##')


if __name__ == "__main__":
    sys.exit(main())
"passport": "OUP31WOE2IE", "firstname": "Dara", "lastname": "Wilcox", "gender": 1, "birthday": "29-06-1996", "nationality": "Singapore", "hired": "18-05-2011", "dept": "Flight Attendance", "position": "Airhostess", "status": 1, "region": "Canada"}, {"emp_id": 34, "passport": "BFS82MEY3CX", "firstname": "Selma", "lastname": "Bush", "gender": 0, "birthday": "26-03-1972", "nationality": "Italy", "hired": "10-10-2008", "dept": "Flight Attendance", "position": "Airhostess", "status": 1, "region": "USA"}, {"emp_id": 5, "passport": "AZE20CSG4MU", "firstname": "Lillian", "lastname": "Reese", "gender": 0, "birthday": "03-12-1982", "nationality": "Ukraine", "hired": "19-05-2002", "dept": "Flight Planning", "position": "Steward", "status": 1, "region": "Canada"}, {"emp_id": 33, "passport": "EWD45RJW5YK", "firstname": "Carter", "lastname": "Velasquez", "gender": 0, "birthday": "23-11-1967", "nationality": "Indonesia", "hired": "27-02-2005", "dept": "Flight Planning", "position": "Pilot", "status": 1, "region": "APAC"}, {"emp_id": 66, "passport": "WKV12UQC6QF", "firstname": "Zachery", "lastname": "Valentine", "gender": 0, "birthday": "04-06-1971", "nationality": "Philippines", "hired": "25-08-2011", "dept": "Flight Attendance", "position": "Steward", "status": 1, "region": "Middle East"}, {"emp_id": 3, "passport": "JUI65YBK7AF", "firstname": "Jada", "lastname": "Bender", "gender": 0, "birthday": "28-05-1963", "nationality": "Pakistan", "hired": "11-02-2001", "dept": "Pilot", "position": "Pilot", "status": 1, "region": "Canada"}, {"emp_id": 93, "passport": "UXL43IOW6OV", "firstname": "Honorato", "lastname": "Maxwell", "gender": 1, "birthday": "09-03-1982", "nationality": "France", "hired": "04-02-2017", "dept": "Aircraft Maintenance", "position": "Airhostess", "status": 1, "region": "Europe"}, {"emp_id": 97, "passport": "SUF73DKV4QE", "firstname": "Dante", "lastname": "Hart", "gender": 0, "birthday": "21-12-1999", "nationality": "Peru", "hired": "22-02-2016", "dept": "Pilot", 
"position": "Pilot", "status": 1, "region": "Europe"}, {"emp_id": 23, "passport": "NFH65BYM0VB", "firstname": "Armand", "lastname": "Horn", "gender": 0, "birthday": "24-05-1987", "nationality": "Netherlands", "hired": "19-06-2007", "dept": "Aircraft Maintenance", "position": "Airhostess", "status": 1, "region": "Ocenia"}, {"emp_id": 29, "passport": "CMK62UAD3VK", "firstname": "Rowan", "lastname": "Leonard", "gender": 1, "birthday": "15-07-1974", "nationality": "Germany", "hired": "27-03-2004", "dept": "Aircraft Maintenance", "position": "Pilot", "status": 1, "region": "Ocenia"}, {"emp_id": 50, "passport": "MRC33GHJ2KW", "firstname": "Calvin", "lastname": "Roach", "gender": 1, "birthday": "16-04-1999", "nationality": "Mexico", "hired": "18-03-2011", "dept": "Flight Attendance", "position": "Steward", "status": 1, "region": "Europe"}] \ No newline at end of file diff --git a/model.py b/model.py new file mode 100644 index 0000000..328e00b --- /dev/null +++ b/model.py @@ -0,0 +1,41 @@ +import json + +class DevMountainData(object): + def __init__(self, data): + self.emp_id=int(data[0].text) + self.passport=data[1].text + self.firstname=data[2].text + self.lastname=data[3].text + self.gender=int(data[4].text) + self.birthday=data[5].text + self.nationality=data[6].text + self.hired=data[7].text + self.dept=data[8].text + self.position=data[9].text + self.status=int(data[10].text) + self.region=data[11].text + + def toSet(self): + return (self.emp_id,self.passport,self.firstname,self.lastname,self.gender,self.birthday,self.nationality,self.hired,self.dept,self.position,self.status,self.region) + + +class ClubData(object): + def __init__(self, data): + self.emp_id=data.emp_id + self.passport=data.passport + self.firstname=data.firstname + self.lastname=data.lastname + self.gender=data.gender + self.birthday=data.birthday + self.nationality=data.nationality + self.hired=data.hired + self.dept=data.dept + self.position=data.position + self.status=data.status + 
self.region=data.region + + def toSet(self): + return (self.emp_id,self.passport,self.firstname,self.lastname,self.gender,self.birthday,self.nationality,self.hired,self.dept,self.position,self.status,self.region) + + def toJson(self): + return json.dumps(self.__dict__) \ No newline at end of file