# File: TransformData.py (reconstructed from the collapsed diff hunk)
"""Filter employee records from an XML export and publish the result.

The pipeline reads the XML data source, keeps the records that satisfy the
club criteria, stores them in a database, writes a JSON report and shows a
bar chart of the positions.
"""
import json
import sys
import time
from collections import Counter

import matplotlib.pyplot as plt
from lxml import etree as ET

from model import DevMountainData,ClubData
from Util import DataUtility,FileUtility,DateUtility

# Keys of the JSON report, in the positional order of the tuples produced by
# ``ClubData.toSet()``.
_REPORT_FIELDS = (
    "emp_id",
    "passport",
    "firstname",
    "lastname",
    "gender",
    "birthday",
    "nationality",
    "hired",
    "dept",
    "position",
    "status",
    "region",
)

# Index of the position column inside a result tuple.
_POSITION_INDEX = 9


class Execute(object):
    """Run the read / transform / export steps over one configuration.

    Attributes:
        config: Settings dict; the methods read the keys ``datasource``,
            ``dbName`` and ``clubDataReport``.
        total: Number of ``/records/record`` elements in the data source.
        valid: Number of records kept by the last ``transform()`` call.
        invalid: Number of records rejected by the last ``transform()``
            call (duplicates are counted separately, not here).
        ListAllresult: Accepted records as tuples, in acceptance order.
        results: The accepted records as a ``set`` of tuples.
    """

    total = 0
    valid = 0
    invalid = 0

    def __init__(self):
        # Mutable state lives on the instance; as class attributes these
        # containers would be shared by every ``Execute`` object.
        self.config = {}
        self.ListAllresult = []
        self.results = []
        self.rawData = []

    def setup(self, config):
        """Store the configuration dict used by the other steps."""
        self.config = config

    def readfile(self):
        """Parse the XML data source and pre-select candidate records.

        Sets ``total`` to the number of records in the file and ``rawData``
        to the records whose STATUS is ``1``, whose POSITION is Steward,
        Pilot or Airhostess, and whose EMPID differs from PASSPORT.
        """
        print("readfile..")
        xmlData = self.config['datasource']
        print(xmlData)
        parser = ET.XMLParser(remove_comments=False)
        xml = ET.parse(xmlData, parser=parser)
        self.total = int(xml.xpath("count(/records/record)"))
        print("Raw Data Total:", self.total)
        self.rawData = xml.xpath(
            "/records/record[STATUS/text()='1' and "
            "(POSITION/text()='Steward' or POSITION/text()='Pilot' "
            "or POSITION/text()='Airhostess') "
            "and EMPID/text()!=PASSPORT/text()]"
        )

    def transform(self):
        """Keep long-serving employees and drop repeated employee ids.

        A record is accepted when ``DateUtility.diffYear`` between its hire
        date and the current date is greater than ``YEAR_EXP`` and its
        ``emp_id`` has not been accepted before. The first record seen for
        an ``emp_id`` wins.

        NOTE(review): ``diffYear`` is assumed to return a number of years;
        confirm against ``Util.DateUtility``.
        """
        print("transform Data..")
        dateUtil = DateUtility()
        print(dateUtil)
        print("rawData in condition ..", len(self.rawData))
        currentDate = dateUtil.currentDate()
        YEAR_EXP = 3

        # Set of accepted ids gives O(1) duplicate detection instead of
        # rescanning the whole result list for every record.
        seen_ids = {row[0] for row in self.ListAllresult}
        accepted = 0
        duplicates = 0

        for element in self.rawData:
            clubData = ClubData(DevMountainData(element))
            diffYear = dateUtil.diffYear(dateUtil.toDate(clubData.hired), currentDate)

            if not (diffYear > YEAR_EXP):
                continue
            if clubData.emp_id in seen_ids:
                duplicates += 1
                continue

            seen_ids.add(clubData.emp_id)
            self.ListAllresult.append(clubData.toSet())
            accepted += 1

        # Kept as a set so the object handed to the later steps has the same
        # type as before; it can no longer shrink the list because ids are
        # already unique, but any exact repeats are still reported below.
        self.results = set(self.ListAllresult)
        duplicates += len(self.ListAllresult) - len(self.results)

        self.valid = len(self.results)
        # Duplicates are reported on their own line, so they are excluded
        # from the invalid count.
        self.invalid = self.total - accepted - duplicates

        print("Raw Data :", self.valid)
        print("Raw Data Duplicate:", str(duplicates))
        print("Raw Data InValid:", str(self.invalid))

    def generateJson(self):
        """Write the accepted records as a JSON array of objects.

        Rows are emitted in ascending ``emp_id`` order so that the report is
        identical between runs; iterating the set directly gives an
        arbitrary order.
        """
        print("Generate Json..")
        ordered = sorted(self.results, key=lambda row: row[0])
        results = [dict(zip(_REPORT_FIELDS, row)) for row in ordered]
        FileUtility().write(self.config['clubDataReport'], json.dumps(results))

    def loadDb(self):
        """Create the database named in the config and save the results."""
        print("loadDb..")
        dataUtility = DataUtility(self.config['dbName'])
        dataUtility.dbSetup()
        dataUtility.save(self.results)

    def generateGraph(self):
        """Show a bar chart with the number of accepted records per position."""
        print("generate Graph..")

        counts = Counter(row[_POSITION_INDEX] for row in self.results)

        # labels for bars, and their x positions
        tick_label = ['Pilot', 'Airhostess', 'Steward']
        left = [1, 2, 3]

        # heights of bars; Counter yields 0 for a position with no records
        height = [counts[label] for label in tick_label]

        # plotting a bar chart
        plt.bar(left, height, tick_label=tick_label,
                width=0.8, color=['red', 'green', 'blue'])

        # plot title
        plt.title('Summary Position Chart')

        # function to show the plot
        plt.show()


def main():
    """Run the whole pipeline once and print the elapsed time."""
    print("#####start#####")

    config = {
        "datasource": "data-devclub-1.xml",
        "dbName": "Hackaton.db",
        "clubDataReport": "empClubData",
    }

    startTime = time.perf_counter()

    exe = Execute()
    exe.setup(config)
    exe.readfile()
    exe.transform()
    exe.loadDb()
    exe.generateJson()
    exe.generateGraph()

    endTime = time.perf_counter()
    totalTime = endTime - startTime
    print(f'##Total used time {totalTime:.4f} seconds##')


if __name__ == "__main__":
    sys.exit(main())
"09-03-1982", "nationality": "France", "hired": "04-02-2017", "dept": "Aircraft Maintenance", "position": "Airhostess", "status": 1, "region": "Europe"}, {"emp_id": 97, "passport": "SUF73DKV4QE", "firstname": "Dante", "lastname": "Hart", "gender": 0, "birthday": "21-12-1999", "nationality": "Peru", "hired": "22-02-2016", "dept": "Pilot", "position": "Pilot", "status": 1, "region": "Europe"}, {"emp_id": 23, "passport": "NFH65BYM0VB", "firstname": "Armand", "lastname": "Horn", "gender": 0, "birthday": "24-05-1987", "nationality": "Netherlands", "hired": "19-06-2007", "dept": "Aircraft Maintenance", "position": "Airhostess", "status": 1, "region": "Ocenia"}, {"emp_id": 29, "passport": "CMK62UAD3VK", "firstname": "Rowan", "lastname": "Leonard", "gender": 1, "birthday": "15-07-1974", "nationality": "Germany", "hired": "27-03-2004", "dept": "Aircraft Maintenance", "position": "Pilot", "status": 1, "region": "Ocenia"}, {"emp_id": 50, "passport": "MRC33GHJ2KW", "firstname": "Calvin", "lastname": "Roach", "gender": 1, "birthday": "16-04-1999", "nationality": "Mexico", "hired": "18-03-2011", "dept": "Flight Attendance", "position": "Steward", "status": 1, "region": "Europe"}] \ No newline at end of file diff --git a/model.py b/model.py new file mode 100644 index 0000000..328e00b --- /dev/null +++ b/model.py @@ -0,0 +1,41 @@ +import json + +class DevMountainData(object): + def __init__(self, data): + self.emp_id=int(data[0].text) + self.passport=data[1].text + self.firstname=data[2].text + self.lastname=data[3].text + self.gender=int(data[4].text) + self.birthday=data[5].text + self.nationality=data[6].text + self.hired=data[7].text + self.dept=data[8].text + self.position=data[9].text + self.status=int(data[10].text) + self.region=data[11].text + + def toSet(self): + return (self.emp_id,self.passport,self.firstname,self.lastname,self.gender,self.birthday,self.nationality,self.hired,self.dept,self.position,self.status,self.region) + + +class ClubData(object): + def 
__init__(self, data): + self.emp_id=data.emp_id + self.passport=data.passport + self.firstname=data.firstname + self.lastname=data.lastname + self.gender=data.gender + self.birthday=data.birthday + self.nationality=data.nationality + self.hired=data.hired + self.dept=data.dept + self.position=data.position + self.status=data.status + self.region=data.region + + def toSet(self): + return (self.emp_id,self.passport,self.firstname,self.lastname,self.gender,self.birthday,self.nationality,self.hired,self.dept,self.position,self.status,self.region) + + def toJson(self): + return json.dumps(self.__dict__) \ No newline at end of file