Skip to content
Closed

#1 #21

Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Binary file added Hackaton.db
Binary file not shown.
157 changes: 157 additions & 0 deletions TransformData.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,157 @@
from lxml import etree as ET
from model import DevMountainData,ClubData
from Util import DataUtility,FileUtility,DateUtility
import json
import time
import sys
from collections import Counter

import matplotlib.pyplot as plt

class Execute(object):
    """Run the employee-data pipeline: read XML, filter, store, and report.

    Call order used by ``main``: ``setup`` -> ``readfile`` -> ``transform``
    -> ``loadDb`` -> ``generateJson`` -> ``generateGraph``.

    Methods name their instance parameter ``resource`` instead of ``self``;
    that spelling is kept so the existing interface is unchanged.

    Attributes:
        config: Settings dict supplied through ``setup``.
        total: Number of ``/records/record`` elements found by ``readfile``.
        valid: Number of unique records kept by ``transform``.
        invalid: Records rejected by the XPath filter or the tenure rule.
        rawData: Elements selected by the XPath filter in ``readfile``.
        ListAllresult: Accepted record tuples, in input order.
        results: The accepted record tuples as a ``set``.
    """

    # Scalar defaults are immutable, so sharing them on the class is harmless.
    total = 0
    valid = 0
    invalid = 0

    # Column order of the tuples built by ``ClubData.toSet``; reused as JSON keys.
    _REPORT_FIELDS = (
        "emp_id",
        "passport",
        "firstname",
        "lastname",
        "gender",
        "birthday",
        "nationality",
        "hired",
        "dept",
        "position",
        "status",
        "region",
    )

    # Index of the position column inside a record tuple.
    _POSITION_INDEX = 9

    def __init__(resource):
        """Create per-instance containers.

        The containers used to be class attributes, so every instance
        appended to the same list; they are now private to each instance.
        """
        resource.config = {}
        resource.rawData = []
        resource.ListAllresult = []
        resource.results = []

    def setup(resource, config):
        """Store the settings dict used by the other steps.

        Args:
            config: Mapping read by later steps through the keys
                ``'datasource'``, ``'dbName'`` and ``'clubDataReport'``.
        """
        resource.config = config

    def readfile(resource):
        """Parse the configured XML file and select candidate records.

        Sets ``total`` to the count of all ``/records/record`` elements and
        ``rawData`` to those with STATUS '1', a POSITION of Steward, Pilot or
        Airhostess, and an EMPID that differs from PASSPORT.
        """
        print("readfile..")
        xmlData = resource.config['datasource']
        print(xmlData)
        parser = ET.XMLParser(remove_comments=False)
        xml = ET.parse(xmlData, parser=parser)
        # XPath count() yields a float, hence the int() conversion.
        resource.total = int(xml.xpath("count(/records/record)"))
        print("Raw Data Total:", resource.total)
        resource.rawData = xml.xpath("/records/record[STATUS/text()='1' and (POSITION/text()='Steward' or POSITION/text()='Pilot' or POSITION/text()='Airhostess') and EMPID/text()!=PASSPORT/text()]")

    def transform(resource):
        """Keep long-tenure records and drop repeated employee ids.

        A record is kept when ``diffYear(hired, today)`` exceeds ``YEAR_EXP``
        (presumably elapsed years; ``DateUtility`` is defined elsewhere) and
        its ``emp_id`` has not been seen before; the first occurrence wins.

        Fills ``ListAllresult``, ``results``, ``valid`` and ``invalid`` and
        prints a summary. Duplicates are counted while they are skipped; the
        previous list-vs-set difference was always 0 because the list had
        already been de-duplicated by ``emp_id``.
        """
        print("transform Data..")
        dateUtil = DateUtility()
        print(dateUtil)
        print("rawData in condition ..", len(resource.rawData))
        currentDate = dateUtil.currentDate()
        YEAR_EXP = 3

        accepted = []
        seen_emp_ids = set()  # O(1) membership instead of rescanning the list
        duplicates = 0
        for element in resource.rawData:
            clubData = ClubData(DevMountainData(element))
            diffYear = dateUtil.diffYear(dateUtil.toDate(clubData.hired), currentDate)
            if diffYear > YEAR_EXP:
                if clubData.emp_id in seen_emp_ids:
                    duplicates += 1
                else:
                    seen_emp_ids.add(clubData.emp_id)
                    accepted.append(clubData.toSet())

        resource.ListAllresult = accepted
        resource.results = set(accepted)
        resource.valid = len(resource.results)
        # Whatever is neither kept nor a duplicate was rejected by a filter.
        resource.invalid = resource.total - resource.valid - duplicates

        print("Raw Data :", resource.valid)
        print("Raw Data Duplicate:", str(duplicates))
        print("Raw Data InValid:", str(resource.invalid))

    def generateJson(resource):
        """Write the kept records as a JSON array of objects.

        Records are ordered by ``emp_id`` so the report is stable between
        runs; iterating the ``results`` set directly gave an arbitrary order.
        The text is handed to ``FileUtility().write`` together with
        ``config['clubDataReport']``.
        """
        print("Generate Json..")
        ordered = sorted(resource.results, key=lambda row: row[0])
        report = [dict(zip(resource._REPORT_FIELDS, row)) for row in ordered]
        FileUtility().write(resource.config['clubDataReport'], json.dumps(report))

    def loadDb(resource):
        """Pass the kept records to ``DataUtility`` for storage.

        ``DataUtility`` is defined elsewhere; this step only calls its
        ``dbSetup`` and ``save`` methods with ``config['dbName']`` and
        ``results``.
        """
        print("loadDb..")
        dataUtility = DataUtility(resource.config['dbName'])
        dataUtility.dbSetup()
        dataUtility.save(resource.results)

    def generateGraph(resource):
        """Show a bar chart with the number of kept records per position."""
        print("generate Graph..")

        # labels for bars
        tick_label = ['Pilot', 'Airhostess', 'Steward']

        # heights of bars; a position missing from the data counts as 0
        tally = Counter(row[resource._POSITION_INDEX] for row in resource.results)
        height = [tally[label] for label in tick_label]

        left = [1, 2, 3]

        # plotting a bar chart
        plt.bar(left, height, tick_label=tick_label,
                width=0.8, color=['red', 'green', 'blue'])

        # plot title
        plt.title('Summary Position Chart')

        # function to show the plot
        plt.show()

def main():
    """Run every pipeline step once and print the elapsed time.

    Builds the settings dict, drives an ``Execute`` instance through its
    steps in a fixed order, and reports the duration measured with
    ``time.perf_counter``. Returns ``None``.
    """
    print("#####start#####")

    settings = {
        "datasource": "data-devclub-1.xml",
        "dbName": "Hackaton.db",
        "clubDataReport": "empClubData",
    }

    startTime = time.perf_counter()

    pipeline = Execute()
    pipeline.setup(settings)
    # The order matters: later steps read what transform() produced.
    for step in (
        pipeline.readfile,
        pipeline.transform,
        pipeline.loadDb,
        pipeline.generateJson,
        pipeline.generateGraph,
    ):
        step()

    totalTime = time.perf_counter() - startTime
    print(f'##Total used time {totalTime:.4f} seconds##')


if __name__ == "__main__":
    sys.exit(main())
1 change: 1 addition & 0 deletions empClubData
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
[{"emp_id": 95, "passport": "OUP31WOE2IE", "firstname": "Dara", "lastname": "Wilcox", "gender": 1, "birthday": "29-06-1996", "nationality": "Singapore", "hired": "18-05-2011", "dept": "Flight Attendance", "position": "Airhostess", "status": 1, "region": "Canada"}, {"emp_id": 34, "passport": "BFS82MEY3CX", "firstname": "Selma", "lastname": "Bush", "gender": 0, "birthday": "26-03-1972", "nationality": "Italy", "hired": "10-10-2008", "dept": "Flight Attendance", "position": "Airhostess", "status": 1, "region": "USA"}, {"emp_id": 5, "passport": "AZE20CSG4MU", "firstname": "Lillian", "lastname": "Reese", "gender": 0, "birthday": "03-12-1982", "nationality": "Ukraine", "hired": "19-05-2002", "dept": "Flight Planning", "position": "Steward", "status": 1, "region": "Canada"}, {"emp_id": 33, "passport": "EWD45RJW5YK", "firstname": "Carter", "lastname": "Velasquez", "gender": 0, "birthday": "23-11-1967", "nationality": "Indonesia", "hired": "27-02-2005", "dept": "Flight Planning", "position": "Pilot", "status": 1, "region": "APAC"}, {"emp_id": 66, "passport": "WKV12UQC6QF", "firstname": "Zachery", "lastname": "Valentine", "gender": 0, "birthday": "04-06-1971", "nationality": "Philippines", "hired": "25-08-2011", "dept": "Flight Attendance", "position": "Steward", "status": 1, "region": "Middle East"}, {"emp_id": 3, "passport": "JUI65YBK7AF", "firstname": "Jada", "lastname": "Bender", "gender": 0, "birthday": "28-05-1963", "nationality": "Pakistan", "hired": "11-02-2001", "dept": "Pilot", "position": "Pilot", "status": 1, "region": "Canada"}, {"emp_id": 93, "passport": "UXL43IOW6OV", "firstname": "Honorato", "lastname": "Maxwell", "gender": 1, "birthday": "09-03-1982", "nationality": "France", "hired": "04-02-2017", "dept": "Aircraft Maintenance", "position": "Airhostess", "status": 1, "region": "Europe"}, {"emp_id": 97, "passport": "SUF73DKV4QE", "firstname": "Dante", "lastname": "Hart", "gender": 0, "birthday": "21-12-1999", "nationality": "Peru", "hired": "22-02-2016", 
"dept": "Pilot", "position": "Pilot", "status": 1, "region": "Europe"}, {"emp_id": 23, "passport": "NFH65BYM0VB", "firstname": "Armand", "lastname": "Horn", "gender": 0, "birthday": "24-05-1987", "nationality": "Netherlands", "hired": "19-06-2007", "dept": "Aircraft Maintenance", "position": "Airhostess", "status": 1, "region": "Ocenia"}, {"emp_id": 29, "passport": "CMK62UAD3VK", "firstname": "Rowan", "lastname": "Leonard", "gender": 1, "birthday": "15-07-1974", "nationality": "Germany", "hired": "27-03-2004", "dept": "Aircraft Maintenance", "position": "Pilot", "status": 1, "region": "Ocenia"}, {"emp_id": 50, "passport": "MRC33GHJ2KW", "firstname": "Calvin", "lastname": "Roach", "gender": 1, "birthday": "16-04-1999", "nationality": "Mexico", "hired": "18-03-2011", "dept": "Flight Attendance", "position": "Steward", "status": 1, "region": "Europe"}]
41 changes: 41 additions & 0 deletions model.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
import json

class DevMountainData(object):
    """One employee record read positionally from an indexable source.

    ``data[i].text`` is read for ``i`` in 0..11, in that order; the
    ``emp_id``, ``gender`` and ``status`` columns are converted with
    ``int`` and the rest are stored as found.
    """

    # Attribute names in source-column order.
    _COLUMNS = (
        "emp_id",
        "passport",
        "firstname",
        "lastname",
        "gender",
        "birthday",
        "nationality",
        "hired",
        "dept",
        "position",
        "status",
        "region",
    )

    # Columns whose text is parsed as an integer.
    _INT_COLUMNS = frozenset(("emp_id", "gender", "status"))

    def __init__(self, data):
        """Populate the attributes from ``data``.

        Args:
            data: Indexable object whose items 0..11 expose a ``text``
                attribute.
        """
        for index, column in enumerate(self._COLUMNS):
            raw = data[index].text
            setattr(self, column, int(raw) if column in self._INT_COLUMNS else raw)

    def toSet(self):
        """Return the twelve fields as a tuple (not a set), in column order."""
        return tuple(getattr(self, column) for column in self._COLUMNS)


class ClubData(object):
    """Attribute-for-attribute copy of a record, with tuple and JSON views."""

    # Attributes copied from the source object, in this order.
    _COLUMNS = (
        "emp_id",
        "passport",
        "firstname",
        "lastname",
        "gender",
        "birthday",
        "nationality",
        "hired",
        "dept",
        "position",
        "status",
        "region",
    )

    def __init__(self, data):
        """Copy the twelve record attributes from ``data``.

        Args:
            data: Object exposing every attribute named in ``_COLUMNS``.
        """
        for column in self._COLUMNS:
            setattr(self, column, getattr(data, column))

    def toSet(self):
        """Return the twelve fields as a tuple (not a set), in column order."""
        return tuple(getattr(self, column) for column in self._COLUMNS)

    def toJson(self):
        """Return the instance attributes serialised as a JSON object string."""
        return json.dumps(vars(self))