Skip to content

Commit 0b33bd9

Browse files
Katherine Cariglia
authored and committed
avoid open file handles, print success message on download
1 parent 73d748b commit 0b33bd9

File tree

2 files changed

+98
-82
lines changed

2 files changed

+98
-82
lines changed

madrigalWeb/madrigalWeb.py

Lines changed: 97 additions & 81 deletions
Original file line numberDiff line numberDiff line change
@@ -667,16 +667,15 @@ def isprint(self, filename, parms, filters, user_fullname, user_email, user_affi
667667
# read main url
668668
url = url.replace('+', '%2B')
669669
try:
670-
mainUrl = urllib.request.urlopen(url, timeout=TIMEOUT)
670+
with urllib.request.urlopen(url, timeout=TIMEOUT) as mainUrl:
671+
if format == 'ascii':
672+
page = mainUrl.read().decode('utf-8')
673+
else:
674+
page = mainUrl.read()
671675
except:
672676
raise ValueError('unable to open url ' + str(url))
673677

674-
if format == 'ascii':
675-
page = mainUrl.read().decode('utf-8')
676-
else:
677-
page = mainUrl.read()
678-
679-
mainUrl.close()
678+
680679

681680
if format == 'ascii':
682681
if page.find('Error occurred') != -1:
@@ -687,11 +686,20 @@ def isprint(self, filename, parms, filters, user_fullname, user_email, user_affi
687686

688687
else:
689688
if format == 'ascii':
690-
f = open(outputFile, 'w')
689+
with open(outputFile, 'w') as f:
690+
f.write(page)
691691
else:
692-
f = open(outputFile, 'wb')
693-
f.write(page)
694-
f.close()
692+
with open(outputFile, 'wb') as f:
693+
f.write(page)
694+
695+
# check that final file is not empty
696+
if os.path.getsize(outputFile) == 0:
697+
print(f"Downloaded file {outputFile} is empty, removing..")
698+
os.system(f"rm {outputFile}")
699+
700+
# success
701+
print(f"Downloaded file {outputFile}")
702+
695703

696704

697705
def madCalculator(self,
@@ -1767,39 +1775,60 @@ def downloadFile(self, filename, destination, user_fullname, user_email, user_af
17671775

17681776
CHUNK = 16 * 1024
17691777

1770-
urlFile = urllib.request.urlopen(url, timeout=TIMEOUT)
1771-
1772-
if format in ('ascii', 'simple'):
1773-
f = open(destination, 'w')
1774-
else:
1775-
f = open(destination, 'wb')
1778+
isgzip = False
17761779

1777-
while(True):
1778-
if format in ('ascii', 'simple'):
1779-
try:
1780-
data = urlFile.read(CHUNK).decode('utf8')
1781-
except:
1782-
# probably gzip ascii - convert
1783-
f.close()
1784-
try:
1785-
os.remove(destination)
1786-
except:
1787-
pass
1788-
urlFile.close()
1789-
urlFile = urllib.request.urlopen(url, timeout=TIMEOUT)
1790-
f = open(destination + '.gz', 'wb')
1791-
format = 'gzip'
1792-
data = urlFile.read(CHUNK)
1793-
1794-
else:
1795-
data = urlFile.read(CHUNK)
1796-
if not data:
1797-
break
1798-
f.write(data)
1780+
if not isgzip:
1781+
with urllib.request.urlopen(url, timeout=TIMEOUT) as urlFile:
1782+
readtype = None
1783+
if format in ('ascii', 'simple'):
1784+
readtype = 'w'
1785+
#f = open(destination, 'w')
1786+
else:
1787+
readtype = 'wb'
1788+
#f = open(destination, 'wb')
1789+
1790+
with open(destination, readtype) as f:
1791+
while(True):
1792+
if format in ('ascii', 'simple'):
1793+
try:
1794+
data = urlFile.read(CHUNK).decode('utf8')
1795+
except:
1796+
# probably gzip ascii - convert
1797+
isgzip = True
1798+
break
1799+
1800+
1801+
else:
1802+
data = urlFile.read(CHUNK)
1803+
if not data:
1804+
break
1805+
f.write(data)
1806+
else:
1807+
# handle gzip ascii
1808+
try:
1809+
os.remove(destination)
1810+
except:
1811+
pass
17991812

1800-
urlFile.close()
1813+
with urllib.request.urlopen(url, timeout=TIMEOUT) as urlFile:
1814+
with open(destination + '.gz', 'wb') as f:
1815+
format = 'gzip'
1816+
while(True):
1817+
try:
1818+
data = urlFile.read(CHUNK)
1819+
except:
1820+
pass
1821+
if not data:
1822+
break
1823+
18011824

1802-
f.close()
1825+
# check that final file is not empty
1826+
if os.path.getsize(destination) == 0:
1827+
print(f"Downloaded file {destination} is empty, removing..")
1828+
os.system(f"rm {destination}")
1829+
1830+
print(f"Downloaded file {destination}")
1831+
18031832

18041833

18051834

@@ -1827,11 +1856,11 @@ def listFileTimes(self, expDir=None):
18271856

18281857
url = url.replace('+', '%2B')
18291858

1830-
urlFile = urllib.request.urlopen(url, timeout=TIMEOUT)
1859+
with urllib.request.urlopen(url, timeout=TIMEOUT) as urlFile:
1860+
data = urlFile.read().decode('utf-8')
18311861

18321862
retList = []
18331863

1834-
data = urlFile.read().decode('utf-8')
18351864
lines = data.split('\n')
18361865
for line in lines:
18371866
items = line.split(',')
@@ -1863,17 +1892,19 @@ def downloadWebFile(self, expPath, destination):
18631892
url += '?expPath=%s' % (expPath.replace(' ', '+'))
18641893
url = url.replace('+', '%2B')
18651894

1866-
urlFile = urllib.request.urlopen(url, timeout=TIMEOUT2)
1867-
1868-
data = urlFile.read()
1869-
1870-
urlFile.close()
1895+
with urllib.request.urlopen(url, timeout=TIMEOUT2) as urlFile:
1896+
data = urlFile.read()
18711897

1872-
f = open(destination, 'wb')
1873-
1874-
f.write(data)
1898+
with open(destination, 'wb') as f:
1899+
f.write(data)
18751900

1876-
f.close()
1901+
# check that final file is not empty
1902+
if os.path.getsize(destination) == 0:
1903+
print(f"Downloaded file {destination} is empty, removing..")
1904+
os.system(f"rm {destination}")
1905+
1906+
# success
1907+
print(f"Downloaded file {destination}")
18771908

18781909

18791910

@@ -1964,15 +1995,12 @@ def traceMagneticField(self, year, month, day, hour, minute,second,
19641995

19651996
# read main url
19661997
try:
1967-
mainUrl = urllib.request.urlopen(url, timeout=TIMEOUT2)
1998+
with urllib.request.urlopen(url, timeout=TIMEOUT2) as mainUrl:
1999+
page = mainUrl.read().decode('utf8')
2000+
page = page.split('\n')
19682001
except:
19692002
raise ValueError('unable to open url ' + str(url))
19702003

1971-
page = mainUrl.read().decode('utf8')
1972-
page = page.split('\n')
1973-
1974-
mainUrl.close()
1975-
19762004
# parse the result
19772005
if len(page) == 0:
19782006
raise ValueError('No data found at url' + str(url))
@@ -2006,14 +2034,11 @@ def getVersion(self):
20062034

20072035
# read main url
20082036
try:
2009-
mainUrl = urllib.request.urlopen(url, timeout=TIMEOUT2)
2037+
with urllib.request.urlopen(url, timeout=TIMEOUT2) as mainUrl:
2038+
page = mainUrl.read().decode('utf-8')
20102039
except:
20112040
# if this fails, must be 2.5
20122041
return('2.5')
2013-
2014-
page = mainUrl.read().decode('utf-8')
2015-
2016-
mainUrl.close()
20172042

20182043
return(page.strip())
20192044

@@ -2032,14 +2057,11 @@ def getCitedFilesFromUrl(self, url):
20322057
"""
20332058
# read main url
20342059
try:
2035-
mainUrl = urllib.request.urlopen(url, timeout=TIMEOUT2)
2060+
with urllib.request.urlopen(url, timeout=TIMEOUT2) as mainUrl:
2061+
page = mainUrl.read().decode('utf8')
2062+
page = page.strip().split('\n')
20362063
except:
20372064
raise ValueError('unable to open url ' + str(url))
2038-
2039-
page = mainUrl.read().decode('utf8')
2040-
page = page.strip().split('\n')
2041-
2042-
mainUrl.close()
20432065

20442066
return(page)
20452067

@@ -2084,14 +2106,11 @@ def createCitationGroupFromList(self, citationList, user_fullname, user_email, u
20842106
try:
20852107
data = urllib.parse.urlencode(citationDict).encode()
20862108
req = urllib.request.Request(url, data=data)
2087-
mainUrl = urllib.request.urlopen(req, timeout=TIMEOUT2)
2109+
with urllib.request.urlopen(req, timeout=TIMEOUT2) as mainUrl:
2110+
page = mainUrl.read().decode('utf8')
2111+
page = page.strip()
20882112
except:
20892113
raise ValueError('unable to open url ' + str(url))
2090-
2091-
page = mainUrl.read().decode('utf8')
2092-
page = page.strip()
2093-
2094-
mainUrl.close()
20952114

20962115
return(page)
20972116

@@ -2152,14 +2171,11 @@ def getCitationListFromFilters(self, startDate, endDate, inst=None, kindat=None,
21522171

21532172
# read main url
21542173
try:
2155-
mainUrl = urllib.request.urlopen(url, timeout=TIMEOUT2)
2174+
with urllib.request.urlopen(url, timeout=TIMEOUT2) as mainUrl:
2175+
page = mainUrl.read().decode('utf8')
2176+
page = page.strip().split('\n')
21562177
except:
21572178
raise ValueError('unable to open url ' + str(url))
2158-
2159-
page = mainUrl.read().decode('utf8')
2160-
page = page.strip().split('\n')
2161-
2162-
mainUrl.close()
21632179

21642180
return(page)
21652181

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
44

55
[project]
66
name = "madrigalWeb"
7-
version = "3.3.3"
7+
version = "3.3.4"
88
authors = [
99
{ name="Bill Rideout", email="brideout@mit.edu" },
1010
{ name="Katherine Cariglia", email="cariglia@mit.edu" },

0 commit comments

Comments (0)