@@ -48,30 +48,44 @@ def _is_url(self, path: str) -> bool:
4848 """Check if the path is a URL."""
4949 parsed = urllib .parse .urlparse (path )
5050 return parsed .scheme in ('http' , 'https' )
51-
def _download_url(self) -> None:
    """Download the URL in ``self.path`` to a temporary file and update the path.

    The temporary file keeps the extension found in the URL path. Its name
    is stored in ``self._tmp_path`` as soon as the file is created, and
    ``self.path`` is switched to it only after the download has completed.

    Raises:
        RuntimeError: If the request cannot be built, the temporary file
            cannot be created, the download fails, or the data cannot be
            written. The underlying exception is attached as ``__cause__``.
            The partially written temporary file is removed before raising.
    """
    original_url = self.path
    chunk_size = 64 * 1024
    tmp_path = None
    try:
        # Keep the URL's extension on the temporary file.
        suffix = os.path.splitext(urllib.parse.urlparse(original_url).path)[1]

        # Build the request before creating the temp file so that a
        # malformed URL cannot leave an open handle or a stray file behind.
        headers = {
            'User-Agent': (
                'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
                'AppleWebKit/537.36 (KHTML, like Gecko) '
                'Chrome/91.0.4472.124 Safari/537.36'
            )
        }
        req = urllib.request.Request(original_url, headers=headers)

        tmp_file = tempfile.NamedTemporaryFile(delete=False, suffix=suffix)
        # Leaving this ``with`` closes the handle on success and on failure,
        # before the cleanup below tries to unlink the file.
        with tmp_file:
            tmp_path = tmp_file.name
            self._tmp_path = tmp_path
            print(f"Downloading URL: {original_url} to {tmp_path}")
            with urllib.request.urlopen(req) as response:
                # Stream in chunks instead of buffering the whole body.
                for chunk in iter(lambda: response.read(chunk_size), b""):
                    tmp_file.write(chunk)
    except Exception as e:
        if tmp_path is not None:
            try:
                os.unlink(tmp_path)
            except OSError:
                # Best-effort cleanup: the download error reported below
                # matters more than a failure to remove the temp file.
                pass
        # HTTPError is a subclass of URLError, and URLError is a subclass
        # of OSError, so the most specific check has to come first.
        if isinstance(e, urllib.error.URLError):
            message = f"Failed to download URL {original_url}: {e}"
        elif isinstance(e, OSError) and tmp_path is not None:
            message = f"Failed to write downloaded file to {tmp_path}: {e}"
        else:
            message = f"Error downloading URL {original_url}: {e}"
        raise RuntimeError(message) from e
    self.path = tmp_path
7589
7690 def __del__ (self ):
7791 """Cleanup temporary file if it exists."""
0 commit comments