From 4a508e19e705594bfd3f6bf9cb0319d04b7d2087 Mon Sep 17 00:00:00 2001 From: axesve Date: Sat, 21 Apr 2018 21:40:20 +0200 Subject: [PATCH 1/3] Update readme.md --- readme.md | 13 ++----------- 1 file changed, 2 insertions(+), 11 deletions(-) diff --git a/readme.md b/readme.md index febc64f..1930b59 100644 --- a/readme.md +++ b/readme.md @@ -1,21 +1,12 @@ -[![Build Status](https://travis-ci.org/HoverHell/RedditImageGrab.svg?branch=master)](https://travis-ci.org/HoverHell/RedditImageGrab) - # RedditImageGrab - -I created this script to download the latest (and greatest) wallpapers -off of image subreddits like wallpaper to keep my desktop wallpaper -fresh and interesting. The main idea is that the script would download -any JPEG or PNG formatted image that it found listed in the specified -subreddit and download them to a folder. +Forked and Python 3 updated version. # Requirements: - * Python 2 (Python3 might be supported over 2to3, but see for - yourself and report back). + * Python 3 * Optional requirements: listed in setup.py under extras_require. - # Usage: See `./redditdl.py --help` for uptodate details. From 0349e4d45da9eff0e500a88d96e789c946cdd9e1 Mon Sep 17 00:00:00 2001 From: axesve Date: Sat, 21 Apr 2018 21:41:56 +0200 Subject: [PATCH 2/3] Python v3 update --- redditdownload/__init__.py | 2 +- redditdownload/deviantart.py | 2 +- redditdownload/gfycat.py | 16 ++++++++-------- redditdownload/img_scrap_stuff.py | 22 +++++++++++----------- redditdownload/reddit.py | 13 +++++++------ redditdownload/redditdownload.py | 17 ++++++++--------- redditdownload/scrap_wrongies.py | 8 ++++---- 7 files changed, 40 insertions(+), 40 deletions(-) diff --git a/redditdownload/__init__.py b/redditdownload/__init__.py index 8d43315..1b5312b 100644 --- a/redditdownload/__init__.py +++ b/redditdownload/__init__.py @@ -1 +1 @@ -from redditdownload import * +from .redditdownload import * diff --git a/redditdownload/deviantart.py b/redditdownload/deviantart.py index db56860..569d25b 100644 --- a/redditdownload/deviantart.py +++ b/redditdownload/deviantart.py @@ -1,6 +1,6 @@ """module to parse deviantart page.""" try: # py2 - from urllib2 import urlopen + from urllib.request import urlopen except ImportError: # py3 from urllib.request import urlopen diff --git a/redditdownload/gfycat.py b/redditdownload/gfycat.py index 41debf2..5cc0b45 100644 --- a/redditdownload/gfycat.py +++ b/redditdownload/gfycat.py @@ -23,14 +23,14 @@ def __init__(self): super(gfycat, self).__init__() def __fetch(self, url, param): - import urllib2 + import urllib.request, urllib.error, urllib.parse import json try: # added simple User-Ajent string to avoid CloudFlare block this request headers = {'User-Agent': 'Mozilla/5.0'} - req = urllib2.Request(url+param, None, headers) - connection = urllib2.urlopen(req).read() - except urllib2.HTTPError, err: + req = urllib.request.Request(url+param, None, headers) + connection = urllib.request.urlopen(req).read() + except urllib.error.HTTPError as err: raise ValueError(err.read()) result = namedtuple("result", "raw json") return result(raw=connection, json=json.loads(connection)) @@ -117,14 +117,14 @@ def get(self, what): return ("Sorry, can't find %s" % error) def download(self, location): - import urllib2 + import urllib.request, urllib.error, urllib.parse if not location.endswith(".mp4"): location = location + self.get("gfyName") + ".mp4" try: # added simple User-Ajent string to avoid CloudFlare block this request headers = {'User-Agent': 'Mozilla/5.0'} - req = 
urllib2.Request(self.get("mp4Url"), None, headers) - file = urllib2.urlopen(req) + req = urllib.request.Request(self.get("mp4Url"), None, headers) + file = urllib.request.urlopen(req) # make sure that the status code is 200, and the content type is mp4 if int(file.code) is not 200 or file.headers["content-type"] != "video/mp4": raise ValueError("Problem downlading the file. Status code is %s or the content-type is not right %s" @@ -132,7 +132,7 @@ def download(self, location): data = file.read() with open(location, "wb") as mp4: mp4.write(data) - except urllib2.HTTPError, err: + except urllib.error.HTTPError as err: raise ValueError(err.read()) def formated(self, ignoreNull=False): diff --git a/redditdownload/img_scrap_stuff.py b/redditdownload/img_scrap_stuff.py index 025656d..4137d9e 100755 --- a/redditdownload/img_scrap_stuff.py +++ b/redditdownload/img_scrap_stuff.py @@ -10,11 +10,11 @@ import re import json import logging -import urlparse +import urllib.parse import traceback from PIL import Image -from cStringIO import StringIO +from io import StringIO import lxml import html5lib # Heavily recommended for bs4 (apparently) import bs4 @@ -52,7 +52,7 @@ def indexall_re(topstr, substr_re): def walker(text, opening='{', closing='}'): """ A near-useless experiment that was intended for `get_all_objects` """ stack = [] - for pos in xrange(len(text)): + for pos in range(len(text)): if text[pos:pos + len(opening)] == opening: stack.append(pos) continue @@ -88,7 +88,7 @@ def get_all_objects(text, beginning=r'{', debug=False): """ def _dbg_actual(st, *ar): - print "D: ", st % ar + print("D: ", st % ar) _dbg = _dbg_actual if debug else (lambda *ar: None) @@ -106,9 +106,9 @@ def __getitem__(self, key): class TheLoader(yaml.SafeLoader): ESCAPE_REPLACEMENTS = ddd(yaml.SafeLoader.ESCAPE_REPLACEMENTS) - from cStringIO import StringIO + from io import StringIO # optimised slicing - if isinstance(text, unicode): + if isinstance(text, str): _dbg("encoding") text = text.encode('utf-8') _dbg("Length: %r", len(text)) @@ -214,13 +214,13 @@ def get_get_get(url, **kwa): def get_get(*ar, **kwa): retries = kwa.pop('_xretries', 5) - for retry in xrange(retries): + for retry in range(retries): try: return get_get_get(*ar, **kwa) except Exception as exc: traceback.print_exc() ee = exc - print "On retry #%r (%s)" % (retry, repr(exc)[:30]) + print("On retry #%r (%s)" % (retry, repr(exc)[:30])) raise GetError(ee) @@ -244,7 +244,7 @@ def get(url, cache_file=None, req_params=None, bs=True, response=False, undecode for chunk in resp.iter_content(chunk_size=16384): data += chunk if len(data) > _max_len: - print "Too large" + print("Too large") break data = bytes(data) ## Have to, alas. 
data_bytes = data @@ -274,7 +274,7 @@ def _filter(l): def _url_abs(l, base_url): - return (urlparse.urljoin(base_url, v) for v in l) + return (urllib.parse.urljoin(base_url, v) for v in l) def _preprocess_bs_links(bs, links): @@ -413,7 +413,7 @@ def _pp(lst): for val in lst if val.startswith('http') or val.startswith('/')] # (urljoin should be done already though) - return [urlparse.urljoin(url, val) for val in res] + return [urllib.parse.urljoin(url, val) for val in res] imgs, links = bs2img(bs), bs2lnk(bs) to_check = imgs + links diff --git a/redditdownload/reddit.py b/redditdownload/reddit.py index 9130af7..ba925d3 100755 --- a/redditdownload/reddit.py +++ b/redditdownload/reddit.py @@ -2,8 +2,9 @@ """Return list of items from a sub-reddit of reddit.com.""" import sys -import HTMLParser -from urllib2 import urlopen, Request, HTTPError +import html.parser +from urllib.request import urlopen, Request +from urllib.error import HTTPError from json import JSONDecoder @@ -33,7 +34,7 @@ def getitems(subreddit, multireddit=False, previd='', reddit_sort=None): if '/m/' not in subreddit: warning = ('That doesn\'t look like a multireddit. Are you sure' 'you need that multireddit flag?') - print warning + print(warning) sys.exit(1) url = 'http://www.reddit.com/user/%s.json' % subreddit if not multireddit: @@ -41,7 +42,7 @@ def getitems(subreddit, multireddit=False, previd='', reddit_sort=None): warning = ('It looks like you are trying to fetch a multireddit. \n' 'Check the multireddit flag. ' 'Call --help for more info') - print warning + print (warning) sys.exit(1) # no sorting needed if reddit_sort is None: @@ -95,7 +96,7 @@ def getitems(subreddit, multireddit=False, previd='', reddit_sort=None): try: req = Request(url, headers=hdr) - json = urlopen(req).read() + json = urlopen(req).read().decode('utf-8') data = JSONDecoder().decode(json) if isinstance(data, dict): items = [x['data'] for x in data['data']['children']] @@ -119,7 +120,7 @@ def getitems(subreddit, multireddit=False, previd='', reddit_sort=None): # returns `url` values html-escaped, whereas we normally need them # in the way they are meant to be downloaded (i.e. urlquoted at # most). 
-    htmlparser = HTMLParser.HTMLParser()
+    htmlparser = html.parser.HTMLParser()
     for item in items:
         if item.get('url'):
             item['url'] = htmlparser.unescape(item['url'])
diff --git a/redditdownload/redditdownload.py b/redditdownload/redditdownload.py
index 6c807c1..c3b5fe9 100755
--- a/redditdownload/redditdownload.py
+++ b/redditdownload/redditdownload.py
@@ -1,15 +1,16 @@
 #!/usr/bin/env python2
 """Download images from a reddit.com subreddit."""
 
-from __future__ import print_function
+
 
 import os
 import re
-import StringIO
+import io
 import sys
 import logging
-from urllib2 import urlopen, HTTPError, URLError
-from httplib import InvalidURL
+from urllib.request import urlopen
+from urllib.error import HTTPError, URLError
+from http.client import InvalidURL
 from argparse import ArgumentParser
 from os.path import (
     exists as pathexists, join as pathjoin, basename as pathbasename,
@@ -17,7 +17,6 @@
 from os import mkdir, getcwd
 import time
 
-from .gfycat import gfycat
 from .reddit import getitems
 from .deviantart import process_deviant_url
 
@@ -29,7 +28,7 @@ def request(url, *ar, **kwa):
     _retries = kwa.pop('_retries', 4)
     _retry_pause = kwa.pop('_retry_pause', 0)
     res = None
-    for _try in xrange(_retries):
+    for _try in range(_retries):
         try:
             res = urlopen(url, *ar, **kwa)
         except Exception as exc:
@@ -83,7 +82,7 @@ def extract_imgur_album_urls(album_url):
     match = re.compile(r'\"hash\":\"(.[^\"]*)\",\"title\"')
     items = []
 
-    memfile = StringIO.StringIO(filedata)
+    memfile = io.StringIO(filedata.decode('utf-8'))  # filedata from .read() is bytes on Python 3
 
     for line in memfile.readlines():
         results = re.findall(match, line)
@@ -130,7 +129,7 @@ def download_from_url(url, dest_file):
         raise HTTPError(actual_url, 404, "Imgur suggests the image was removed", None, None)
 
     # Work out file type either from the response or the url.
-    if 'content-type' in info.keys():
+    if 'content-type' in list(info.keys()):
         filetype = info['content-type']
     elif url.endswith('.jpg') or url.endswith('.jpeg'):
         filetype = 'image/jpeg'
@@ -232,7 +231,7 @@ def slugify(value):
     # with some modification
     import unicodedata
     value = unicodedata.normalize('NFKD', value).encode('ascii', 'ignore')
-    value = unicode(re.sub(r'[^\w\s-]', '', value).strip())
+    value = re.sub(r'[^\w\s-]', '', value.decode('ascii')).strip()  # value is bytes after .encode(); decode before the str-pattern sub
     # value = re.sub(r'[-\s]+', '-', value) # not replacing space with hypen
     return value
 
diff --git a/redditdownload/scrap_wrongies.py b/redditdownload/scrap_wrongies.py
index 0df5dbd..46fedd6 100755
--- a/redditdownload/scrap_wrongies.py
+++ b/redditdownload/scrap_wrongies.py
@@ -3,7 +3,7 @@
 """ Scrap stuff from "wrong data type" pages of RedditImageGrab """
 
 try:
-    from redditdownload import _WRONGDATA_LOGFILE
+    from .redditdownload import _WRONGDATA_LOGFILE
 except ImportError:
     _WRONGDATA_LOGFILE = '.wrong_type_pages.jsl'
 
@@ -30,8 +30,8 @@
 from atomicfile import AtomicFile
 import magic
 
-import img_scrap_stuff
-from img_scrap_stuff import GetError
+from . 
import img_scrap_stuff
+from .img_scrap_stuff import GetError
 
 
 _log = logging.getLogger(__name__)
@@ -108,7 +108,7 @@ def consecutive_filename(filename):
         filebase, fileext = fileparts[0], None
     else:
         filebase, fileext = fileparts
-    for i in xrange(1, 9000):
+    for i in range(1, 9000):
         filetry = '%s__%02d' % (filebase, i)
         if fileext is not None:
             filetry = '%s.%s' % (filetry, fileext)

From 826e2111ab6e8c0d01343f7949d8b02f1045980b Mon Sep 17 00:00:00 2001
From: axesve
Date: Sat, 21 Apr 2018 21:46:07 +0200
Subject: [PATCH 3/3] Update readme.md

---
 readme.md | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/readme.md b/readme.md
index 1930b59..545c74d 100644
--- a/readme.md
+++ b/readme.md
@@ -1,6 +1,12 @@
+[![Build Status](https://travis-ci.org/HoverHell/RedditImageGrab.svg?branch=master)](https://travis-ci.org/HoverHell/RedditImageGrab)
+
 # RedditImageGrab
 
-Forked and Python 3 updated version.
+I created this script to download the latest (and greatest) wallpapers
+off of image subreddits like wallpaper to keep my desktop wallpaper
+fresh and interesting. The main idea is that the script downloads any
+JPEG or PNG formatted image it finds listed in the specified
+subreddit and saves it to a folder.
 
 # Requirements: