5 changes: 1 addition & 4 deletions readme.md
@@ -8,14 +8,11 @@ fresh and interesting. The main idea is that the script downloads
any JPEG- or PNG-formatted image it finds listed in the specified
subreddit and saves it to a folder.


# Requirements:

* Python 2 (Python3 might be supported over 2to3, but see for
yourself and report back).
* Python 3
* Optional requirements: listed in setup.py under extras_require.


# Usage:

See `./redditdl.py --help` for up-to-date details.
2 changes: 1 addition & 1 deletion redditdownload/__init__.py
@@ -1 +1 @@
from redditdownload import *
from .redditdownload import *
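Python 3 dropped implicit relative imports (PEP 328), which is why the package `__init__` needs the explicit dot here. A minimal sketch of the distinction, assuming the layout stays `redditdownload/redditdownload.py`:

```python
# redditdownload/__init__.py
# On Python 2, "from redditdownload import *" silently resolved to the sibling
# module inside the package. On Python 3 it is treated as an absolute import,
# so the submodule has to be named relative to the package:
from .redditdownload import *
```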
2 changes: 1 addition & 1 deletion redditdownload/deviantart.py
@@ -1,6 +1,6 @@
"""module to parse deviantart page."""
try: # py2
from urllib2 import urlopen
from urllib.request import urlopen
except ImportError: # py3
from urllib.request import urlopen

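Note that after this change both branches of the try/except import the same name, so the py2 fallback is now dead code. If Python 2 support is being dropped entirely (as the readme change suggests), the whole guard can go; a minimal sketch:

```python
"""module to parse deviantart page."""
from urllib.request import urlopen  # Python 3 only; no py2 fallback needed
```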
16 changes: 8 additions & 8 deletions redditdownload/gfycat.py
@@ -23,14 +23,14 @@ def __init__(self):
super(gfycat, self).__init__()

def __fetch(self, url, param):
import urllib2
import urllib.request, urllib.error, urllib.parse
import json
try:
# added a simple User-Agent string to avoid CloudFlare blocking this request
headers = {'User-Agent': 'Mozilla/5.0'}
req = urllib2.Request(url+param, None, headers)
connection = urllib2.urlopen(req).read()
except urllib2.HTTPError, err:
req = urllib.request.Request(url+param, None, headers)
connection = urllib.request.urlopen(req).read()
except urllib.error.HTTPError as err:
raise ValueError(err.read())
result = namedtuple("result", "raw json")
return result(raw=connection, json=json.loads(connection))
@@ -117,22 +117,22 @@ def get(self, what):
return ("Sorry, can't find %s" % error)

def download(self, location):
import urllib2
import urllib.request, urllib.error, urllib.parse
if not location.endswith(".mp4"):
location = location + self.get("gfyName") + ".mp4"
try:
# added a simple User-Agent string to avoid CloudFlare blocking this request
headers = {'User-Agent': 'Mozilla/5.0'}
req = urllib2.Request(self.get("mp4Url"), None, headers)
file = urllib2.urlopen(req)
req = urllib.request.Request(self.get("mp4Url"), None, headers)
file = urllib.request.urlopen(req)
# make sure that the status code is 200, and the content type is mp4
if int(file.code) != 200 or file.headers["content-type"] != "video/mp4":
raise ValueError("Problem downloading the file. Status code is %s or the content-type %s is not video/mp4"
% (file.code, file.headers["content-type"]))
data = file.read()
with open(location, "wb") as mp4:
mp4.write(data)
except urllib2.HTTPError, err:
except urllib.error.HTTPError as err:
raise ValueError(err.read())

def formated(self, ignoreNull=False):
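Both hunks above follow the same Python 3 pattern: build a `urllib.request.Request` with a browser-like User-Agent, then catch `urllib.error.HTTPError`. A standalone sketch of that pattern, using a hypothetical URL rather than the module's own endpoints:

```python
import json
import urllib.error
import urllib.request


def fetch_json(url):
    """Fetch a URL with a browser-like User-Agent and decode the JSON body."""
    headers = {'User-Agent': 'Mozilla/5.0'}  # keeps CloudFlare from rejecting bare urllib requests
    req = urllib.request.Request(url, None, headers)
    try:
        raw = urllib.request.urlopen(req).read()  # bytes on Python 3
    except urllib.error.HTTPError as err:
        raise ValueError(err.read())  # err.read() is the error body, as bytes
    return json.loads(raw)  # json.loads accepts bytes on Python 3.6+


# fetch_json('https://example.com/api/gfycats/some-id')  # hypothetical endpoint
```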
22 changes: 11 additions & 11 deletions redditdownload/img_scrap_stuff.py
@@ -10,11 +10,11 @@
import re
import json
import logging
import urlparse
import urllib.parse
import traceback

from PIL import Image
from cStringIO import StringIO
from io import StringIO
import lxml
import html5lib # Heavily recommended for bs4 (apparently)
import bs4
@@ -52,7 +52,7 @@ def indexall_re(topstr, substr_re):
def walker(text, opening='{', closing='}'):
""" A near-useless experiment that was intended for `get_all_objects` """
stack = []
for pos in xrange(len(text)):
for pos in range(len(text)):
if text[pos:pos + len(opening)] == opening:
stack.append(pos)
continue
@@ -88,7 +88,7 @@ def get_all_objects(text, beginning=r'{', debug=False):
"""

def _dbg_actual(st, *ar):
print "D: ", st % ar
print("D: ", st % ar)

_dbg = _dbg_actual if debug else (lambda *ar: None)

@@ -106,9 +106,9 @@ def __getitem__(self, key):
class TheLoader(yaml.SafeLoader):
ESCAPE_REPLACEMENTS = ddd(yaml.SafeLoader.ESCAPE_REPLACEMENTS)

from cStringIO import StringIO
from io import StringIO
# optimised slicing
if isinstance(text, unicode):
if isinstance(text, str):
_dbg("encoding")
text = text.encode('utf-8')
_dbg("Length: %r", len(text))
@@ -214,13 +214,13 @@ def get_get_get(url, **kwa):

def get_get(*ar, **kwa):
retries = kwa.pop('_xretries', 5)
for retry in xrange(retries):
for retry in range(retries):
try:
return get_get_get(*ar, **kwa)
except Exception as exc:
traceback.print_exc()
ee = exc
print "On retry #%r (%s)" % (retry, repr(exc)[:30])
print("On retry #%r (%s)" % (retry, repr(exc)[:30]))
raise GetError(ee)


@@ -244,7 +244,7 @@ def get(url, cache_file=None, req_params=None, bs=True, response=False, undecode
for chunk in resp.iter_content(chunk_size=16384):
data += chunk
if len(data) > _max_len:
print "Too large"
print("Too large")
break
data = bytes(data) ## Have to, alas.
data_bytes = data
@@ -274,7 +274,7 @@ def _filter(l):


def _url_abs(l, base_url):
return (urlparse.urljoin(base_url, v) for v in l)
return (urllib.parse.urljoin(base_url, v) for v in l)


def _preprocess_bs_links(bs, links):
@@ -413,7 +413,7 @@ def _pp(lst):
for val in lst
if val.startswith('http') or val.startswith('/')]
# (urljoin should be done already though)
return [urlparse.urljoin(url, val) for val in res]
return [urllib.parse.urljoin(url, val) for val in res]

imgs, links = bs2img(bs), bs2lnk(bs)
to_check = imgs + links
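One caveat with the mechanical cStringIO → io.StringIO swap in this file: `io.StringIO` only accepts `str`, while the image payloads this module hands to PIL arrive as `bytes` on Python 3. If the wrapped data is raw response bytes, `io.BytesIO` is likely the right class; a small sketch under that assumption:

```python
from io import BytesIO

from PIL import Image


def open_image_from_bytes(data):
    """Wrap raw image bytes in a file-like object so PIL can open them (Python 3)."""
    # io.StringIO(data) would raise TypeError here, because data is bytes.
    return Image.open(BytesIO(data))
```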
13 changes: 7 additions & 6 deletions redditdownload/reddit.py
@@ -2,8 +2,9 @@
"""Return list of items from a sub-reddit of reddit.com."""

import sys
import HTMLParser
from urllib2 import urlopen, Request, HTTPError
import html.parser
from urllib.request import urlopen, Request
from urllib.error import HTTPError
from json import JSONDecoder


@@ -33,15 +34,15 @@ def getitems(subreddit, multireddit=False, previd='', reddit_sort=None):
if '/m/' not in subreddit:
warning = ('That doesn\'t look like a multireddit. Are you sure '
'you need that multireddit flag?')
print warning
print(warning)
sys.exit(1)
url = 'http://www.reddit.com/user/%s.json' % subreddit
if not multireddit:
if '/m/' in subreddit:
warning = ('It looks like you are trying to fetch a multireddit. \n'
'Check the multireddit flag. '
'Call --help for more info')
print warning
print(warning)
sys.exit(1)
# no sorting needed
if reddit_sort is None:
@@ -95,7 +96,7 @@ def getitems(subreddit, multireddit=False, previd='', reddit_sort=None):

try:
req = Request(url, headers=hdr)
json = urlopen(req).read()
json = urlopen(req).read().decode('utf-8')
data = JSONDecoder().decode(json)
if isinstance(data, dict):
items = [x['data'] for x in data['data']['children']]
@@ -119,7 +120,7 @@ def getitems(subreddit, multireddit=False, previd='', reddit_sort=None):
# returns `url` values html-escaped, whereas we normally need them
# in the way they are meant to be downloaded (i.e. urlquoted at
# most).
htmlparser = HTMLParser.HTMLParser()
htmlparser = html.parser.HTMLParser()
for item in items:
if item.get('url'):
item['url'] = htmlparser.unescape(item['url'])
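One follow-up worth flagging: `HTMLParser.unescape()` (called on the `htmlparser` instance a few lines below this hunk) was deprecated in Python 3.4 and removed in 3.9, so the `html.parser.HTMLParser()` swap only buys time. The module-level `html.unescape()` function is the durable replacement; a minimal sketch:

```python
import html


def unescape_url(url):
    """Undo the HTML escaping reddit applies to 'url' fields (e.g. &amp; -> &)."""
    return html.unescape(url)


# unescape_url('http://example.com/a.jpg?x=1&amp;y=2')  # -> 'http://example.com/a.jpg?x=1&y=2'
```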
17 changes: 8 additions & 9 deletions redditdownload/redditdownload.py
@@ -1,23 +1,22 @@
#!/usr/bin/env python2
"""Download images from a reddit.com subreddit."""

from __future__ import print_function


import os
import re
import StringIO
import io
import sys
import logging
from urllib2 import urlopen, HTTPError, URLError
from httplib import InvalidURL
from urllib.request import urlopen
from urllib.error import HTTPError, URLError
from http.client import InvalidURL
from argparse import ArgumentParser
from os.path import (
exists as pathexists, join as pathjoin, basename as pathbasename,
splitext as pathsplitext)
from os import mkdir, getcwd
import time

from .gfycat import gfycat
from .reddit import getitems
from .deviantart import process_deviant_url

@@ -29,7 +28,7 @@ def request(url, *ar, **kwa):
_retries = kwa.pop('_retries', 4)
_retry_pause = kwa.pop('_retry_pause', 0)
res = None
for _try in xrange(_retries):
for _try in range(_retries):
try:
res = urlopen(url, *ar, **kwa)
except Exception as exc:
@@ -83,7 +82,7 @@ def extract_imgur_album_urls(album_url):
match = re.compile(r'\"hash\":\"(.[^\"]*)\",\"title\"')
items = []

memfile = StringIO.StringIO(filedata)
memfile = io.StringIO(filedata)

for line in memfile.readlines():
results = re.findall(match, line)
@@ -130,7 +129,7 @@ def download_from_url(url, dest_file):
raise HTTPError(actual_url, 404, "Imgur suggests the image was removed", None, None)

# Work out file type either from the response or the url.
if 'content-type' in info.keys():
if 'content-type' in info:
filetype = info['content-type']
elif url.endswith('.jpg') or url.endswith('.jpeg'):
filetype = 'image/jpeg'
@@ -232,7 +231,7 @@ def slugify(value):
# with some modification
import unicodedata
value = unicodedata.normalize('NFKD', value).encode('ascii', 'ignore')
value = unicode(re.sub(r'[^\w\s-]', '', value).strip())
value = re.sub(r'[^\w\s-]', '', value.decode('ascii')).strip()
# value = re.sub(r'[-\s]+', '-', value) # not replacing space with hypen
return value

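Two loose ends in this hunk: the shebang still points at `python2`, and on Python 3 `urlopen(...).read()` returns `bytes`, so passing it straight to `io.StringIO` (or to `str` regex patterns) raises a TypeError. A hedged sketch of the decode-then-wrap step used by `extract_imgur_album_urls`, assuming the imgur page is UTF-8:

```python
import io
from urllib.request import urlopen


def read_text(url, encoding='utf-8'):
    """Fetch a page and return a str-backed file-like object (Python 3)."""
    raw = urlopen(url).read()  # bytes on Python 3
    return io.StringIO(raw.decode(encoding, errors='replace'))


# memfile = read_text(album_url)  # album_url as in extract_imgur_album_urls
```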
8 changes: 4 additions & 4 deletions redditdownload/scrap_wrongies.py
@@ -3,7 +3,7 @@
""" Scrap stuff from "wrong data type" pages of RedditImageGrab """

try:
from redditdownload import _WRONGDATA_LOGFILE
from .redditdownload import _WRONGDATA_LOGFILE
except ImportError:
_WRONGDATA_LOGFILE = '.wrong_type_pages.jsl'

@@ -30,8 +30,8 @@
from atomicfile import AtomicFile
import magic

import img_scrap_stuff
from img_scrap_stuff import GetError
from . import img_scrap_stuff
from .img_scrap_stuff import GetError


_log = logging.getLogger(__name__)
@@ -108,7 +108,7 @@ def consecutive_filename(filename):
filebase, fileext = fileparts[0], None
else:
filebase, fileext = fileparts
for i in xrange(1, 9000):
for i in range(1, 9000):
filetry = '%s__%02d' % (filebase, i)
if fileext is not None:
filetry = '%s.%s' % (filetry, fileext)
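For context, `consecutive_filename` above probes `name__01.ext`, `name__02.ext`, and so on until it hits an unused name; the xrange → range swap is the only porting work it needs. The exact split/exists logic lives outside this hunk, so the following is only a rough sketch of the idea:

```python
import os


def consecutive_filename_sketch(filename):
    """Rough sketch: append __NN before the extension until the name is free."""
    base, ext = os.path.splitext(filename)
    for i in range(1, 9000):
        candidate = '%s__%02d%s' % (base, i, ext)
        if not os.path.exists(candidate):
            return candidate
    raise RuntimeError('no free filename found')
```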