From d66bb3931e3d50d6421614c8803b891de378fe37 Mon Sep 17 00:00:00 2001
From: Joe Gordon <jogo@pinterest.com>
Date: Thu, 8 Feb 2018 10:53:02 -0800
Subject: [PATCH 1/6] Add tox and update .gitignore

Use tox to make it easy to test against both python2 and python3.

Also update .gitignore to cover the directories created by tox and pytest
(.tox/ and .pytest_cache/).
---
 .gitignore | 5 +++--
 tox.ini    | 6 ++++++
 2 files changed, 9 insertions(+), 2 deletions(-)
 create mode 100644 tox.ini

diff --git a/.gitignore b/.gitignore
index b9e02ae..1b692ae 100644
--- a/.gitignore
+++ b/.gitignore
@@ -2,6 +2,7 @@ pip-log.txt
 build
 dist
 MANIFEST
-
-__pycache__/
 *.pyc
+.pytest_cache/
+.tox/
+__pycache__/
diff --git a/tox.ini b/tox.ini
new file mode 100644
index 0000000..f53598a
--- /dev/null
+++ b/tox.ini
@@ -0,0 +1,6 @@
+[tox]
+envlist=py27,py3
+
+[testenv]
+deps = pytest
+commands = pytest test_urlnorm.py

From 36e0609f6717312b2eba2b87a1793f93d9cb1ffb Mon Sep 17 00:00:00 2001
From: Joe Gordon <jogo@pinterest.com>
Date: Thu, 8 Feb 2018 16:07:44 -0800
Subject: [PATCH 2/6] First pass at python3 support

* use print from future
* drop the `L` suffix from the long literal (python3 has no separate long type)
---
 test_urlnorm.py | 3 ++-
 urlnorm.py      | 2 +-
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/test_urlnorm.py b/test_urlnorm.py
index 99c25a5..358a9d6 100644
--- a/test_urlnorm.py
+++ b/test_urlnorm.py
@@ -1,6 +1,7 @@
 """
 this is a py.test test file
 """
+from __future__ import print_function
 import urlnorm
 from urlnorm import _unicode
 
@@ -107,7 +108,7 @@ def pytest_generate_tests(metafunc):
 def test_invalid_urls(url):
     try:
         output = urlnorm.norm(url)
-        print '%r' % output
+        print('%r' % output)
     except urlnorm.InvalidUrl:
         return
     assert 1 == 0, "this should have raised an InvalidUrl exception"
diff --git a/urlnorm.py b/urlnorm.py
index e8ce93d..fc18daa 100644
--- a/urlnorm.py
+++ b/urlnorm.py
@@ -193,7 +193,7 @@ def norm_path(scheme, path):
         return '/'
     return path
 
-MAX_IP=0xffffffffL
+MAX_IP=0xffffffff
 def int2ip(ipnum):
     assert isinstance(ipnum, int)
     if MAX_IP < ipnum or ipnum < 0:

From 947ffd32d1a2267a88d43da320736783981758d4 Mon Sep 17 00:00:00 2001
From: Joe Gordon <jogo@pinterest.com>
Date: Thu, 8 Feb 2018 13:13:47 -0800
Subject: [PATCH 3/6] Manually update for python3 compatibility

* Stop using decode('utf8') in tests
* str.lower() instead of string.lower(str)
---
 test_urlnorm.py | 10 +++++-----
 urlnorm.py      |  5 ++---
 2 files changed, 7 insertions(+), 8 deletions(-)

diff --git a/test_urlnorm.py b/test_urlnorm.py
index 358a9d6..b96ff46 100644
--- a/test_urlnorm.py
+++ b/test_urlnorm.py
@@ -21,7 +21,7 @@ def pytest_generate_tests(metafunc):
             'http://USER:pass@www.Example.COM/foo/bar': 'http://USER:pass@www.example.com/foo/bar',
             'http://www.example.com./':      'http://www.example.com/',
             'http://test.example/?a=%26&b=1': 'http://test.example/?a=%26&b=1', # should not un-encode the & that is part of a parameter value
-            'http://test.example/?a=%e3%82%82%26': 'http://test.example/?a=\xe3\x82\x82%26'.decode('utf8'), # should return a unicode character
+            'http://test.example/?a=%e3%82%82%26': u'http://test.example/?a=\u3082%26', # should return a unicode character
             # note: this breaks the internet for parameters that are positional (stupid nextel) and/or don't have an = sign
             # 'http://test.example/?a=1&b=2&a=3': 'http://test.example/?a=1&a=3&b=2', # should be in sorted/grouped order
 
@@ -30,12 +30,12 @@ def pytest_generate_tests(metafunc):
             'http://test.example?':       'http://test.example/', # with trailing /
             'http://a.COM/path/?b&a' : 'http://a.com/path/?b&a',
             # test utf8 and unicode
-            u'http://XBLA\u306eXbox.com': 'http://xbla\xe3\x81\xaexbox.com/'.decode('utf8'),
-            u'http://XBLA\u306eXbox.com'.encode('utf8'): 'http://xbla\xe3\x81\xaexbox.com/'.decode('utf8'),
-            u'http://XBLA\u306eXbox.com': 'http://xbla\xe3\x81\xaexbox.com/'.decode('utf8'),
+            u'http://XBLA\u306eXbox.com': u'http://xbla\u306exbox.com/',
+            u'http://XBLA\u306eXbox.com'.encode('utf8'): u'http://xbla\u306exbox.com/',
+            u'http://XBLA\u306eXbox.com': u'http://xbla\u306exbox.com/',
             # test idna + utf8 domain
             # u'http://xn--q-bga.XBLA\u306eXbox.com'.encode('utf8'): 'http://q\xc3\xa9.xbla\xe3\x81\xaexbox.com'.decode('utf8'),
-            'http://ja.wikipedia.org/wiki/%E3%82%AD%E3%83%A3%E3%82%BF%E3%83%94%E3%83%A9%E3%83%BC%E3%82%B8%E3%83%A3%E3%83%91%E3%83%B3': 'http://ja.wikipedia.org/wiki/\xe3\x82\xad\xe3\x83\xa3\xe3\x82\xbf\xe3\x83\x94\xe3\x83\xa9\xe3\x83\xbc\xe3\x82\xb8\xe3\x83\xa3\xe3\x83\x91\xe3\x83\xb3'.decode('utf8'),
+            'http://ja.wikipedia.org/wiki/%E3%82%AD%E3%83%A3%E3%82%BF%E3%83%94%E3%83%A9%E3%83%BC%E3%82%B8%E3%83%A3%E3%83%91%E3%83%B3': u'http://ja.wikipedia.org/wiki/\u30ad\u30e3\u30bf\u30d4\u30e9\u30fc\u30b8\u30e3\u30d1\u30f3',
             'http://test.example/\xe3\x82\xad': 'http://test.example/\xe3\x82\xad',
 
             # check that %23 (#) is not escaped where it shouldn't be
diff --git a/urlnorm.py b/urlnorm.py
index fc18daa..fdb7817 100644
--- a/urlnorm.py
+++ b/urlnorm.py
@@ -66,7 +66,6 @@
 __version__ = "1.1.2.pinterest2"
 
 from urlparse import urlparse, urlunparse
-from string import lower
 import re
 
 class InvalidUrl(Exception):
@@ -151,7 +150,7 @@ def norm(url):
 
 def norm_tuple(scheme, authority, path, parameters, query, fragment):
     """given individual url components, return its normalized form"""
-    scheme = lower(scheme)
+    scheme = scheme.lower()
     if not scheme:
         raise InvalidUrl('missing URL scheme')
     authority = norm_netloc(scheme, authority)
@@ -221,7 +220,7 @@ def norm_netloc(scheme, netloc):
     if host[-1] == '.':
         host = host[:-1]
 
-    authority = lower(host)
+    authority = host.lower()
     if 'xn--' in authority:
         subdomains = [_idn(subdomain) for subdomain in authority.split('.')]
         authority = '.'.join(subdomains)

From 42aee5e4ece668d3804fad5a1e904173fb0f0737 Mon Sep 17 00:00:00 2001
From: Joe Gordon <jogo@pinterest.com>
Date: Thu, 8 Feb 2018 15:41:27 -0800
Subject: [PATCH 4/6] Add python3 support via modernize

Add python3 support using six.
---
 setup.py        |  1 +
 test_urlnorm.py |  2 ++
 urlnorm.py      | 27 ++++++++++++++++-----------
 3 files changed, 19 insertions(+), 11 deletions(-)

diff --git a/setup.py b/setup.py
index 5d0672d..645087d 100644
--- a/setup.py
+++ b/setup.py
@@ -9,6 +9,7 @@
         description="Normalize a URL to a standard unicode encoding",
         py_modules=['urlnorm'],
         license='MIT License',
+        install_requires=['six'],
         author='Jehiah Czebotar',
         author_email='jehiah@gmail.com',
         url='http://github.com/jehiah/urlnorm',
diff --git a/test_urlnorm.py b/test_urlnorm.py
index b96ff46..e4baff3 100644
--- a/test_urlnorm.py
+++ b/test_urlnorm.py
@@ -1,3 +1,4 @@
+# -*- coding: utf8 -*-
 """
 this is a py.test test file
 """
@@ -43,6 +44,7 @@ def pytest_generate_tests(metafunc):
             # check that %20 or %25 is not unescaped to ' ' or %
             'http://test.example/%25/?p=%20val%20%25' : 'http://test.example/%25/?p=%20val%20%25',
             "http://test.domain/I%C3%B1t%C3%ABrn%C3%A2ti%C3%B4n%EF%BF%BDliz%C3%A6ti%C3%B8n" : "http://test.domain/I\xc3\xb1t\xc3\xabrn\xc3\xa2ti\xc3\xb4n\xef\xbf\xbdliz\xc3\xa6ti\xc3\xb8n",
+            "http://test.domain/I%C3%B1t%C3%ABrn%C3%A2ti%C3%B4n%EF%BF%BDliz%C3%A6ti%C3%B8n" : u"http://test.domain/Iñtërnâtiôn�lizætiøn",
             # check that spaces are collated to '+'
             "http://test.example/path/with a%20space+/" : "http://test.example/path/with%20a%20space+/",
             "http://[2001:db8:1f70::999:de8:7648:6e8]/test" : "http://[2001:db8:1f70::999:de8:7648:6e8]/test", #ipv6 address
diff --git a/urlnorm.py b/urlnorm.py
index fdb7817..5c9ffa5 100644
--- a/urlnorm.py
+++ b/urlnorm.py
@@ -39,6 +39,10 @@
      - more fine-grained authority parsing and normalisation
 """
 
+from __future__ import absolute_import
+from six import unichr
+import six
+from six.moves import range
 __license__ = """
 Copyright (c) 1999-2002 Mark Nottingham <mnot@pobox.com>
 Copyright (c) 2010 Jehiah Czebotar <jehiah@gmail.com>
@@ -65,7 +69,7 @@
 # also update in setup.py
 __version__ = "1.1.2.pinterest2"
 
-from urlparse import urlparse, urlunparse
+from six.moves.urllib.parse import urlparse, urlunparse, unquote
 import re
 
 class InvalidUrl(Exception):
@@ -104,8 +108,8 @@ class InvalidUrl(Exception):
 qs_unsafe_list = ' ?&=+%#'
 fragment_unsafe_list = ' +%#'
 path_unsafe_list = ' /?;%+#'
-_hextochr = dict(('%02x' % i, chr(i)) for i in range(256))
-_hextochr.update(('%02X' % i, chr(i)) for i in range(256))
+_hextochr = dict((b'%02x' % i, six.int2byte(i)) for i in range(256))
+_hextochr.update((b'%02X' % i, six.int2byte(i)) for i in range(256))
 
 def unquote_path(s):
     return unquote_safe(s, path_unsafe_list)
@@ -123,22 +127,23 @@ def unquote_safe(s, unsafe_list):
     """unquote percent escaped string except for percent escape sequences that are in unsafe_list"""
     # note: this build utf8 raw strings ,then does a .decode('utf8') at the end.
     # as a result it's doing .encode('utf8') on each block of the string as it's processed.
-    res = _utf8(s).split('%')
-    for i in xrange(1, len(res)):
+    unsafe_list = [_utf8(i) for i in unsafe_list]
+    res = _utf8(s).split(b'%')
+    for i in range(1, len(res)):
         item = res[i]
         try:
             raw_chr = _hextochr[item[:2]]
             if raw_chr in unsafe_list or ord(raw_chr) < 20:
                 # leave it unescaped (but uppercase the percent escape)
-                res[i] = '%' + item[:2].upper() + item[2:]
+                res[i] = b'%' + item[:2].upper() + item[2:]
             else:
                 res[i] = raw_chr + item[2:]
         except KeyError:
-            res[i] = '%' + item
+            res[i] = b'%' + item
         except UnicodeDecodeError:
             # note: i'm not sure what this does
             res[i] = unichr(int(item[:2], 16)) + item[2:]
-    o = "".join(res)
+    o = b"".join(res)
     return _unicode(o)
 
 def norm(url):
@@ -242,14 +247,14 @@ def _idn(subdomain):
 
 
 def _utf8(value):
-    if isinstance(value, unicode):
+    if isinstance(value, six.text_type):
         return value.encode("utf-8")
     assert isinstance(value, str)
     return value
 
 
 def _unicode(value):
-    if isinstance(value, str):
+    if isinstance(value, six.binary_type):
         return value.decode("utf-8")
-    assert isinstance(value, unicode)
+    assert isinstance(value, six.text_type)
     return value

From c90e8e58a777269173e64f3b1370aa290a15e2ab Mon Sep 17 00:00:00 2001
From: Joe Gordon <jogo@pinterest.com>
Date: Thu, 8 Feb 2018 15:57:16 -0800
Subject: [PATCH 5/6] Add python 3 classifiers

Add trove classifiers so that caniusepython3
(https://github.com/brettcannon/caniusepython3) will detect urlnorm as
supporting python3.
---
 setup.py | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/setup.py b/setup.py
index 645087d..2e024f0 100644
--- a/setup.py
+++ b/setup.py
@@ -14,4 +14,10 @@
         author_email='jehiah@gmail.com',
         url='http://github.com/jehiah/urlnorm',
         download_url="http://github.com/downloads/jehiah/urlnorm/urlnorm-%s.tar.gz" % version,
+        classifiers=[
+            'Operating System :: OS Independent',
+            'Programming Language :: Python :: 2.7',
+            'Programming Language :: Python :: 3',
+            'Programming Language :: Python :: 3.6',
+        ],
         )

From 9d76bad48e91fa8ecb9d75f7a9c24b74fa8a9af3 Mon Sep 17 00:00:00 2001
From: Joe Gordon <jogo@pinterest.com>
Date: Thu, 8 Feb 2018 16:13:36 -0800
Subject: [PATCH 6/6] Bump version

new version supports python3
---
 setup.py   | 2 +-
 urlnorm.py | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/setup.py b/setup.py
index 2e024f0..3b9915a 100644
--- a/setup.py
+++ b/setup.py
@@ -1,7 +1,7 @@
 from distutils.core import setup
 
 # also update in urlnorm.py
-version = '1.1.2.pinterest3'
+version = '1.1.2.pinterest4'
 
 setup(name='urlnorm',
         version=version,
diff --git a/urlnorm.py b/urlnorm.py
index 5c9ffa5..fe8b0bd 100644
--- a/urlnorm.py
+++ b/urlnorm.py
@@ -67,7 +67,7 @@
 """
 
 # also update in setup.py
-__version__ = "1.1.2.pinterest2"
+__version__ = "1.1.2.pinterest4"
 
 from six.moves.urllib.parse import urlparse, urlunparse, unquote
 import re