diff --git a/.gitignore b/.gitignore index 8dbb815..b4c34da 100644 --- a/.gitignore +++ b/.gitignore @@ -10,3 +10,5 @@ # repository level. *.egg-info +# Editor-specific files. +.idea/ diff --git a/tests/extras_json_test.py b/tests/extras_json_test.py index 917848a..cb80d85 100644 --- a/tests/extras_json_test.py +++ b/tests/extras_json_test.py @@ -1,5 +1,6 @@ # -*- coding: utf-8 -*- -from webapp2_extras import json + +from webapp2_extras import escape as json import test_base diff --git a/tests/extras_security_test.py b/tests/extras_security_test.py index 9a0a41c..7d37784 100644 --- a/tests/extras_security_test.py +++ b/tests/extras_security_test.py @@ -13,19 +13,21 @@ def test_create_token(self): self.assertRaises(ValueError, security.create_token, 0) self.assertRaises(ValueError, security.create_token, -1) - token = security.create_token(16) + token = security.create_token(16, 16) self.assertTrue(re.match(r'^[a-f0-9]{4}$', token) is not None) - token = security.create_token(32) + token = security.create_token(16, 10) + self.assertTrue(int(token, 10) >= 0) + + token = security.create_token(32, 16) self.assertTrue(re.match(r'^[a-f0-9]{8}$', token) is not None) - token = security.create_token(64) + token = security.create_token(64, 16) self.assertTrue(re.match(r'^[a-f0-9]{16}$', token) is not None) - token = security.create_token(128) + token = security.create_token(128, 16) self.assertTrue(re.match(r'^[a-f0-9]{32}$', token) is not None) - token = security.create_token(16, True) def test_create_check_password_hash(self): self.assertRaises(TypeError, security.create_password_hash, 'foo', diff --git a/tests/misc_test.py b/tests/misc_test.py index 5390429..15a7469 100644 --- a/tests/misc_test.py +++ b/tests/misc_test.py @@ -64,10 +64,10 @@ def test_import_string(self): self.assertRaises(AttributeError, webapp2.import_string, 'webob.dfasfasdfdsfsd') def test_to_utf8(self): - res = webapp2._to_utf8('ábcdéf'.decode('utf-8')) + res = webapp2.types.unicode_to_utf8('ábcdéf'.decode('utf-8')) self.assertEqual(isinstance(res, str), True) - res = webapp2._to_utf8('abcdef') + res = webapp2.types.unicode_to_utf8('abcdef') self.assertEqual(isinstance(res, str), True) ''' diff --git a/webapp2.py b/webapp2/__init__.py old mode 100755 new mode 100644 similarity index 97% rename from webapp2.py rename to webapp2/__init__.py index bef33cd..3b4ec4d --- a/webapp2.py +++ b/webapp2/__init__.py @@ -14,7 +14,13 @@ import logging import re import urllib -import urlparse + +try: + # Python 3. + from urllib.parse import urljoin, urlunsplit +except ImportError: + # Python 2.x + from urlparse import urljoin, urlunsplit import webob from webob import exc @@ -44,6 +50,10 @@ def _run(self, app): run_wsgi_app = run_bare_wsgi_app = classmethod(_run) +from webapp2.types import \ + bytes, is_bytes, is_bytes_or_unicode, bytes_to_unicode, \ + is_unicode, is_list, is_tuple, is_dict, unicode_to_utf8, to_unicode_if_bytes, unicode_string + __version_info__ = ('1', '8', '1') __version__ = '.'.join(__version_info__) @@ -234,11 +244,11 @@ def blank(cls, path, environ=None, base_url=None, environ['REQUEST_METHOD'] = 'POST' if hasattr(data, 'items'): data = data.items() - if not isinstance(data, str): + if not is_bytes(data): data = urllib.urlencode(data) environ['wsgi.input'] = StringIO(data) environ['webob.is_body_seekable'] = True - environ['CONTENT_LENGTH'] = str(len(data)) + environ['CONTENT_LENGTH'] = bytes(len(data)) environ['CONTENT_TYPE'] = 'application/x-www-form-urlencoded' base = super(Request, cls).blank(path, environ=environ, @@ -326,10 +336,12 @@ def write(self, text): """Appends a text to the response body.""" # webapp uses StringIO as Response.out, so we need to convert anything # that is not str or unicode to string to keep same behavior. - if not isinstance(text, basestring): - text = unicode(text) + #if not is_bytes_or_unicode(text): + # text = bytes(text) + if not is_bytes_or_unicode(text): + text = unicode_string(text) - if isinstance(text, unicode) and not self.charset: + if is_unicode(text) and not self.charset: self.charset = self.default_charset super(Response, self).write(text) @@ -341,11 +353,11 @@ def _set_status(self, value): if isinstance(value, (int, long)): code = int(value) else: - if isinstance(value, unicode): + if is_unicode(value): # Status messages have to be ASCII safe, so this is OK. - value = str(value) + value = bytes(value) - if not isinstance(value, str): + if not is_bytes(value): raise TypeError( 'You must set status to a string or integer (not %s)' % type(value)) @@ -397,7 +409,7 @@ def _get_headers(self): def _set_headers(self, value): if hasattr(value, 'items'): value = value.items() - elif not isinstance(value, list): + elif not is_list(value): raise TypeError('Response headers must be a list or dictionary.') self.headerlist = value @@ -424,7 +436,7 @@ def wsgi_write(self, start_response): if (self.headers.get('Cache-Control') == 'no-cache' and not self.headers.get('Expires')): self.headers['Expires'] = 'Fri, 01 Jan 1990 00:00:00 GMT' - self.headers['Content-Length'] = str(len(self.body)) + self.headers['Content-Length'] = bytes(len(self.body)) write = start_response(self.status, self.headerlist) write(self.body) @@ -895,7 +907,7 @@ def __init__(self, template, handler=None, name=None, defaults=None, self.defaults = defaults or {} self.methods = methods self.schemes = schemes - if isinstance(handler, basestring) and ':' in handler: + if is_bytes_or_unicode(handler) and ':' in handler: if handler_method: raise ValueError( "If handler_method is defined in a Route, handler " @@ -974,8 +986,8 @@ def _build(self, args, kwargs): raise KeyError('Missing argument "%s" to build URI.' % \ name.strip('_')) - if not isinstance(value, basestring): - value = str(value) + if not is_bytes_or_unicode(value): + value = bytes(value) if not regex.match(value): raise ValueError('URI buiding error: Value "%s" is not ' @@ -1092,7 +1104,7 @@ def add(self, route): A :class:`Route` instance or, for compatibility with webapp, a tuple ``(regex, handler_class)``. """ - if isinstance(route, tuple): + if is_tuple(route): # Exceptional compatibility case: route compatible with webapp. route = self.route_class(*route) @@ -1224,7 +1236,7 @@ def default_dispatcher(self, request, response): if route.handler_adapter is None: handler = route.handler - if isinstance(handler, basestring): + if is_bytes_or_unicode(handler): if handler not in self.handlers: self.handlers[handler] = handler = import_string(handler) else: @@ -1527,7 +1539,7 @@ def handle_exception(self, request, response, e): handler = self.error_handlers.get(code) if handler: - if isinstance(handler, basestring): + if is_bytes_or_unicode(handler): self.error_handlers[code] = handler = import_string(handler) return handler(request, response, e) @@ -1644,7 +1656,7 @@ def redirect(uri, permanent=False, abort=False, code=None, body=None, """ if uri.startswith(('.', '/')): request = request or get_request() - uri = str(urlparse.urljoin(request.url, uri)) + uri = bytes(urljoin(request.url, uri)) if code is None: if permanent: @@ -1724,7 +1736,7 @@ def import_string(import_name, silent=False): :returns: The imported object. """ - import_name = _to_utf8(import_name) + import_name = unicode_to_utf8(import_name) try: if '.' in import_name: module, obj = import_name.rsplit('.', 1) @@ -1760,19 +1772,19 @@ def _urlunsplit(scheme=None, netloc=None, path=None, query=None, netloc = None if path: - path = urllib.quote(_to_utf8(path)) + path = urllib.quote(unicode_to_utf8(path)) - if query and not isinstance(query, basestring): - if isinstance(query, dict): + if query and not is_bytes_or_unicode(query): + if is_dict(query): query = query.iteritems() # Sort args: commonly needed to build signatures for services. query = urllib.urlencode(sorted(query)) if fragment: - fragment = urllib.quote(_to_utf8(fragment)) + fragment = urllib.quote(unicode_to_utf8(fragment)) - return urlparse.urlunsplit((scheme, netloc, path, query, fragment)) + return urlunsplit((scheme, netloc, path, query, fragment)) def _get_handler_methods(handler): @@ -1796,14 +1808,6 @@ def _normalize_handler_method(method): return method.lower().replace('-', '_') -def _to_utf8(value): - """Encodes a unicode value to UTF-8 if not yet encoded.""" - if isinstance(value, str): - return value - - return value.encode('utf-8') - - def _parse_route_template(template, default_sufix=''): """Lazy route template parser.""" variables = {} diff --git a/webapp2/types.py b/webapp2/types.py new file mode 100644 index 0000000..fa67a3f --- /dev/null +++ b/webapp2/types.py @@ -0,0 +1,265 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# Type conversion utilities. +# +# Copyright (C) 2009 Facebook +# Copyright (C) 2011 tipfy.org +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +""" +:module: webapp2.types +:synopsis: Common portable Python type conversion and detection. + +``bytes``, ``str``, ``unicode``, and ``basestring`` mean different +things to Python 2.5, 2.6, and 3.x. + +Python 2.5 +* ``bytes`` is not available. +* ``str`` is a byte string. +* ``unicode`` converts to unicode string. +* ``basestring`` exists. + +Python 2.6 +* ``bytes`` is available and maps to str +* ``str`` is a byte string. +* ``unicode`` converts to unicode string +* ``basestring`` exists. + +Python 3.x +* ``bytes`` is available and does not map to ``str``. +* ``str`` maps to the earlier ``unicode``, but ``unicode`` has been removed. +* ``basestring`` has been removed. +* ``unicode`` has been removed + +This module adds portable support for all three versions +of Python. It introduces these portable types that you can use +in your code: + +* ``bytes`` where you need byte strings. +* ``unicode_string`` where you need unicode strings +* a few other utility functions that hide all the + complications behind type checking therefore cleaning + up the code base. + +Type detection +-------------- +.. autofunction:: is_sequence +.. autofunction:: is_list +.. autofunction:: is_tuple +.. autofunction:: is_dict +.. autofunction:: is_unicode +.. autofunction:: is_bytes +.. autofunction:: is_bytes_or_unicode +.. autofunction:: unicode_to_utf8 +.. autofunction:: bytes_to_unicode +.. autofunction:: to_utf8_if_unicode +.. autofunction:: to_unicode_if_bytes +.. autofunction:: to_unicode_recursively +""" + +# Essentially, avoid using "str", "basestring", and "unicode". +# Use these types instead. They're portable between Python versions. + + +try: + # Python 2.6 + + bytes = bytes +except Exception: + # Python 2.5 does not have a built in bytes type. + bytes = str + +# Python 3.x also removes "basestring" and "unicode" +try: + # Not Python3 + unicode_string = unicode +except Exception: + # Python3. + unicode_string = str + basestring = (str, bytes) + + +def is_sequence(value): + """ + Determines whether the given value is a sequence. + + :param value: + The value to test. + :returns: + ``True`` if the value is a sequence; ``False`` otherwise. + """ + try: + list(value) + return True + except TypeError, exception: + assert "is not iterable" in bytes(exception) + return False + + +def is_list(value): + """ + Determines whether the given value is a list. + + :param value: + The value to test. + :returns: + ``True`` if the value is a list instance; ``False`` otherwise. + """ + return isinstance(value, list) + + +def is_tuple(value): + """ + Determines whether the given value is a tuple. + + :param value: + The value to test. + :returns: + ``True`` if the value is a tuple instance; ``False`` otherwise. + """ + return isinstance(value, tuple) + + +def is_dict(value): + """ + Determines whether the given value is a dict. + + :param value: + The value to test. + :returns: + ``True`` if the value is a dict instance; ``False`` otherwise. + """ + return isinstance(value, dict) + + +def is_unicode(value): + """ + Determines whether the given value is a Unicode string. + + :param value: + The value to test. + :returns: + ``True`` if ``value`` is a Unicode string; ``False`` otherwise. + """ + return isinstance(value, unicode_string) + + +def is_bytes(value): + """ + Determines whether the given value is a byte string. + + :param value: + The value to test. + :returns: + ``True`` if ``value`` is a byte string; ``False`` otherwise. + """ + return isinstance(value, bytes) + + +def is_bytes_or_unicode(value): + """ + Determines whether the given value is an instance of a string irrespective + of whether it is a byte string or a Unicode string. + + :param value: + The value to test. + :returns: + ``True`` if ``value`` is a string; ``False`` otherwise. + """ + return isinstance(value, basestring) + + +def unicode_to_utf8(value): + """ + Converts a string argument to a UTF-8 encoded byte string if it is a + Unicode string. + + :param value: + If already a byte string or None, it is returned unchanged. + Otherwise it must be a Unicode string and is encoded as UTF-8. + """ + if value is None or is_bytes(value): + return value + assert is_unicode(value) + return value.encode("utf-8") + + +def bytes_to_unicode(value, encoding="utf-8"): + """ + Converts bytes to a Unicode string decoding it according to the encoding + specified. + + :param value: + If already a Unicode string or None, it is returned unchanged. + Otherwise it must be a byte string. + :param encoding: + The encoding used to decode bytes. Defaults to UTF-8 + """ + if value is None or is_unicode(value): + return value + assert is_bytes(value) + return value.decode(encoding) + + +def to_utf8_if_unicode(value): + """ + Converts an argument to a UTF-8 encoded byte string if the argument + is a Unicode string. + + :param value: + The value that will be UTF-8 encoded if it is a string. + :returns: + UTF-8 encoded byte string if the argument is a Unicode string; otherwise + the value is returned unchanged. + """ + return unicode_to_utf8(value) if is_unicode(value) else value + + +def to_unicode_if_bytes(value, encoding="utf-8"): + """ + Converts an argument to Unicode string if the argument is a byte string + decoding it as specified by the encoding. + + :param value: + The value that will be converted to a Unicode string. + :param encoding: + The encoding used to decode bytes. Defaults to UTF-8. + :returns: + Unicode string if the argument is a byte string. Otherwise the value + is returned unchanged. + """ + return bytes_to_unicode(value, encoding) if is_bytes(value) else value + + +def to_unicode_recursively(obj): + """ + Walks a simple data structure, converting byte strings to unicode. + + Supports lists, tuples, and dictionaries. + + :param obj: + The object to walk. + :returns: + obj with all byte strings converted into Unicode. + """ + if isinstance(obj, dict): + return dict((to_unicode_recursively(k), + to_unicode_recursively(v)) for (k, v) in obj.iteritems()) + elif isinstance(obj, list): + return list(to_unicode_recursively(i) for i in obj) + elif isinstance(obj, tuple): + return tuple(to_unicode_recursively(i) for i in obj) + elif is_bytes(obj): + return bytes_to_unicode(obj) + else: + return obj diff --git a/webapp2_extras/json.py b/webapp2_extras/escape.py similarity index 77% rename from webapp2_extras/json.py rename to webapp2_extras/escape.py index 33256bb..d93a899 100644 --- a/webapp2_extras/json.py +++ b/webapp2_extras/escape.py @@ -14,14 +14,30 @@ try: # Preference for installed library with updated fixes. import simplejson as json + json_loads = json.loads + json_dumps = json.dumps except ImportError: # pragma: no cover try: # Standard library module in Python >= 2.6. + # If you name this module as "json", this import + # will fail *always*. Therefore, this module has been renamed + # to "escape". import json - except ImportError: + + # Stray modules from other libraries named "json" can cause + # "loads" or "dumps" attributes missing errors. For instance, + # all the JSON tests are failing on my machine with Python 2.7. + # We use the Python built-in json module only if it has the + # attributes we need. See tornado commit log. + assert hasattr(json, "loads") and hasattr(json, "dumps") + json_loads = json.loads + json_dumps = json.dumps + except Exception: try: # Google App Engine. from django.utils import simplejson as json + json_loads = json.loads + json_dumps = json.dumps except ImportError: raise RuntimeError( 'A JSON parser is required, e.g., simplejson at ' @@ -49,7 +65,7 @@ def encode(value, *args, **kwargs): # although python's standard library does not, so we do it here. # http://stackoverflow.com/questions/1580647/json-why-are-forward-slashes-escaped kwargs.setdefault('separators', (',', ':')) - return json.dumps(value, *args, **kwargs).replace("