diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml new file mode 100644 index 0000000..6f13978 --- /dev/null +++ b/.gitlab-ci.yml @@ -0,0 +1,5 @@ +test: + script: + - set PYTHONPATH=.\source + - py.test test --html=report.html +# - coverage html -d coverage_html \ No newline at end of file diff --git a/source/lucidity/__init__.py b/source/lucidity/__init__.py index 74d1cfe..9df286b 100644 --- a/source/lucidity/__init__.py +++ b/source/lucidity/__init__.py @@ -8,6 +8,7 @@ from ._version import __version__ from .template import Template, Resolver +from .key import Key from .error import ParseError, FormatError, NotFound diff --git a/source/lucidity/key.py b/source/lucidity/key.py new file mode 100644 index 0000000..265cb87 --- /dev/null +++ b/source/lucidity/key.py @@ -0,0 +1,138 @@ +# :coding: utf-8 +import re +import logging + +class Key(object): + '''Key baseClass + used to store and validate values for templates + a dict needs to be provided + {'name': 'shot', + 'regex': r'^([0-9]){3}$', + 'type': int, + 'padding': '%04d' + } + name and type are the main keys that need to be provided + without them the object cannot initialize + types are "str" and "int" + + ''' + def __init__(self, name, type,**kwargs): + super(Key, self).__init__() + + self.__name = name + self.__type = type + self.__value = None + self.__regex = None + self.__padding = None + self.__function = None + self.__abstract = '' + self.__dbEntity = self.__name + self.__dbField = '' + + for key, value in kwargs.items(): + if key == 'regex': + self.__regex = re.compile(value) + if key == 'abstract': + self.__abstract = value + if key == 'function': + self.__function = value + self.__value = value() #call the function once at init to set a value + if key == 'padding': + if re.match(r'(\%)([0-9]{0,2})(d)', value): + self.__padding = value + else: + raise Exception('provided padding {0} is not a valid padding pattern must be like "%04d"'.format(value)) + if key == 'dbEntity': + self.__dbEntity = value + if key == 'dbField': + self.__dbField = value + + @property + def name(self): + return self.__name + + @property + def type(self): + return self.__type + + @property + def abstract(self): + return self.__abstract + + @property + def function(self): + return self.__function + + @property + def padding(self): + return self.__padding + + @property + def regex(self): + return self.__regex + + @property + def dbEntity(self): + return self.__dbEntity + + @property + def dbField(self): + return self.__dbField + + @property + def value(self): + return self.__value + + def setValue(self, value): + if value: + if self.type == int and isinstance(value,int) and self.padding: + ## we can skip the regex check if the incoming value is an int and we do have a padding + self.__value = self.type(value) + return + + if self.regex: + if re.match(self.regex, value): + if self.abstract: + if value == self.abstract: + return + self.__value = self.type(value) + return + elif str(value) == str(self.name): + self.__value = value + return + else: + raise Exception('provided value {0} does not match regex {1} for {2}'.format(value, self.regex.pattern,self.__repr__())) + else: + self.__value = self.type(value) + return + + def __repr__(self): + if self.value: + return ''.format(self.name,str(self)) + else: + return ''.format(self.name) + + def __str__(self): + ''' + used in the format method to fill the keys + ''' + if not self.value and not self.value == 0: + if self.abstract: + return str(self.abstract) + return str(self.name) + if self.type == str: + return str(self.value) + elif self.type == int and self.padding: + return self.padding % self.value + elif self.type == int: + return str(self.value) + elif self.function: + self.__value = self.function() + return str(self.value) + + def __cmp__(self,other): + ''' + compare against name + ''' + return cmp(self.name,other) + \ No newline at end of file diff --git a/source/lucidity/template.py b/source/lucidity/template.py index 1393803..66f49f0 100644 --- a/source/lucidity/template.py +++ b/source/lucidity/template.py @@ -14,21 +14,32 @@ _RegexType = type(re.compile('')) +def OrderedSet(alist): + """ Creates an ordered set of type list + from a list of tuples or other hashable items + """ + oset = [] + for item in alist: + if item not in oset: + oset.append(item) + return oset + class Template(object): '''A template.''' _STRIP_EXPRESSION_REGEX = re.compile(r'{(.+?)(:(\\}|.)+?)}') _PLAIN_PLACEHOLDER_REGEX = re.compile(r'{(.+?)}') _TEMPLATE_REFERENCE_REGEX = re.compile(r'{@(?P.+?)}') - + _OPTIONAL_KEY_REGEX = re.compile(r'(\[.+?\])') + ANCHOR_START, ANCHOR_END, ANCHOR_BOTH = (1, 2, 3) RELAXED, STRICT = (1, 2) - def __init__(self, name, pattern, anchor=ANCHOR_START, - default_placeholder_expression='[\w_.\-]+', - duplicate_placeholder_mode=RELAXED, - template_resolver=None): + def __init__(self, name, pattern, anchor=ANCHOR_BOTH, + default_placeholder_expression='[A-Za-z0-9\-]+', + duplicate_placeholder_mode=STRICT, validateOnInit = False, + template_resolver=None, key_resolver={}): '''Initialise with *name* and *pattern*. *anchor* determines how the pattern is anchored during a parse. A @@ -54,6 +65,7 @@ def __init__(self, name, pattern, anchor=ANCHOR_START, super(Template, self).__init__() self.duplicate_placeholder_mode = duplicate_placeholder_mode self.template_resolver = template_resolver + self.key_resolver = key_resolver self._default_placeholder_expression = default_placeholder_expression self._period_code = '_LPD_' @@ -61,9 +73,11 @@ def __init__(self, name, pattern, anchor=ANCHOR_START, self._name = name self._pattern = pattern self._anchor = anchor + self.__regexes = None ## once recompiled store the regexes here # Check that supplied pattern is valid and able to be compiled. - self._construct_regular_expression(self.pattern) + if validateOnInit: + self._construct_regular_expression(self.pattern) def __repr__(self): '''Return unambiguous representation of template.''' @@ -118,49 +132,83 @@ def parse(self, path): parsable by this template. ''' - # Construct regular expression for expanded pattern. - regex = self._construct_regular_expression(self.expanded_pattern()) - + # Construct a list of regular expression for expanded pattern. + if not self.__regexes: + self.__regexes = self._construct_regular_expression(self.expanded_pattern()) # Parse. parsed = {} - - match = regex.search(path) - if match: - data = {} - for key, value in sorted(match.groupdict().items()): - # Strip number that was added to make group name unique. - key = key[:-3] - - # If strict mode enabled for duplicate placeholders, ensure that - # all duplicate placeholders extract the same value. - if self.duplicate_placeholder_mode == self.STRICT: - if key in parsed: - if parsed[key] != value: - raise lucidity.error.ParseError( - 'Different extracted values for placeholder ' - '{0!r} detected. Values were {1!r} and {2!r}.' - .format(key, parsed[key], value) - ) - else: - parsed[key] = value - - # Expand dot notation keys into nested dictionaries. - target = data - - parts = key.split(self._period_code) - for part in parts[:-1]: - target = target.setdefault(part, {}) - - target[parts[-1]] = value - - return data - + for regex in self.__regexes: + match = regex.search(path) + + if match: + data = {} + for key, value in sorted(match.groupdict().items()): + # Strip number that was added to make group name unique. + key = key[:-3] + + # If strict mode enabled for duplicate placeholders, ensure that + # all duplicate placeholders extract the same value. + if self.duplicate_placeholder_mode == self.STRICT: + if key in parsed: + if parsed[key] != value: + raise lucidity.error.ParseError( + 'Different extracted values for placeholder ' + '{0!r} detected. Values were {1!r} and {2!r}.' + .format(key, parsed[key], value) + ) + else: + if value: + parsed[key] = value + + # Expand dot notation keys into nested dictionaries. + target = data + + parts = key.split(self._period_code) + for part in parts[:-1]: + target = target.setdefault(part, {}) + + target[parts[-1]] = value + + newData=dict() + for key,value in data.items(): + if value != None: + newData[key]=value + return newData + else: raise lucidity.error.ParseError( 'Path {0!r} did not match template pattern.'.format(path) ) - def format(self, data): + def missing(self, data, ignoreOptionals=False): + '''Returns an unsorted set of missing keys + optional keys are ignored/subtracted + ''' + data_keys = set(data.keys()) + if self.key_resolver: + new_data_keys = list() + for key in data_keys: + if key in self.key_resolver: + new_data_keys.append(self.key_resolver.get(key)) + else: + new_data_keys.append(key) + data_keys = new_data_keys + all_key = set(self.keys()).difference(data_keys) + if ignoreOptionals: + return all_key + minus_opt = all_key.difference(self.optional_keys()) + return minus_opt + + def apply_fields(self,data,abstract=False): + ''' + here for convenience + + :param data: dict of fields + :param abstract: if there are lucidity.key objects with an abstract key the formatting will use the abstract definition + ''' + self.format(data, abstract=abstract) + + def format(self, data, abstract=False): '''Return a path formatted by applying *data* to this template. Raise :py:class:`~lucidity.error.FormatError` if *data* does not @@ -171,37 +219,79 @@ def format(self, data): format_specification = self._construct_format_specification( self.expanded_pattern() ) + + #remove all missing optional keys from the format spec + format_specification = re.sub( + self._OPTIONAL_KEY_REGEX, + functools.partial(self._remove_optional_keys, data = data), + format_specification + ) return self._PLAIN_PLACEHOLDER_REGEX.sub( - functools.partial(self._format, data=data), + functools.partial(self._format, data=data,abstract=abstract), format_specification ) - def _format(self, match, data): + def _format(self, match, data, abstract= False): '''Return value from data for *match*.''' + placeholder = match.group(1) parts = placeholder.split('.') - try: value = data for part in parts: value = value[part] - + if part in self.key_resolver: + key = self.key_resolver.get(part) + key.setValue(value) + value = str(key) + if abstract and key.abstract: + value = str(key.abstract) + except (TypeError, KeyError): raise lucidity.error.FormatError( - 'Could not format data {0!r} due to missing key {1!r}.' - .format(data, placeholder) + 'Could not format data {0!r} due to missing key(s) {1!r}.' + .format(data, list(self.missing(data))) ) else: return value def keys(self): - '''Return unique set of placeholders in pattern.''' + '''Return unique list of placeholders in pattern.''' + format_specification = self._construct_format_specification( + self.expanded_pattern() + ) + if not self.key_resolver: + return OrderedSet(self._PLAIN_PLACEHOLDER_REGEX.findall(format_specification)) + else: + keys = list() + for key in OrderedSet(self._PLAIN_PLACEHOLDER_REGEX.findall(format_specification)): + if key in self.key_resolver: + keys.append(self.key_resolver.get(key)) + else: + keys.append(key) + return OrderedSet(keys) + + def optional_keys(self): format_specification = self._construct_format_specification( self.expanded_pattern() ) - return set(self._PLAIN_PLACEHOLDER_REGEX.findall(format_specification)) + optional_keys = list() + temp_keys = self._OPTIONAL_KEY_REGEX.findall(format_specification) + for key in temp_keys: + optional_keys.extend(self._PLAIN_PLACEHOLDER_REGEX.findall(key)) + if not self.key_resolver: + return OrderedSet(optional_keys) + else: + keys = list() + for key in set(optional_keys): + if key in self.key_resolver: + keys.append(self.key_resolver.get(key)) + else: + keys.append(key) + return OrderedSet(keys) + def references(self): '''Return unique set of referenced templates in pattern.''' @@ -210,51 +300,97 @@ def references(self): ) return set(self._TEMPLATE_REFERENCE_REGEX.findall(format_specification)) + def _remove_optional_keys(self, match, data): + pattern = match.group(0) + placeholders = list(set(self._PLAIN_PLACEHOLDER_REGEX.findall(pattern))) + for placeholder in placeholders: + if not placeholder in data: + return "" + return pattern[1:-1] + def _construct_format_specification(self, pattern): '''Return format specification from *pattern*.''' return self._STRIP_EXPRESSION_REGEX.sub('{\g<1>}', pattern) + def _construct_expressions(self, pattern): + optionalKeys = re.split(self._OPTIONAL_KEY_REGEX, pattern) + options = [''] + for opt in optionalKeys: + temp_options = [] + if opt == '': + continue + if opt.startswith('['): + temp_options = options[:] + opt = opt[1:-1] + for option in options: + temp_options.append(option + opt) + options = temp_options + if self.duplicate_placeholder_mode == self.STRICT: + temp = list() + for key in self.optional_keys(): + if isinstance(key,lucidity.Key): + key = key.name + occurences = 0 + for optKey in optionalKeys: + if optKey.__contains__(key): + occurences += 1 + if occurences > 1: + # we do have the same key twice or three times as an optional + # we only keep the options where we find the exact number of occurences + # all other variations will be dismissed + for option in options: + if option.__contains__(key): + if len(re.findall(key,option)) != occurences: + temp.append(option) + if temp: + options = list( set(options)-set(temp)) + return options + + def _construct_regular_expression(self, pattern): '''Return a regular expression to represent *pattern*.''' # Escape non-placeholder components. - expression = re.sub( - r'(?P{(.+?)(:(\\}|.)+?)?})|(?P.+?)', - self._escape, - pattern - ) - - # Replace placeholders with regex pattern. - expression = re.sub( - r'{(?P.+?)(:(?P(\\}|.)+?))?}', - functools.partial( - self._convert, placeholder_count=defaultdict(int) - ), - expression - ) - - if self._anchor is not None: - if bool(self._anchor & self.ANCHOR_START): - expression = '^{0}'.format(expression) - - if bool(self._anchor & self.ANCHOR_END): - expression = '{0}$'.format(expression) - - # Compile expression. - try: - compiled = re.compile(expression) - except re.error as error: - if any([ - 'bad group name' in str(error), - 'bad character in group name' in str(error) - ]): - raise ValueError('Placeholder name contains invalid ' - 'characters.') - else: - _, value, traceback = sys.exc_info() - message = 'Invalid pattern: {0}'.format(value) - raise ValueError, message, traceback #@IgnorePep8 - - return compiled + compiles = list() + + expressions = self._construct_expressions(pattern) + for expression in expressions: + expression = re.sub( + r'(?P{(.+?)(:(\\}|.)+?)?})|(?P.+?)', + self._escape, + expression + ) + + # Replace placeholders with regex pattern. + expression = re.sub( + r'{(?P.+?)(:(?P(\\}|.)+?))?}', + functools.partial( + self._convert, placeholder_count=defaultdict(int) + ), + expression + ) + + if self._anchor is not None: + if bool(self._anchor & self.ANCHOR_START): + expression = '^{0}'.format(expression) + + if bool(self._anchor & self.ANCHOR_END): + expression = '{0}$'.format(expression) + # Compile expression. + try: + compiled = re.compile(expression) + except re.error as error: + if any([ + 'bad group name' in str(error), + 'bad character in group name' in str(error) + ]): + raise ValueError('Placeholder name contains invalid ' + 'characters.') + else: + _, value, traceback = sys.exc_info() + message = 'Invalid pattern: {0}'.format(value) + raise ValueError, message, traceback #@IgnorePep8 + compiles.append(compiled) + return compiles def _convert(self, match, placeholder_count): '''Return a regular expression to represent *match*. @@ -286,6 +422,12 @@ def _convert(self, match, placeholder_count): ) expression = match.group('expression') + if self.key_resolver: + if placeholder_name[:-3] in self.key_resolver: + #check if there is a regex on the key object + key = self.key_resolver.get(placeholder_name[:-3]) + if key.regex: + expression = key.regex.pattern if expression is None: expression = self._default_placeholder_expression @@ -299,7 +441,7 @@ def _escape(self, match): groups = match.groupdict() if groups['other'] is not None: return re.escape(groups['other']) - + return groups['placeholder'] @@ -323,4 +465,4 @@ def __subclasshook__(cls, subclass): if cls is Resolver: return callable(getattr(subclass, 'get', None)) - return NotImplemented + return NotImplemented \ No newline at end of file diff --git a/test/unit/test_key.py b/test/unit/test_key.py new file mode 100644 index 0000000..4484952 --- /dev/null +++ b/test/unit/test_key.py @@ -0,0 +1,80 @@ +# :coding: utf-8 +# :copyright: Copyright (c) 2013 Martin Pengelly-Phillips +# :license: See LICENSE.txt. + +import os +import operator +import sys + +sys.path.append(os.path.join(os.path.dirname(__file__), '..','..', 'source')) + +import pytest + +import lucidity + +TEST_TEMPLATE_PATH = os.path.join( + os.path.dirname(__file__), '..', 'fixture', 'template' +) +from lucidity import Template +from lucidity import key as TemplateKeys + +@pytest.fixture(scope='session') +def keys(): + '''Register templates.''' + + keys = [ + {'name': 'ver', + 'regex': r'([0-9]){3}', + 'type': int, + 'padding': '%03d' + } + , + {'name': 'asset', + 'regex': r'[a-zA-Z]*', + 'type': str, + } + , + {'name': 'frame', + 'regex': r'([0-9]+|%[0-9]+[di]|[#@?]+)', + 'type': int, + 'abstract': '%04d', + 'padding': '%04d' + } + ] + keyResolver = dict() + for key in keys: + keyResolver[key.get('name')]= TemplateKeys.Key(**key) + return keyResolver + + +@pytest.mark.parametrize(('name','type'), [ + ('version', int), + ('asset', str) +], ids=[ + 'int key', + 'string key' +]) +def test_key(name, type): + '''Construct Key Objects''' + TemplateKeys.Key(name=name,type=type) + + +@pytest.mark.parametrize(('keyName', 'input','expected'), [ + ('ver', None , 'ver'), + ('ver', 3 , '003'), + ('asset', 'test', 'test'), + ('frame', 1, '0001'), + ('frame', 50, '0050'), + ('frame', 15550, '15550') +], ids=[ + 'version key no value', + 'version padding', + 'string key test', + 'frame 0001', + 'frame 0050', + 'frame 15550' +]) +def test_padding(keyName, input, expected, keys): + key = keys[keyName] + key.setValue(input) + assert str(key) == expected diff --git a/test/unit/test_lucidity.py b/test/unit/test_lucidity.py index 2bb9ea6..836cffe 100644 --- a/test/unit/test_lucidity.py +++ b/test/unit/test_lucidity.py @@ -4,6 +4,8 @@ import os import operator +import sys +sys.path.append(os.path.join(os.path.dirname(__file__), '..','..', 'source')) import pytest diff --git a/test/unit/test_template.py b/test/unit/test_template.py index d7139e4..b856df5 100644 --- a/test/unit/test_template.py +++ b/test/unit/test_template.py @@ -3,7 +3,9 @@ # :license: See LICENSE.txt. import pytest - +import sys +import os +sys.path.append(os.path.join(os.path.dirname(__file__), '..','..', 'source')) from lucidity import Template, Resolver from lucidity.error import ParseError, FormatError, ResolveError @@ -70,7 +72,7 @@ def test_valid_pattern(pattern): def test_invalid_pattern(pattern): '''Construct template with invalid pattern.''' with pytest.raises(ValueError): - Template('test', pattern) + Template('test', pattern, validateOnInit = True) @pytest.mark.parametrize(('pattern', 'path', 'expected'), [ @@ -97,7 +99,7 @@ def test_invalid_pattern(pattern): ]) def test_matching_parse(pattern, path, expected, template_resolver): '''Extract data from matching path.''' - template = Template('test', pattern, template_resolver=template_resolver) + template = Template('test', pattern, template_resolver=template_resolver, duplicate_placeholder_mode=1) data = template.parse(path) assert data == expected @@ -301,14 +303,14 @@ def test_keys_mutable_side_effect(): '''Avoid side effects mutating internal keys set.''' template = Template('test', '/single/{variable}') placeholders = template.keys() - assert placeholders == set(['variable']) + assert placeholders == ['variable'] # Mutate returned set. - placeholders.add('other') + placeholders.append('other') # Newly returned set should be unaffected. placeholders_b = template.keys() - assert placeholders_b == set(['variable']) + assert placeholders_b == ['variable'] @pytest.mark.parametrize(('pattern', 'expected'), [