From fb856db312aad98b4db634238b32b2e2006c1f6c Mon Sep 17 00:00:00 2001 From: Stefan Grosshauser Date: Mon, 3 Jul 2017 17:23:35 +0200 Subject: [PATCH 01/19] conform print calls to python3 --- namelist.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/namelist.py b/namelist.py index c5a0e4d..4143220 100644 --- a/namelist.py +++ b/namelist.py @@ -452,8 +452,8 @@ def test_inline_array(self): namelist = Namelist(input_str) - print input_str - print namelist.dump() + print(input_str) + print(namelist.dump()) self.assertEqual(namelist.dump(), input_str) From b0313ac377d4db8502e20d5dbb74d02c354a0c19 Mon Sep 17 00:00:00 2001 From: Stefan Grosshauser Date: Mon, 3 Jul 2017 17:22:12 +0200 Subject: [PATCH 02/19] rewrite parser to be able to parse more complicated namelist files. Valid logical values in other formats than .false. and F are recognised now. The parser relies in part on the ast.literal_eval() function. The result namelist.groups is now case-insensitive, i.e. the keys 'mode' and 'MODE' refer to the same item. Treatment of comments is much improved. --- namelist.py | 259 ++++++++++++++++++++++++++++++++-------------------- 1 file changed, 160 insertions(+), 99 deletions(-) diff --git a/namelist.py b/namelist.py index 4143220..6652a9f 100644 --- a/namelist.py +++ b/namelist.py @@ -1,3 +1,4 @@ + import unittest try: from collections import OrderedDict @@ -40,6 +41,47 @@ def __setattr__(self, attr, value): def __dir__(self): return self.data.keys() +class CaseInsensitiveDict(OrderedDict): + """ + This is an ordered dictionary which ignores the letter case of the + keys given (if the respective key is a string). + + Many thanks to user m000 who answered + https://stackoverflow.com/questions/2082152/case-insensitive-dictionary + so helpfully. 
+ """ + + @classmethod + def _k(cls, key): + return key.lower() if isinstance(key, str) else key + + def __init__(self, *args, **kwargs): + super(CaseInsensitiveDict, self).__init__(*args, **kwargs) + self._convert_keys() + def __getitem__(self, key): + return super(CaseInsensitiveDict, self).__getitem__(self.__class__._k(key)) + def __setitem__(self, key, value): + super(CaseInsensitiveDict, self).__setitem__(self.__class__._k(key), value) + def __delitem__(self, key): + return super(CaseInsensitiveDict, self).__delitem__(self.__class__._k(key)) + def __contains__(self, key): + return super(CaseInsensitiveDict, self).__contains__(self.__class__._k(key)) + def has_key(self, key): + return super(CaseInsensitiveDict, self).has_key(self.__class__._k(key)) + def pop(self, key, *args, **kwargs): + return super(CaseInsensitiveDict, self).pop(self.__class__._k(key), *args, **kwargs) + def get(self, key, *args, **kwargs): + return super(CaseInsensitiveDict, self).get(self.__class__._k(key), *args, **kwargs) + def setdefault(self, key, *args, **kwargs): + return super(CaseInsensitiveDict, self).setdefault(self.__class__._k(key), *args, **kwargs) + def update(self, E={}, **F): + super(CaseInsensitiveDict, self).update(self.__class__(E)) + super(CaseInsensitiveDict, self).update(self.__class__(**F)) + def _convert_keys(self): + for k in list(self.keys()): + v = super(CaseInsensitiveDict, self).pop(k) + self.__setitem__(k, v) + class Namelist(): """ Parses namelist files in Fortran 90 format, recognised groups are @@ -47,118 +89,137 @@ class Namelist(): """ def __init__(self, input_str): - self.groups = OrderedDict() - - group_re = re.compile(r'&([^&/]+)/', re.DOTALL) # allow blocks to span multiple lines - array_re = re.compile(r'(\w+)\((\d+)\)') + self.groups = CaseInsensitiveDict() + + namelist_start_line_re = re.compile(r'^\s*&(\w+)\s*$') + namelist_end_line_re = re.compile(r'^\s*/\s*$') + + # a pattern matching an array of stuff + a_number = r'[0-9\.\+\-eE]+' + #array_re 
= re.compile(r'(?:\s*([^,](?:\'[^\']*\')(?:\(\s*'+a_number+'\s*,\s*'+a_number+'\s*\))*)\s,)*') + + # a comma-separated list, of elements which either do not + # contain a comma, or may contain commas inside strings, or + # may contain commas inside paretheses. + # At the end of the line, the comma is optional. + # FIXME deal with abbrev. lists! + # FIXME strings containing parentheses will cause problems with this expression + array_re = re.compile(r"([\w.+-]+|\s*\'[^\']*\'\s*|\s[(][^),]+,[^),]+[)]\s*)(?:,|,?\s*$)") string_re = re.compile(r"\'\s*\w[^']*\'") self._complex_re = re.compile(r'^\((\d+.?\d*),(\d+.?\d*)\)$') - # remove all comments, since they may have forward-slashes - # TODO: store position of comments so that they can be re-inserted when - # we eventually save - filtered_lines = [] - for line in input_str.split('\n'): - if line.strip().startswith('!'): - continue - else: - filtered_lines.append(line) - - group_blocks = re.findall(group_re, "\n".join(filtered_lines)) - - for group_block in group_blocks: - block_lines = group_block.split('\n') - group_name = block_lines.pop(0).strip() - - group = {} - - for line in block_lines: - line = line.strip() - if line == "": - continue - if line.startswith('!'): - continue - - # commas at the end of lines seem to be optional - if line.endswith(','): - line = line[:-1] + # a pattern to match the non-comment part of a line. This + # should be able to deal with ! signs inside strings. 
+ comment_re = re.compile(r"((?:[^\'!]*(?:\'[^\']*\'))*)!.*") - k, v = line.split('=') - variable_name = k.strip() - variable_value = v.strip() + # match notation for Fortran logicals in namelist files: + self.logical_true_re = re.compile(r"[^tTfF\']*[tT].*") + self.logical_false_re = re.compile(r"[^tTfF\']*[fF].*") - variable_name_groups = re.findall(array_re, k) + # match abbreviated lists of identical items, like + # 509*-1.0000000000000000 + # 253*0 + # NEO_EQUIL_PARSE_SP_SEQ= 1, 2, 2*3 , 28*-1 + # 60*" " + self.abbrev_list_re = re.compile(r"([0-9]+)\*(.+)") - variable_index = None - if len(variable_name_groups) == 1: - variable_name, variable_index = variable_name_groups[0] - variable_index = int(variable_index)-1 # python indexing starts at 0 - - try: - parsed_value = self._parse_value(variable_value) - - if variable_index is None: - group[variable_name] = parsed_value - else: - if not variable_name in group: - group[variable_name] = {'_is_list': True} - group[variable_name][variable_index] = parsed_value - - except NoSingleValueFoundException as e: - # see we have several values inlined - if variable_value.count("'") in [0, 2]: - variable_arr_entries = variable_value.split() - else: - # we need to be more careful with lines with escaped - # strings, since they might contained spaces - matches = re.findall(string_re, variable_value) - variable_arr_entries = [s.strip() for s in matches] + # commas at the end of lines seem to be optional + keyval_line_re = re.compile(r"\s*(\w+)\s*=\s*(.+),?") + group = CaseInsensitiveDict() + current_group = None + for line in input_str.split('\n'): + # remove comments + line_without_comment = comment_re.sub(r"\1",line) + # remove whitespaces + line_without_comment = line_without_comment.strip() + if len(line_without_comment) == 0: + continue - for variable_index, inline_value in enumerate(variable_arr_entries): - parsed_value = self._parse_value(inline_value) + + m = namelist_start_line_re.match(line_without_comment) + if(m): 
+ if(current_group is None): + current_group = m.group(1) + group = CaseInsensitiveDict() + continue + else: + raise SyntaxError('Namelist %s starts, but namelist %s is not yet complete.' % (m.group(1),current_group)) - if variable_index is None: - group[variable_name] = parsed_value + m = namelist_end_line_re.match(line_without_comment) + if(m): + if(current_group is not None): + current_group = None + continue + else: + raise SyntaxError('End of namelist encountered, but there is no corresponding open namelist.') + + # other lines: key = value, or a continuation line + m = keyval_line_re.match(line_without_comment) + if(m): + if(current_group is not None): + variable_name = m.group(1) + variable_value = m.group(2) + + print(variable_name) + print(variable_value) + # parse the array with self-crafted regex + parsed_list = array_re.findall(variable_value) + print(parsed_list) + parsed_list = [self._parse_value(elem) for elem in parsed_list] + + # if it wasnt for special notations like .false. or 60*'' , one could + # simply use a parser from the python standard library for the right + # hand side as a whole: + #parsed_value = ast.literal_eval(variable_value) + try: + if(len(parsed_list) == 1): + group[variable_name] = parsed_list[0] else: - if not variable_name in group: - group[variable_name] = {'_is_list': True} - group[variable_name][variable_index] = parsed_value + group[variable_name] = parsed_list + except TypeError: + group[variable_name] = parsed_list + + else: + raise SyntaxError('Key %s encountered, but there is no enclosing namelist' % variable_name) - self.groups[group_name] = group + self.groups[current_group] = group - self._check_lists() + #self._check_lists() - def _parse_value(self, variable_value): + def _parse_value(self, variable_value_str): """ - Tries to parse a single value, raises an exception if no single value is matched + Tries to parse a single value, raises a SyntaxError if not successful. 
""" + import ast try: - parsed_value = int(variable_value) - except ValueError: - try: - parsed_value = float(variable_value) - except ValueError: - # check for complex number - complex_values = re.findall(self._complex_re, variable_value) - if len(complex_values) == 1: - a, b = complex_values[0] - parsed_value = complex(float(a),float(b)) - elif variable_value in ['.true.', 'T']: - # check for a boolean - parsed_value = True - elif variable_value in ['.false.', 'F']: - parsed_value = False - else: - # see if we have an escaped string - if variable_value.startswith("'") and variable_value.endswith("'") and variable_value.count("'") == 2: - parsed_value = variable_value[1:-1] - elif variable_value.startswith('"') and variable_value.endswith('"') and variable_value.count('"') == 2: - parsed_value = variable_value[1:-1] - else: - raise NoSingleValueFoundException(variable_value) + parsed_value = ast.literal_eval(variable_value_str.strip()) + except (ValueError, SyntaxError): + print(variable_value_str.strip()) + + abbrev_list_match = self.abbrev_list_re.match(variable_value_str) + if(abbrev_list_match): + parsed_value = int(abbrev_list_match.group(1)) * [self._parse_value(abbrev_list_match.group(2))] + elif(self.logical_true_re.match(variable_value_str)): + parsed_value = True + elif(self.logical_false_re.match(variable_value_str)): + parsed_value = False + else: + raise SyntaxError('Right hand side expression could not be parsed. The string is: %s' % (variable_value_str)) - return parsed_value + #FIXME distinguish complex scalar and a list of 2 reals + #FIXME rstrip strings, because this is what fortran does + try: + if(len(parsed_value) == 1): + # one gets a list of length 1 if the line ends with a + # comma, because (4,) for python is a tuple with one + # element, and (4) is just the scalar 4. 
+ return parsed_value[0] + else: + return parsed_value + except TypeError: + return parsed_value + def _check_lists(self): for group in self.groups.values(): @@ -412,7 +473,7 @@ def test_inline_array(self): class ParsingTests(unittest.TestCase): def test_single_value(self): input_str = """&CCFMSIM_SETUP -CCFMrad=800. +CCFMrad=800.000000 /""" namelist = Namelist(input_str) @@ -420,10 +481,10 @@ def test_single_value(self): def test_multigroup(self): input_str = """&CCFMSIM_SETUP -CCFMrad=800. +CCFMrad=800.000000 / &GROUP2 -R=500. +R=500.000000 /""" namelist = Namelist(input_str) @@ -447,7 +508,7 @@ def test_array(self): def test_inline_array(self): input_str = """&AADATA -AACOMPLEX= (3.,4.) (3.,4.) (5.,6.) (7.,7.) +AACOMPLEX= (3.000000,4.000000) (3.000000,4.000000) (5.000000,6.000000) (7.000000,7.000000) /""" namelist = Namelist(input_str) From 44f1923bfe5136dae1a76cefb0f3869cfdea4940 Mon Sep 17 00:00:00 2001 From: Stefan Grosshauser Date: Tue, 4 Jul 2017 14:25:02 +0200 Subject: [PATCH 03/19] update README --- README.md | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index 73cfadc..3aca295 100644 --- a/README.md +++ b/README.md @@ -1,14 +1,15 @@ -# Fortran namelist files in Python +# A Python module to parse Fortran namelist files Read in a namelist file: ``` from namelist_python import read_namelist_file namelist = read_namelist_file('SIM_CONFIG.nl') +namelist.groups['foo']['bar'] ``` -`namelist` is an instance of `namelist_python.Namelist` and all groups are -stored in the attribute `groups` with each variable in a nested dictionary -structure (using `OrderedDict` so that the order will be remembered). +This creates an instance of `namelist_python.Namelist` whose attribute +`groups` holds the data in a nested ordered case-insensitive dictionary +structure. 
Write a `Namelist` object back to a file: ``` From 282733f80edfd249a21b06e67124cd292b7e6e65 Mon Sep 17 00:00:00 2001 From: Stefan Grosshauser Date: Wed, 5 Jul 2017 10:34:05 +0200 Subject: [PATCH 04/19] handle multiple groups with same name An optional argument to the constructor of the Namelist object allows an alternative format to be used. Example: two 'species' namelists lead to keys 'species01' and 'species02'. --- namelist.py | 42 +++++++++++++++++++++++++++++++++--------- 1 file changed, 33 insertions(+), 9 deletions(-) diff --git a/namelist.py b/namelist.py index 6652a9f..8882398 100644 --- a/namelist.py +++ b/namelist.py @@ -84,12 +84,24 @@ def _convert_keys(self): class Namelist(): """ - Parses namelist files in Fortran 90 format, recognised groups are - available through 'groups' attribute. + Parses namelist files in Fortran 90 format. + + Note that while Fortran speaks of "namelists", this module uses + the term "group" to refer to individual namelists within a file.. + + After parsing, recognised groups are accessible through + the 'groups' attribute. + """ - def __init__(self, input_str): + def __init__(self, input_str, dup_group_format="%s%02d"): + """ + + The optional argument dup_group_format is the format to be used if multiple namelists have the same name. 
+ + """ self.groups = CaseInsensitiveDict() + self.dup_group_format = dup_group_format namelist_start_line_re = re.compile(r'^\s*&(\w+)\s*$') namelist_end_line_re = re.compile(r'^\s*/\s*$') @@ -127,6 +139,7 @@ def __init__(self, input_str): keyval_line_re = re.compile(r"\s*(\w+)\s*=\s*(.+),?") group = CaseInsensitiveDict() + list_of_groups = [] current_group = None for line in input_str.split('\n'): # remove comments @@ -136,11 +149,26 @@ def __init__(self, input_str): if len(line_without_comment) == 0: continue - m = namelist_start_line_re.match(line_without_comment) if(m): if(current_group is None): - current_group = m.group(1) + if(m.group(1) in list_of_groups): + if(m.group(1) in self.groups): + n = list_of_groups.count(m.group(1)) + groupname_with_counter = self.dup_group_format % (m.group(1), n) + if(groupname_with_counter not in self.groups): + self.groups[groupname_with_counter] = self.groups[m.group(1)] + self.groups.pop(m.group(1)) + else: + raise ValueError("Could not give namelist %s a counter, since namelist %s exists already." % (m.group(1), groupname_with_counter)) + + # increment the counter for the new one + n = list_of_groups.count(m.group(1)) + 1 + groupname_with_counter = self.dup_group_format % (m.group(1), n) + current_group = groupname_with_counter + else: + current_group = m.group(1) + list_of_groups.append(m.group(1)) group = CaseInsensitiveDict() continue else: @@ -161,11 +189,8 @@ def __init__(self, input_str): variable_name = m.group(1) variable_value = m.group(2) - print(variable_name) - print(variable_value) # parse the array with self-crafted regex parsed_list = array_re.findall(variable_value) - print(parsed_list) parsed_list = [self._parse_value(elem) for elem in parsed_list] # if it wasnt for special notations like .false. 
or 60*'' , one could @@ -195,7 +220,6 @@ def _parse_value(self, variable_value_str): try: parsed_value = ast.literal_eval(variable_value_str.strip()) except (ValueError, SyntaxError): - print(variable_value_str.strip()) abbrev_list_match = self.abbrev_list_re.match(variable_value_str) if(abbrev_list_match): From 444e1adf9ac6ca511dfb7d3ae08d08b3d8344857 Mon Sep 17 00:00:00 2001 From: Stefan Grosshauser Date: Wed, 5 Jul 2017 12:00:10 +0200 Subject: [PATCH 05/19] various improvements improve parsing: strings can be enclosed in double quotes, and the array_re pattern missed whitespaces in front of commas sometimes. Blanks at the right hand side are stripped from strings. Dumps will produce a scientific notation for floats by default. This can be changed with an optional argument to dump() . Properly check for string type. And document the attribute notation via the 'data' member. --- namelist.py | 96 ++++++++++++++++++++++++++--------------------------- 1 file changed, 47 insertions(+), 49 deletions(-) diff --git a/namelist.py b/namelist.py index 8882398..0db63c1 100644 --- a/namelist.py +++ b/namelist.py @@ -7,16 +7,16 @@ import re -class NoSingleValueFoundException(Exception): - pass - def read_namelist_file(filename): return Namelist(open(filename, 'r').read()) +# trick for py2/3 compatibility +if 'basestring' not in globals(): + basestring = str class AttributeMapper(): """ - Simple mapper to access dictionary items as attributes + Simple mapper to access dictionary items as attributes. 
""" def __init__(self, obj): @@ -53,7 +53,7 @@ class CaseInsensitiveDict(OrderedDict): @classmethod def _k(cls, key): - return key.lower() if isinstance(key, str) else key + return key.lower() if isinstance(key, basestring) else key def __init__(self, *args, **kwargs): super(CaseInsensitiveDict, self).__init__(*args, **kwargs) @@ -89,8 +89,18 @@ class Namelist(): Note that while Fortran speaks of "namelists", this module uses the term "group" to refer to individual namelists within a file.. - After parsing, recognised groups are accessible through - the 'groups' attribute. + After parsing, + + nlist = namelist_python.read_namelist_file('input.dat') + + recognised groups are accessible through + the 'groups' attribute (which is a case-insensitive ordered dictionary) + + nlist.groups['mode']['chin'] + + or the data attribute + + nlist.data.mode.chin """ @@ -108,15 +118,14 @@ def __init__(self, input_str, dup_group_format="%s%02d"): # a pattern matching an array of stuff a_number = r'[0-9\.\+\-eE]+' - #array_re = re.compile(r'(?:\s*([^,](?:\'[^\']*\')(?:\(\s*'+a_number+'\s*,\s*'+a_number+'\s*\))*)\s,)*') # a comma-separated list, of elements which either do not # contain a comma, or may contain commas inside strings, or # may contain commas inside paretheses. # At the end of the line, the comma is optional. - # FIXME deal with abbrev. lists! + # FIXME deal with abbrev. lists. 
# FIXME strings containing parentheses will cause problems with this expression - array_re = re.compile(r"([\w.+-]+|\s*\'[^\']*\'\s*|\s[(][^),]+,[^),]+[)]\s*)(?:,|,?\s*$)") + array_re = re.compile(r"(\s*(?:[\w.+-]+|\'[^\']*\'|\"[^\']*\"|[(][^),]+,[^),]+[)])\s*)\s*(?:,|,?\s*$)") string_re = re.compile(r"\'\s*\w[^']*\'") self._complex_re = re.compile(r'^\((\d+.?\d*),(\d+.?\d*)\)$') @@ -138,14 +147,12 @@ def __init__(self, input_str, dup_group_format="%s%02d"): # commas at the end of lines seem to be optional keyval_line_re = re.compile(r"\s*(\w+)\s*=\s*(.+),?") - group = CaseInsensitiveDict() list_of_groups = [] current_group = None for line in input_str.split('\n'): - # remove comments - line_without_comment = comment_re.sub(r"\1",line) - # remove whitespaces - line_without_comment = line_without_comment.strip() + # remove comments and whitespaces + line_without_comment = comment_re.sub(r"\1",line).strip() + if len(line_without_comment) == 0: continue @@ -169,7 +176,7 @@ def __init__(self, input_str, dup_group_format="%s%02d"): else: current_group = m.group(1) list_of_groups.append(m.group(1)) - group = CaseInsensitiveDict() + self.groups[current_group] = CaseInsensitiveDict() continue else: raise SyntaxError('Namelist %s starts, but namelist %s is not yet complete.' 
% (m.group(1),current_group)) @@ -199,18 +206,16 @@ def __init__(self, input_str, dup_group_format="%s%02d"): #parsed_value = ast.literal_eval(variable_value) try: if(len(parsed_list) == 1): - group[variable_name] = parsed_list[0] + self.groups[current_group][variable_name] = parsed_list[0] else: - group[variable_name] = parsed_list + self.groups[current_group][variable_name] = parsed_list except TypeError: - group[variable_name] = parsed_list + self.groups[current_group][variable_name] = parsed_list else: raise SyntaxError('Key %s encountered, but there is no enclosing namelist' % variable_name) - - self.groups[current_group] = group - - #self._check_lists() + else: + raise SyntaxError('this line could not be parsed, please notify the author or contribute a patch: %s' % line_without_comment) def _parse_value(self, variable_value_str): """ @@ -219,6 +224,15 @@ def _parse_value(self, variable_value_str): import ast try: parsed_value = ast.literal_eval(variable_value_str.strip()) + try: + if(isinstance(parsed_value, basestring)): + # Fortran strings end with blanks + parsed_value = parsed_value.rstrip() + else: + parsed_value = [elem.rstrip() for elem in parsed_value] + except Exception as err: + # value is probably just not iterable + pass except (ValueError, SyntaxError): abbrev_list_match = self.abbrev_list_re.match(variable_value_str) @@ -245,56 +259,40 @@ def _parse_value(self, variable_value_str): return parsed_value - def _check_lists(self): - for group in self.groups.values(): - for variable_name, variable_values in group.items(): - if isinstance(variable_values, dict): - if '_is_list' in variable_values and variable_values['_is_list']: - variable_data = variable_values - del(variable_data['_is_list']) - - num_entries = len(variable_data.keys()) - variable_list = [None]*num_entries - - for i, value in variable_data.items(): - if i >= num_entries: - raise Exception("The variable '%s' has an array index assignment that is inconsistent with the number of list 
values" % variable) - else: - variable_list[i] = value - - group[variable_name] = variable_list - - def dump(self, array_inline=True): + def dump(self, array_inline=True, float_format="%13.5e"): lines = [] for group_name, group_variables in self.groups.items(): lines.append("&%s" % group_name) for variable_name, variable_value in group_variables.items(): if isinstance(variable_value, list): if array_inline: - lines.append("%s= %s" % (variable_name, " ".join([self._format_value(v) for v in variable_value]))) + lines.append("%s= %s" % (variable_name, " ".join([self._format_value(v, float_format) for v in variable_value]))) else: for n, v in enumerate(variable_value): - lines.append("%s(%d)=%s" % (variable_name, n+1, self._format_value(v))) + lines.append("%s(%d)=%s" % (variable_name, n+1, self._format_value(v, float_format))) else: - lines.append("%s=%s" % (variable_name, self._format_value(variable_value))) + lines.append("%s=%s" % (variable_name, self._format_value(variable_value, float_format))) lines.append("/") return "\n".join(lines) - def _format_value(self, value): + def _format_value(self, value, float_format): if isinstance(value, bool): return value and '.true.' or '.false.' elif isinstance(value, int): return "%d" % value elif isinstance(value, float): - return "%f" % value - elif isinstance(value, str): + return float_format % value + elif isinstance(value, basestring): return "'%s'" % value elif isinstance(value, complex): return "(%s,%s)" % (self._format_value(value.real), self._format_value(value.imag)) else: raise Exception("Variable type not understood: %s" % type(value)) + # create a read-only propery by using property() as a + # decorator. 
This function is then the getter function for the + # .data attribute: @property def data(self): return AttributeMapper(self.groups) From 5717604f164e66f1f3712661a23e79580a257db2 Mon Sep 17 00:00:00 2001 From: Stefan Grosshauser Date: Wed, 5 Jul 2017 12:10:17 +0200 Subject: [PATCH 06/19] fix parsing of abbreviated list notation --- namelist.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/namelist.py b/namelist.py index 0db63c1..027e5db 100644 --- a/namelist.py +++ b/namelist.py @@ -125,7 +125,7 @@ def __init__(self, input_str, dup_group_format="%s%02d"): # At the end of the line, the comma is optional. # FIXME deal with abbrev. lists. # FIXME strings containing parentheses will cause problems with this expression - array_re = re.compile(r"(\s*(?:[\w.+-]+|\'[^\']*\'|\"[^\']*\"|[(][^),]+,[^),]+[)])\s*)\s*(?:,|,?\s*$)") + array_re = re.compile(r"(\s*(?:[0-9]+\*)?(?:[\w.+-]+|\'[^\']*\'|\"[^\']*\"|[(][^),]+,[^),]+[)])\s*)\s*(?:,|,?\s*$)") string_re = re.compile(r"\'\s*\w[^']*\'") self._complex_re = re.compile(r'^\((\d+.?\d*),(\d+.?\d*)\)$') @@ -142,7 +142,7 @@ def __init__(self, input_str, dup_group_format="%s%02d"): # 253*0 # NEO_EQUIL_PARSE_SP_SEQ= 1, 2, 2*3 , 28*-1 # 60*" " - self.abbrev_list_re = re.compile(r"([0-9]+)\*(.+)") + self.abbrev_list_re = re.compile(r"\s*([0-9]+)\*(.+)\s*") # commas at the end of lines seem to be optional keyval_line_re = re.compile(r"\s*(\w+)\s*=\s*(.+),?") @@ -246,7 +246,6 @@ def _parse_value(self, variable_value_str): raise SyntaxError('Right hand side expression could not be parsed. 
The string is: %s' % (variable_value_str)) #FIXME distinguish complex scalar and a list of 2 reals - #FIXME rstrip strings, because this is what fortran does try: if(len(parsed_value) == 1): # one gets a list of length 1 if the line ends with a From 5b4e5f5f0d9db35e4a32ea79095b4499ff1ac2ef Mon Sep 17 00:00:00 2001 From: Stefan Grosshauser Date: Wed, 5 Jul 2017 12:53:19 +0200 Subject: [PATCH 07/19] parse complex values --- namelist.py | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/namelist.py b/namelist.py index 027e5db..ae8e432 100644 --- a/namelist.py +++ b/namelist.py @@ -127,7 +127,7 @@ def __init__(self, input_str, dup_group_format="%s%02d"): # FIXME strings containing parentheses will cause problems with this expression array_re = re.compile(r"(\s*(?:[0-9]+\*)?(?:[\w.+-]+|\'[^\']*\'|\"[^\']*\"|[(][^),]+,[^),]+[)])\s*)\s*(?:,|,?\s*$)") string_re = re.compile(r"\'\s*\w[^']*\'") - self._complex_re = re.compile(r'^\((\d+.?\d*),(\d+.?\d*)\)$') + self._complex_re = re.compile(r'\s*\([^,]+,[^,]+\)\s*') # a pattern to match the non-comment part of a line. This # should be able to deal with ! signs inside strings. @@ -224,6 +224,16 @@ def _parse_value(self, variable_value_str): import ast try: parsed_value = ast.literal_eval(variable_value_str.strip()) + + # use a regex to check if value is a complex number: (1.2 , 3.4) + # this is needed, because literal_eval parses both "(1.2 , 3.4)" + # and "1.2 , 3.4" into a tupel with two elements and then one + # cannot distinguish between a list of two numbers and a single + # complex number. This makes a difference when it comes to + # dumping, though. 
+ if(self._complex_re.match(variable_value_str)): + parsed_value = complex(parsed_value[0], parsed_value[1]) + try: if(isinstance(parsed_value, basestring)): # Fortran strings end with blanks @@ -285,9 +295,11 @@ def _format_value(self, value, float_format): elif isinstance(value, basestring): return "'%s'" % value elif isinstance(value, complex): - return "(%s,%s)" % (self._format_value(value.real), self._format_value(value.imag)) + complex_format = "("+float_format+","+float_format+")" + return complex_format % (value.real,value.imag) else: - raise Exception("Variable type not understood: %s" % type(value)) + print(value) + raise Exception("Variable type not understood: type %s" % type(value)) # create a read-only propery by using property() as a # decorator. This function is then the getter function for the From 4904d93a3fd8a782dcbf5ae1dfc43b6b9adadf91 Mon Sep 17 00:00:00 2001 From: Stefan Grosshauser Date: Wed, 5 Jul 2017 12:54:56 +0200 Subject: [PATCH 08/19] fix, lists containing abbreviated parts came out nested but should be flat --- namelist.py | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/namelist.py b/namelist.py index ae8e432..335e297 100644 --- a/namelist.py +++ b/namelist.py @@ -199,6 +199,7 @@ def __init__(self, input_str, dup_group_format="%s%02d"): # parse the array with self-crafted regex parsed_list = array_re.findall(variable_value) parsed_list = [self._parse_value(elem) for elem in parsed_list] + parsed_list = self._flatten(parsed_list) # if it wasnt for special notations like .false. 
or 60*'' , one could # simply use a parser from the python standard library for the right @@ -241,7 +242,7 @@ def _parse_value(self, variable_value_str): else: parsed_value = [elem.rstrip() for elem in parsed_value] except Exception as err: - # value is probably just not iterable + # value is probably just not iterable, or is an iterable of numbers pass except (ValueError, SyntaxError): @@ -285,6 +286,15 @@ def dump(self, array_inline=True, float_format="%13.5e"): return "\n".join(lines) + def _flatten(self, x): + result = [] + for elem in x: + if hasattr(elem, "__iter__") and not isinstance(elem, basestring): + result.extend(self._flatten(elem)) + else: + result.append(elem) + return result + def _format_value(self, value, float_format): if isinstance(value, bool): return value and '.true.' or '.false.' From 852ffafca25096dca7bea94cb11806a2f8c253d7 Mon Sep 17 00:00:00 2001 From: Stefan Grosshauser Date: Wed, 5 Jul 2017 12:56:04 +0200 Subject: [PATCH 09/19] minors --- namelist.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/namelist.py b/namelist.py index 335e297..e7f1018 100644 --- a/namelist.py +++ b/namelist.py @@ -114,6 +114,8 @@ def __init__(self, input_str, dup_group_format="%s%02d"): self.dup_group_format = dup_group_format namelist_start_line_re = re.compile(r'^\s*&(\w+)\s*$') + # FIXME the end of the namelist does not necessarily have to + # be in a separate line namelist_end_line_re = re.compile(r'^\s*/\s*$') # a pattern matching an array of stuff @@ -123,10 +125,8 @@ def __init__(self, input_str, dup_group_format="%s%02d"): # contain a comma, or may contain commas inside strings, or # may contain commas inside paretheses. # At the end of the line, the comma is optional. - # FIXME deal with abbrev. lists. 
# FIXME strings containing parentheses will cause problems with this expression array_re = re.compile(r"(\s*(?:[0-9]+\*)?(?:[\w.+-]+|\'[^\']*\'|\"[^\']*\"|[(][^),]+,[^),]+[)])\s*)\s*(?:,|,?\s*$)") - string_re = re.compile(r"\'\s*\w[^']*\'") self._complex_re = re.compile(r'\s*\([^,]+,[^,]+\)\s*') # a pattern to match the non-comment part of a line. This @@ -274,15 +274,16 @@ def dump(self, array_inline=True, float_format="%13.5e"): for group_name, group_variables in self.groups.items(): lines.append("&%s" % group_name) for variable_name, variable_value in group_variables.items(): - if isinstance(variable_value, list): + if(isinstance(variable_value, list)): if array_inline: - lines.append("%s= %s" % (variable_name, " ".join([self._format_value(v, float_format) for v in variable_value]))) + lines.append("%s= %s" % (variable_name, ", ".join([self._format_value(elem, float_format) for elem in variable_value]))) else: for n, v in enumerate(variable_value): - lines.append("%s(%d)=%s" % (variable_name, n+1, self._format_value(v, float_format))) + lines.append("%s(%d)= %s" % (variable_name, n+1, self._format_value(v, float_format))) else: lines.append("%s=%s" % (variable_name, self._format_value(variable_value, float_format))) lines.append("/") + lines.append("") return "\n".join(lines) From b0a1d1b2d1fb61f9a8586ef951ac12d1f21cc8be Mon Sep 17 00:00:00 2001 From: Stefan Grosshauser Date: Wed, 5 Jul 2017 12:55:49 +0200 Subject: [PATCH 10/19] handle duplicated namelists in a list Example: two 'SPECIES' namelists will be parsed into groups['species'][0] and groups['species'][1] They are also dumped correctly now. 
--- namelist.py | 64 +++++++++++++++++++++++++---------------------------- 1 file changed, 30 insertions(+), 34 deletions(-) diff --git a/namelist.py b/namelist.py index e7f1018..c038dd0 100644 --- a/namelist.py +++ b/namelist.py @@ -147,7 +147,6 @@ def __init__(self, input_str, dup_group_format="%s%02d"): # commas at the end of lines seem to be optional keyval_line_re = re.compile(r"\s*(\w+)\s*=\s*(.+),?") - list_of_groups = [] current_group = None for line in input_str.split('\n'): # remove comments and whitespaces @@ -158,28 +157,22 @@ def __init__(self, input_str, dup_group_format="%s%02d"): m = namelist_start_line_re.match(line_without_comment) if(m): + found_group = m.group(1).lower() if(current_group is None): - if(m.group(1) in list_of_groups): - if(m.group(1) in self.groups): - n = list_of_groups.count(m.group(1)) - groupname_with_counter = self.dup_group_format % (m.group(1), n) - if(groupname_with_counter not in self.groups): - self.groups[groupname_with_counter] = self.groups[m.group(1)] - self.groups.pop(m.group(1)) - else: - raise ValueError("Could not give namelist %s a counter, since namelist %s exists already." % (m.group(1), groupname_with_counter)) - - # increment the counter for the new one - n = list_of_groups.count(m.group(1)) + 1 - groupname_with_counter = self.dup_group_format % (m.group(1), n) - current_group = groupname_with_counter + if(found_group in self.groups): + if(not isinstance(self.groups[found_group],list)): + self.groups[found_group] = [self.groups[found_group]] + + self.groups[found_group].append(CaseInsensitiveDict()) + current_group = self.groups[found_group][-1] + else: - current_group = m.group(1) - list_of_groups.append(m.group(1)) - self.groups[current_group] = CaseInsensitiveDict() + self.groups[found_group] = CaseInsensitiveDict() + current_group = self.groups[found_group] + continue else: - raise SyntaxError('Namelist %s starts, but namelist %s is not yet complete.' 
% (m.group(1),current_group)) + raise SyntaxError('Namelist %s starts, but namelist %s is not yet complete.' % (found_group,current_group)) m = namelist_end_line_re.match(line_without_comment) if(m): @@ -207,11 +200,11 @@ def __init__(self, input_str, dup_group_format="%s%02d"): #parsed_value = ast.literal_eval(variable_value) try: if(len(parsed_list) == 1): - self.groups[current_group][variable_name] = parsed_list[0] + current_group[variable_name] = parsed_list[0] else: - self.groups[current_group][variable_name] = parsed_list + current_group[variable_name] = parsed_list except TypeError: - self.groups[current_group][variable_name] = parsed_list + current_group[variable_name] = parsed_list else: raise SyntaxError('Key %s encountered, but there is no enclosing namelist' % variable_name) @@ -271,19 +264,22 @@ def _parse_value(self, variable_value_str): def dump(self, array_inline=True, float_format="%13.5e"): lines = [] - for group_name, group_variables in self.groups.items(): - lines.append("&%s" % group_name) - for variable_name, variable_value in group_variables.items(): - if(isinstance(variable_value, list)): - if array_inline: - lines.append("%s= %s" % (variable_name, ", ".join([self._format_value(elem, float_format) for elem in variable_value]))) + for group_name, group_content in self.groups.items(): + + group_list = isinstance(group_content,list) and group_content or [group_content] + for group in group_list: + lines.append("&%s" % group_name.upper()) + for variable_name, variable_value in group.items(): + if(isinstance(variable_value, list)): + if array_inline: + lines.append("%s= %s" % (variable_name, ", ".join([self._format_value(elem, float_format) for elem in variable_value]))) + else: + for n, v in enumerate(variable_value): + lines.append("%s(%d)= %s" % (variable_name, n+1, self._format_value(v, float_format))) else: - for n, v in enumerate(variable_value): - lines.append("%s(%d)= %s" % (variable_name, n+1, self._format_value(v, float_format))) - 
else: - lines.append("%s=%s" % (variable_name, self._format_value(variable_value, float_format))) - lines.append("/") - lines.append("") + lines.append("%s=%s" % (variable_name, self._format_value(variable_value, float_format))) + lines.append("/") + lines.append("") return "\n".join(lines) From 88b407c516e854dc6f9d05a125ecf31c1c9a2400 Mon Sep 17 00:00:00 2001 From: Stefan Grosshauser Date: Wed, 5 Jul 2017 14:57:24 +0200 Subject: [PATCH 11/19] minor --- namelist.py | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/namelist.py b/namelist.py index c038dd0..03b05cb 100644 --- a/namelist.py +++ b/namelist.py @@ -104,14 +104,8 @@ class Namelist(): """ - def __init__(self, input_str, dup_group_format="%s%02d"): - """ - - The optional argument dup_group_format is the format to be used if multiple namelists have the same name. - - """ + def __init__(self, input_str): self.groups = CaseInsensitiveDict() - self.dup_group_format = dup_group_format namelist_start_line_re = re.compile(r'^\s*&(\w+)\s*$') # FIXME the end of the namelist does not necessarily have to From 727f6756f28ab2a7e429e9fccf5969ce51e58b0d Mon Sep 17 00:00:00 2001 From: Stefan Grosshauser Date: Wed, 5 Jul 2017 22:59:45 +0200 Subject: [PATCH 12/19] update README --- README.md | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/README.md b/README.md index 3aca295..6dd51f2 100644 --- a/README.md +++ b/README.md @@ -2,8 +2,8 @@ Read in a namelist file: ``` -from namelist_python import read_namelist_file -namelist = read_namelist_file('SIM_CONFIG.nl') +import namelist_python +namelist = namelist_python.read_namelist_file('config.dat') namelist.groups['foo']['bar'] ``` @@ -13,15 +13,16 @@ structure. 
Write a `Namelist` object back to a file: ``` -with open('NEW_FILE.nl', 'w') as f: +with open('new_file.dat', 'w') as f: f.write(namelist.dump()) ``` `dump` takes an optional argument `array_inline` a boolean which sets whether arrays should be inline or given in index notation. -If you use ipython there is usefull attribute called `data` which allows you to -do tab completion on the group and variable names, and do assignment: +If you use ipython or another interactive REPL prompt you may want to use +the `data` attribute which allows you to do tab completion on the group and +variable names: ``` In [7]: namelist.data.ATHAM_SETUP.dt @@ -39,13 +40,15 @@ Out[9]: 4.0 ## Features - Parses ints, floats, booleans, escaped strings and complex numbers. - - Parses arrays in index notation and inlined. - Can output in namelist format. - Tab-completion and variable assignment in interactive console ## Missing features - - Currently can't handle variable definitions across multiple lines + - Parse arrays in index notation + - Currently can't handle line continuations + - Currently can't handle lines with several parameters - Comments are not kept, and so won't exist in output.
+ - Module does not help to create a Namelist object from scratch ## Contribute Please send any namelist files that don't parse correctly or fix the code From 4c5becb379e4c1a95dcfed0403afe20d747cb67a Mon Sep 17 00:00:00 2001 From: Stefan Grosshauser Date: Wed, 5 Jul 2017 23:01:31 +0200 Subject: [PATCH 13/19] better __init__.py file If this folder is called 'namelist_python' and the parent directory is in the PYTHONPATH, then scripts can call import namelist_python --- __init__.py | 4 +++- namelist.py | 4 ++-- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/__init__.py b/__init__.py index 3c9c3a1..f270c60 100644 --- a/__init__.py +++ b/__init__.py @@ -1 +1,3 @@ -from namelist import read_namelist_file, Namelist, AttributeMapper + +__all__ = ['namelist'] +from .namelist import * diff --git a/namelist.py b/namelist.py index 03b05cb..bb7018d 100644 --- a/namelist.py +++ b/namelist.py @@ -3,7 +3,7 @@ try: from collections import OrderedDict except ImportError: - from utils import OrderedDict + from .utils import OrderedDict import re @@ -104,7 +104,7 @@ class Namelist(): """ - def __init__(self, input_str): + def __init__(self, input_str=""): self.groups = CaseInsensitiveDict() namelist_start_line_re = re.compile(r'^\s*&(\w+)\s*$') From 4471fa51d7d9ab71bd765e2dff550d7be1ae45d2 Mon Sep 17 00:00:00 2001 From: Stefan Grosshauser Date: Fri, 7 Jul 2017 15:25:56 +0200 Subject: [PATCH 14/19] more robustness against ifort namelist In these, strings are not quoted. 
--- namelist.py | 30 ++++++++++++++++++++++-------- 1 file changed, 22 insertions(+), 8 deletions(-) diff --git a/namelist.py b/namelist.py index bb7018d..a67b6f9 100644 --- a/namelist.py +++ b/namelist.py @@ -104,8 +104,9 @@ class Namelist(): """ - def __init__(self, input_str=""): + def __init__(self, input_str="", parse_strings_unqoted=True): self.groups = CaseInsensitiveDict() + self.parse_strings_unqoted = parse_strings_unqoted namelist_start_line_re = re.compile(r'^\s*&(\w+)\s*$') # FIXME the end of the namelist does not necessarily have to @@ -120,7 +121,7 @@ def __init__(self, input_str=""): # may contain commas inside paretheses. # At the end of the line, the comma is optional. # FIXME strings containing parentheses will cause problems with this expression - array_re = re.compile(r"(\s*(?:[0-9]+\*)?(?:[\w.+-]+|\'[^\']*\'|\"[^\']*\"|[(][^),]+,[^),]+[)])\s*)\s*(?:,|,?\s*$)") + array_re = re.compile(r"(\s*(?:[0-9]+\*)?(?:[^,(\'\"]+|\'[^\']*\'|\"[^\']*\"|[(][^),]+,[^),]+[)])\s*)\s*(?:,|,?\s*$)") self._complex_re = re.compile(r'\s*\([^,]+,[^,]+\)\s*') # a pattern to match the non-comment part of a line. 
This @@ -139,7 +140,7 @@ def __init__(self, input_str=""): self.abbrev_list_re = re.compile(r"\s*([0-9]+)\*(.+)\s*") # commas at the end of lines seem to be optional - keyval_line_re = re.compile(r"\s*(\w+)\s*=\s*(.+),?") + keyval_line_re = re.compile(r"\s*(\w+)\s*=\s*(.*),?") current_group = None for line in input_str.split('\n'): @@ -175,7 +176,7 @@ def __init__(self, input_str=""): continue else: raise SyntaxError('End of namelist encountered, but there is no corresponding open namelist.') - + # other lines: key = value, or a continuation line m = keyval_line_re.match(line_without_comment) if(m): @@ -203,7 +204,9 @@ def __init__(self, input_str=""): else: raise SyntaxError('Key %s encountered, but there is no enclosing namelist' % variable_name) else: - raise SyntaxError('this line could not be parsed, please notify the author or contribute a patch: %s' % line_without_comment) + warning_text = 'this line could not be parsed: %s' % line_without_comment + print("WARNING: %s" % warning_text) + #raise SyntaxError(warning_text) def _parse_value(self, variable_value_str): """ @@ -236,12 +239,23 @@ def _parse_value(self, variable_value_str): abbrev_list_match = self.abbrev_list_re.match(variable_value_str) if(abbrev_list_match): parsed_value = int(abbrev_list_match.group(1)) * [self._parse_value(abbrev_list_match.group(2))] - elif(self.logical_true_re.match(variable_value_str)): + elif(self.logical_true_re.match(variable_value_str) and + (variable_value_str.lower() in ['true','.true','.true.','t'] or not self.parse_strings_unqoted)): parsed_value = True - elif(self.logical_false_re.match(variable_value_str)): + if(variable_value_str.lower() not in ['true','.true','.true.','t'] and not self.parse_strings_unqoted): + print("WARNING: value %s was parsed to boolean %s" % (variable_value_str, parsed_value)) + elif(self.logical_false_re.match(variable_value_str) and + (variable_value_str.lower() in ['false','.false','.false.','f'] or not self.parse_strings_unqoted)): 
parsed_value = False + if(variable_value_str.lower() not in ['false','.false','.false.','f'] and not self.parse_strings_unqoted): + print("WARNING: value %s was parsed to boolean %s" % (variable_value_str, parsed_value)) else: - raise SyntaxError('Right hand side expression could not be parsed. The string is: %s' % (variable_value_str)) + quoted = "'" + variable_value_str.strip()+"'" + try: + parsed_value = ast.literal_eval(quoted) + print("WARNING: value %s was treated as %s" % (variable_value_str, quoted)) + except: + raise SyntaxError('Right hand side expression could not be parsed. The string is: %s' % (variable_value_str)) #FIXME distinguish complex scalar and a list of 2 reals try: From 1c0dd878c46ee4e83b1080ae47983e3e0cf88de1 Mon Sep 17 00:00:00 2001 From: Stefan Grosshauser Date: Tue, 11 Jul 2017 13:17:06 +0200 Subject: [PATCH 15/19] bring back index notation parsing, and output index notation if necessary tests pass now. --- namelist.py | 128 ++++++++++++++++++++++++++++++++-------------------- 1 file changed, 80 insertions(+), 48 deletions(-) diff --git a/namelist.py b/namelist.py index a67b6f9..9a58b6f 100644 --- a/namelist.py +++ b/namelist.py @@ -121,7 +121,7 @@ def __init__(self, input_str="", parse_strings_unqoted=True): # may contain commas inside paretheses. # At the end of the line, the comma is optional. # FIXME strings containing parentheses will cause problems with this expression - array_re = re.compile(r"(\s*(?:[0-9]+\*)?(?:[^,(\'\"]+|\'[^\']*\'|\"[^\']*\"|[(][^),]+,[^),]+[)])\s*)\s*(?:,|,?\s*$)") + array_re = re.compile(r"(\s*(?:[0-9]+\*)?(?:[^,(\'\"]+|\'[^\']*\'|\"[^\']*\"|[(][^),]+,[^),]+[)])\s*)\s*(?: |,|,?\s*$)") self._complex_re = re.compile(r'\s*\([^,]+,[^,]+\)\s*') # a pattern to match the non-comment part of a line. 
This @@ -140,7 +140,10 @@ def __init__(self, input_str="", parse_strings_unqoted=True): self.abbrev_list_re = re.compile(r"\s*([0-9]+)\*(.+)\s*") # commas at the end of lines seem to be optional - keyval_line_re = re.compile(r"\s*(\w+)\s*=\s*(.*),?") + keyval_line_re = re.compile(r"\s*([\w\(\)]+)\s*=\s*(.*),?") + + # detect index notation for arrays + array_index_notation_re = re.compile(r"\s*(\w+)\(([0-9])+\)\s*") current_group = None for line in input_str.split('\n'): @@ -184,6 +187,19 @@ def __init__(self, input_str="", parse_strings_unqoted=True): variable_name = m.group(1) variable_value = m.group(2) + # check if this is in array index notation + m_ind_notation = array_index_notation_re.match(variable_name) + if(m_ind_notation): + variable_name = m_ind_notation.group(1) + # Fortran indexing is 1-based, + # but used Python indexing here + index = int(m_ind_notation.group(2))-1 + #print("index notation: %s %i" % (variable_name, index)) + if(variable_name not in current_group): + current_group[variable_name] = (index+1)*[None] + elif(len(current_group[variable_name]) <= index): + current_group[variable_name].extend((index+1-len(current_group[variable_name]))*[None]) + # parse the array with self-crafted regex parsed_list = array_re.findall(variable_value) parsed_list = [self._parse_value(elem) for elem in parsed_list] @@ -195,12 +211,20 @@ def __init__(self, input_str="", parse_strings_unqoted=True): #parsed_value = ast.literal_eval(variable_value) try: if(len(parsed_list) == 1): - current_group[variable_name] = parsed_list[0] + if(m_ind_notation): + current_group[variable_name][index] = parsed_list[0] + else: + current_group[variable_name] = parsed_list[0] else: - current_group[variable_name] = parsed_list + if(m_ind_notation): + current_group[variable_name][index] = parsed_list + else: + current_group[variable_name] = parsed_list except TypeError: - current_group[variable_name] = parsed_list - + if(m_ind_notation): + current_group[variable_name][index] = 
parsed_list + else: + current_group[variable_name] = parsed_list else: raise SyntaxError('Key %s encountered, but there is no enclosing namelist' % variable_name) else: @@ -279,11 +303,12 @@ def dump(self, array_inline=True, float_format="%13.5e"): lines.append("&%s" % group_name.upper()) for variable_name, variable_value in group.items(): if(isinstance(variable_value, list)): - if array_inline: + if(array_inline and None not in variable_value): lines.append("%s= %s" % (variable_name, ", ".join([self._format_value(elem, float_format) for elem in variable_value]))) else: for n, v in enumerate(variable_value): - lines.append("%s(%d)= %s" % (variable_name, n+1, self._format_value(v, float_format))) + if(v is not None): + lines.append("%s(%d)= %s" % (variable_name, n+1, self._format_value(v, float_format))) else: lines.append("%s=%s" % (variable_name, self._format_value(variable_value, float_format))) lines.append("/") @@ -327,7 +352,7 @@ class ParsingTests(unittest.TestCase): def test_single_value(self): input_str = """ &CCFMSIM_SETUP - CCFMrad=800.0 + ccfmrad=800.0 / """ namelist = Namelist(input_str) @@ -339,7 +364,7 @@ def test_single_value(self): def test_multigroup(self): input_str = """ &CCFMSIM_SETUP - CCFMrad=800.0 + ccfmrad=800.0 / &GROUP2 R=500.0 @@ -356,11 +381,11 @@ def test_comment(self): input_str = """ ! Interesting comment at the start &CCFMSIM_SETUP - CCFMrad=800.0 + ccfmrad=800.0 ! And a comment some where in the middle / &GROUP2 - R=500.0 + r=500.0 / """ namelist = Namelist(input_str) @@ -447,12 +472,12 @@ def test_comment_with_forwardslash(self): input_str = """ ! Interesting comment at the start &CCFMSIM_SETUP - CCFMrad=800.0 + ccfmrad=800.0 ! And a comment some where in the middle/halfway ! var2=40 / &GROUP2 - R=500.0 + r=500.0 / """ namelist = Namelist(input_str) @@ -475,21 +500,21 @@ def test_inline_array(self): !&BOGUS rko=1 / ! 
&TTDATA - TTREAL = 1., - TTINTEGER = 2, - TTCOMPLEX = (3.,4.), - TTCHAR = 'namelist', - TTBOOL = T/ + ttreal = 1., + ttinteger = 2, + ttcomplex = (3.,4.), + ttchar = 'namelist', + ttbool = T/ &AADATA - AAREAL = 1. 1. 2. 3., - AAINTEGER = 2 2 3 4, - AACOMPLEX = (3.,4.) (3.,4.) (5.,6.) (7.,7.), - AACHAR = 'namelist' 'namelist' 'array' ' the lot', - AABOOL = T T F F/ + aareal = 1. 1. 2. 3., + aainteger = 2 2 3 4, + aacomplex = (3.,4.) (3.,4.) (5.,6.) (7.,7.), + aachar = 'namelist' 'namelist' 'array' ' the lot', + aabool = T T F F/ &XXDATA - XXREAL = 1., - XXINTEGER = 2, - XXCOMPLEX = (3.,4.)/! can have blank lines and comments in the namelist input file + xxreal = 1., + xxinteger = 2, + xxcomplex = (3.,4.)/! can have blank lines and comments in the namelist input file """ expected_output = { @@ -521,19 +546,22 @@ def test_inline_array(self): class ParsingTests(unittest.TestCase): def test_single_value(self): input_str = """&CCFMSIM_SETUP -CCFMrad=800.000000 -/""" +ccfmrad= 8.00000e+02 +/ +""" namelist = Namelist(input_str) self.assertEqual(namelist.dump(), input_str) def test_multigroup(self): input_str = """&CCFMSIM_SETUP -CCFMrad=800.000000 +ccfmrad= 8.00000e+02 / + &GROUP2 -R=500.000000 -/""" +r= 5.00000e+02 +/ +""" namelist = Namelist(input_str) self.assertEqual(namelist.dump(), input_str) @@ -541,30 +569,34 @@ def test_multigroup(self): def test_array(self): input_str = """&CCFMSIM_SETUP -var_trac_picture(1)='watcnew' -var_trac_picture(2)='watpnew' -var_trac_picture(3)='icecnew' -var_trac_picture(4)='granew' -des_trac_picture(1)='cloud_water' -des_trac_picture(2)='rain' -des_trac_picture(3)='cloud_ice' -des_trac_picture(4)='graupel' -/""" +var_trac_picture(1)= 'watcnew' +var_trac_picture(2)= 'watpnew' +var_trac_picture(3)= 'icecnew' +var_trac_picture(4)= 'granew' +des_trac_picture(1)= 'cloud_water' +des_trac_picture(2)= 'rain' +des_trac_picture(3)= 'cloud_ice' +des_trac_picture(4)= 'graupel' +/ +""" namelist = Namelist(input_str) 
self.assertEqual(namelist.dump(array_inline=False), input_str) def test_inline_array(self): input_str = """&AADATA -AACOMPLEX= (3.000000,4.000000) (3.000000,4.000000) (5.000000,6.000000) (7.000000,7.000000) -/""" +aacomplex= (3.000000,4.000000) (3.000000,4.000000) (5.000000,6.000000) (7.000000,7.000000) +bbcomplex= ( 3.00000e+00, 4.00000e+00), ( 3.00000e+00, 4.00000e+00), ( 5.00000e+00, 6.00000e+00), ( 7.00000e+00, 7.00000e+00) +/ +""" namelist = Namelist(input_str) - - print(input_str) - print(namelist.dump()) - - self.assertEqual(namelist.dump(), input_str) + expected_output = { + 'aadata': { + 'aacomplex': [complex(3.,4.),complex(3.,4.),complex(5.,6.),complex(7.,7.),], + 'bbcomplex': [complex(3.,4.),complex(3.,4.),complex(5.,6.),complex(7.,7.),] + }} + self.assertEqual(dict(namelist.groups), expected_output) if __name__=='__main__': unittest.main() From 743f8816f33123e2c4863a1aa72c9c4c8cf77334 Mon Sep 17 00:00:00 2001 From: Stefan Grosshauser Date: Tue, 11 Jul 2017 13:20:18 +0200 Subject: [PATCH 16/19] minor --- README.md | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index 6dd51f2..37753f0 100644 --- a/README.md +++ b/README.md @@ -44,7 +44,6 @@ Out[9]: 4.0 - Tab-completion and variable assignment in interactive console ## Missing features - - Parse arrays in index notation - Currently can't handle line continuations - Currently can't handle lines with several parameters - Comments are not kept, and so won't exist in output. 
@@ -52,7 +51,6 @@ Out[9]: 4.0 ## Contribute Please send any namelist files that don't parse correctly or fix the code -yourself and send me a pull request :) +yourself and send a pull request :) -Thanks, -Leif +Thanks From ec038f4d8f3a52c0c7716cfa05ad031a08a255c0 Mon Sep 17 00:00:00 2001 From: Stefan Grosshauser Date: Tue, 20 Mar 2018 14:19:59 +0100 Subject: [PATCH 17/19] make unit test pass again --- namelist.py | 70 ++++++++++++++++++++++++++++++++--------------------- 1 file changed, 42 insertions(+), 28 deletions(-) mode change 100644 => 100755 namelist.py diff --git a/namelist.py b/namelist.py old mode 100644 new mode 100755 index 9a58b6f..0694303 --- a/namelist.py +++ b/namelist.py @@ -1,3 +1,4 @@ +#!/usr/bin/python3 import unittest try: @@ -81,6 +82,15 @@ def _convert_keys(self): for k in list(self.keys()): v = super(CaseInsensitiveDict, self).pop(k) self.__setitem__(k, v) + # def to_dict(self): + # import copy + # ret = {} + # for k in self.keys(): + # if isinstance(self[k], CaseInsensitiveDict): + # ret[k] = self[k].to_dict() + # else: + # ret[k] = copy.deepcopy(self[k]) + # return ret class Namelist(): """ @@ -349,6 +359,11 @@ def data(self): return AttributeMapper(self.groups) class ParsingTests(unittest.TestCase): + + def __init__(self, methodName='runTest'): + super().__init__(methodName) + self.maxDiff = None + def test_single_value(self): input_str = """ &CCFMSIM_SETUP @@ -357,7 +372,7 @@ def test_single_value(self): """ namelist = Namelist(input_str) - expected_output = {'CCFMSIM_SETUP': { 'CCFMrad': 800. }} + expected_output = {'ccfmsim_setup': { 'ccfmrad': 800. }} self.assertEqual(namelist.groups, expected_output) @@ -372,8 +387,8 @@ def test_multigroup(self): """ namelist = Namelist(input_str) - expected_output = {'CCFMSIM_SETUP': { 'CCFMrad': 800. }, - 'GROUP2': { 'R': 500. }} + expected_output = {'ccfmsim_setup': { 'ccfmrad': 800. }, + 'group2': { 'r': 500. 
}} self.assertEqual(namelist.groups, expected_output) @@ -390,8 +405,8 @@ def test_comment(self): """ namelist = Namelist(input_str) - expected_output = {'CCFMSIM_SETUP': { 'CCFMrad': 800. }, - 'GROUP2': { 'R': 500. }} + expected_output = {'ccfmsim_setup': { 'ccfmrad': 800. }, + 'group2': { 'r': 500. }} self.assertEqual(namelist.groups, expected_output) @@ -412,7 +427,7 @@ def test_array(self): namelist = Namelist(input_str) expected_output = { - 'CCFMSIM_SETUP': { + 'ccfmsim_setup': { 'ntrac_picture': 4, 'var_trac_picture': [ 'watcnew', @@ -438,7 +453,7 @@ def test_boolean_sciformat(self): nz =300 zstart =0. ztotal =15000. - dzzoom =50. + dzzoom =50.012345e-15 kcenter =20 nztrans =0 nztrans_boundary =6 @@ -452,11 +467,11 @@ def test_boolean_sciformat(self): namelist = Namelist(input_str) expected_output = { - 'ATHAM_SETUP': { + 'atham_setup': { 'nz': 300, 'zstart': 0., 'ztotal': 15000., - 'dzzoom': 50., + 'dzzoom': 50.012345e-15, 'kcenter': 20, 'nztrans': 0, 'nztrans_boundary': 6, @@ -482,8 +497,8 @@ def test_comment_with_forwardslash(self): """ namelist = Namelist(input_str) - expected_output = {'CCFMSIM_SETUP': { 'CCFMrad': 800., 'var2': 40 }, - 'GROUP2': { 'R': 500. }} + expected_output = {'ccfmsim_setup': { 'ccfmrad': 800., 'var2': 40 }, + 'group2': { 'r': 500. }} self.assertEqual(namelist.groups, expected_output) @@ -518,24 +533,24 @@ def test_inline_array(self): """ expected_output = { - 'TTDATA': { - 'TTREAL': 1., - 'TTINTEGER': 2, - 'TTCOMPLEX': 3. + 4.j, - 'TTCHAR': 'namelist', - 'TTBOOL': True, + 'ttdata': { + 'ttreal': 1., + 'ttinteger': 2, + 'ttcomplex': 3. 
+ 4.j, + 'ttchar': 'namelist', + 'ttbool': true, }, - 'AADATA': { - 'AAREAL': [1., 1., 2., 3.,], - 'AAINTEGER': [2, 2, 3, 4], - 'AACOMPLEX': [3.+4.j, 3.+4.j, 5.+6.j, 7.+7.j], - 'AACHAR': ['namelist', 'namelist', 'array', ' the lot'], - 'AABOOL': [True, True, False, False], + 'aadata': { + 'aareal': [1., 1., 2., 3.,], + 'aainteger': [2, 2, 3, 4], + 'aacomplex': [3.+4.j, 3.+4.j, 5.+6.j, 7.+7.j], + 'aachar': ['namelist', 'namelist', 'array', ' the lot'], + 'aabool': [True, True, False, False], }, - 'XXDATA': { - 'XXREAL': 1., - 'XXINTEGER': 2., - 'XXCOMPLEX': 3.+4.j, + 'xxdata': { + 'xxreal': 1., + 'xxinteger': 2., + 'xxcomplex': 3.+4.j, }, } @@ -543,7 +558,6 @@ def test_inline_array(self): self.assertEqual(dict(namelist.groups), expected_output) -class ParsingTests(unittest.TestCase): def test_single_value(self): input_str = """&CCFMSIM_SETUP ccfmrad= 8.00000e+02 From 188c8143641092a34c336495ed342de3dd10de1d Mon Sep 17 00:00:00 2001 From: Stefan Grosshauser Date: Tue, 20 Mar 2018 14:41:27 +0100 Subject: [PATCH 18/19] fix, allow to dump tuples In some rare cases (I am not sure when), parsing an array yields a tuple, not a list. Then this line failed when dumping the data. 
--- namelist.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/namelist.py b/namelist.py index 0694303..57d8eaa 100755 --- a/namelist.py +++ b/namelist.py @@ -312,7 +312,7 @@ def dump(self, array_inline=True, float_format="%13.5e"): for group in group_list: lines.append("&%s" % group_name.upper()) for variable_name, variable_value in group.items(): - if(isinstance(variable_value, list)): + if(isinstance(variable_value, list) or isinstance(variable_value, tuple)): if(array_inline and None not in variable_value): lines.append("%s= %s" % (variable_name, ", ".join([self._format_value(elem, float_format) for elem in variable_value]))) else: From 48cb683d90345c6fd31968eec0f08098b67640a1 Mon Sep 17 00:00:00 2001 From: Stefan Grosshauser Date: Tue, 20 Mar 2018 14:41:44 +0100 Subject: [PATCH 19/19] test for inline array dump --- namelist.py | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/namelist.py b/namelist.py index 57d8eaa..c530060 100755 --- a/namelist.py +++ b/namelist.py @@ -612,5 +612,28 @@ def test_inline_array(self): }} self.assertEqual(dict(namelist.groups), expected_output) + def test_inline_array_dump(self): + input_str = """&DATA +temp_coef= 1.00000000, 5.00000000, 0.18077700, 0.28930000, 0.01450000, +dens_coef=2.00000000,3.00000000,4.18077700,5.28930000,6.01450000 +/ +""" + namelist = Namelist(input_str) + expected_output = { + 'data': { + 'temp_coef': [1.00000000, 5.00000000, 0.18077700, 0.28930000, 0.01450000,], + 'dens_coef': [2.00000000,3.00000000,4.18077700,5.28930000,6.01450000] + }} + self.assertEqual(dict(namelist.groups), expected_output) + + expected_output = """&DATA +temp_coef= 1.00000e+00, 5.00000e+00, 1.80777e-01, 2.89300e-01, 1.45000e-02 +dens_coef= 2.00000e+00, 3.00000e+00, 4.18078e+00, 5.28930e+00, 6.01450e+00 +/ +""" + + self.assertEqual(namelist.dump(), expected_output) + + if __name__=='__main__': unittest.main()