From e5c044b8c18ac5db574900a25837eec05a74544a Mon Sep 17 00:00:00 2001 From: Valery Sinelnikov Date: Mon, 19 Jun 2023 14:17:18 +0400 Subject: [PATCH 1/2] Add support for inline comments in regex Change _handle_comment to support new regex - src/configobj/__init__.py --- src/configobj/__init__.py | 129 ++++++++++++++++++++------------------ 1 file changed, 69 insertions(+), 60 deletions(-) diff --git a/src/configobj/__init__.py b/src/configobj/__init__.py index 0580b65..67d8c71 100644 --- a/src/configobj/__init__.py +++ b/src/configobj/__init__.py @@ -1043,64 +1043,71 @@ class ConfigObj(Section): MAX_PARSE_ERROR_DETAILS = 5 # Override/append to this class variable for alternative comment markers - # TODO: also support inline comments (needs dynamic compiling of the regex below) COMMENT_MARKERS = ['#'] - _keyword = re.compile(r'''^ # line start - (\s*) # indentation - ( # keyword - (?:".*?")| # double quotes - (?:'.*?')| # single quotes - (?:[^'"=].*?) # no quotes - ) - \s*=\s* # divider - (.*) # value (including list values and comments) - $ # line end - ''', - re.VERBOSE) - - _sectionmarker = re.compile(r'''^ - (\s*) # 1: indentation - ((?:\[\s*)+) # 2: section marker open - ( # 3: section name open - (?:"\s*\S.*?\s*")| # at least one non-space with double quotes - (?:'\s*\S.*?\s*')| # at least one non-space with single quotes - (?:[^'"\s].*?) # at least one non-space unquoted - ) # section name close - ((?:\s*\])+) # 4: section marker close - (\s*(?:\#.*)?)? # 5: optional comment - $''', - re.VERBOSE) - - # this regexp pulls list values out as a single string - # or single values and comments - # FIXME: this regex adds a '' to the end of comma terminated lists - # workaround in ``_handle_value`` - _valueexp = re.compile(r'''^ - (?: + # Use the @classmethod decorator to initialize regular expressions + # that depend on another class variable. + @classmethod + def __regex_init__(cls): + #Filter and modify the list to keep only single-character elements + process_list = lambda lst: [item for item in lst if len(item) == 1] + ConfigObj.COMMENT_MARKERS = process_list(ConfigObj.COMMENT_MARKERS) + + ConfigObj._keyword = re.compile(r'''^ # line start + (\s*) # indentation + ( # keyword + (?:".*?")| # double quotes + (?:'.*?')| # single quotes + (?:[^'"=].*?) # no quotes + ) + \s*=\s* # divider + (.*) # value (including list values and comments) + $ # line end + ''', + re.VERBOSE) + + ConfigObj._sectionmarker = re.compile((r'''^ + (\s*) # 1: indentation + ((?:\[\s*)+) # 2: section marker open + ( # 3: section name open + (?:"\s*\S.*?\s*")| # at least one non-space with double quotes + (?:'\s*\S.*?\s*')| # at least one non-space with single quotes + (?:[^'"\s].*?) # at least one non-space unquoted + ) # section name close + ((?:\s*\])+) # 4: section marker close + (\s*(?:[{}].*)?)? # 5: optional comment + $''').format(''.join(ConfigObj.COMMENT_MARKERS)), + re.VERBOSE) + + # this regexp pulls list values out as a single string + # or single values and comments + # FIXME: this regex adds a '' to the end of comma terminated lists + # workaround in ``_handle_value`` + ConfigObj._valueexp = re.compile((r'''^ (?: - ( - (?: + (?: + ( (?: - (?:".*?")| # double quotes - (?:'.*?')| # single quotes - (?:[^'",\#][^,\#]*?) # unquoted - ) - \s*,\s* # comma - )* # match all list items ending in a comma (if any) - ) - ( - (?:".*?")| # double quotes - (?:'.*?')| # single quotes - (?:[^'",\#\s][^,]*?)| # unquoted - (?:(? Date: Wed, 18 Sep 2024 13:11:12 +0400 Subject: [PATCH 2/2] Refactor TestComments usage and enhance comment marker tests - Removed ConfigObjPHP class. - Expanded test cases for handling inline comments with both # and ; markers. - Adjusted assertions to include multiple comment styles (hashtag and semicolon). --- src/tests/test_configobj.py | 25 ++++++++++++++++--------- 1 file changed, 16 insertions(+), 9 deletions(-) diff --git a/src/tests/test_configobj.py b/src/tests/test_configobj.py index b1f2d01..39f589d 100644 --- a/src/tests/test_configobj.py +++ b/src/tests/test_configobj.py @@ -1149,8 +1149,6 @@ def test_creating_with_a_dictionary(): assert dictionary_cfg_content is not cfg.dict() -class ConfigObjPHP(ConfigObj): - COMMENT_MARKERS = ['#', ';'] class TestComments(object): @@ -1240,27 +1238,36 @@ def test_comments(self, comment_filled_cfg): def test_comment_markers(self, cfg_contents): cfgfile = cfg_contents("""; comment - [php] # section marker - ;INLINE NOT SUPPORTED YET [php] ; section marker + [php] # section marker hashtag ; Boolean: true, on, yes or false, off, no, none switch = off track_errors = yes ; string in double-quotes include_path = ".:/usr/local/lib/php" + [semicolon] ; section marker semicolon + ; Boolean: true, on, yes or false, off, no, none + switch = on ; comment_semicolon + track_errors = no # comment_hashtag """) - c = ConfigObjPHP(cfgfile) - assert c == dict(php=dict(switch='off', track_errors='yes', include_path=".:/usr/local/lib/php")) + ConfigObj.COMMENT_MARKERS = ['#', ';'] + c = ConfigObj(cfgfile) + assert c == dict(php=dict(switch='off', track_errors='yes', include_path=".:/usr/local/lib/php"), semicolon=dict(switch='on', track_errors='no')) assert c.initial_comment == ['; comment'] + assert c.inline_comments == {'php': ' # section marker hashtag', 'semicolon': ' ; section marker semicolon'} + ConfigObj.COMMENT_MARKERS = ['#'] + def test_write_back_comment_markers(self, cfg_contents): lines = ( '# initial comment', '; 2nd line', - '[sect_name]', '; section comment', 'foo = bar', - '', '; final comment') - c = ConfigObjPHP(lines) + '[sect_name] # comment', '; section comment', 'foo = bar', + '', '; final comment', '[sect_name2] ;comment_semicolon', 'semicolon = yes ;comment_semicolon') + ConfigObj.COMMENT_MARKERS = ['#', ';'] + c = ConfigObj(lines) for expected, got in zip(lines, c.write()): assert expected == got + ConfigObj.COMMENT_MARKERS = ['#'] def test_overwriting_filenames(a, b, i):