Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
*.local.*

# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
Expand Down
96 changes: 96 additions & 0 deletions CLAUDE.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,96 @@
# CLAUDE.md

This file provides guidance to Claude Code (claude.ai/code) when working with code in this repository.

## Project Overview

pyhocon is a Python implementation of the HOCON (Human-Optimized Config Object Notation) parser. It parses HOCON configuration files into Python data structures and can convert to JSON, YAML, properties, and HOCON formats.

HOCON spec: https://github.com/typesafehub/config/blob/master/HOCON.md

## Commands

### Testing
```bash
# Run all tests
pytest tests/

# Run a specific test file
pytest tests/test_config_parser.py

# Run a specific test
pytest tests/test_config_parser.py::TestConfigParser::test_parse_simple_value

# Run with coverage
coverage run --source=pyhocon -m pytest tests/
coverage report -m
```

### Linting
```bash
flake8 pyhocon tests setup.py
```

### Tox (multi-environment testing)
```bash
tox # Run all environments
tox -e flake8 # Run flake8 only
tox -e py312 # Run tests on Python 3.12
```

### CLI Tool
```bash
# Convert HOCON to JSON
pyhocon -i input.conf -f json -o output.json
cat input.conf | pyhocon -f json

# Other formats: json, yaml, properties, hocon
# Use -c for compact output (nested single-value dicts as a.b.c = 1)
```

## Architecture

### Core Modules

- **config_parser.py** - Main parsing engine using the pyparsing library
  - `ConfigFactory` - Public API for parsing (parse_file, parse_string, parse_URL, from_dict)
  - `ConfigParser` - Internal parser with HOCON grammar rules

- **config_tree.py** - Data structures
  - `ConfigTree` - Hierarchical config storage (extends OrderedDict), supports dot notation access (`config['a.b.c']`)
  - `ConfigList` - HOCON arrays
  - `ConfigValues` - Concatenated values (handles array/string/dict merging)
  - `ConfigSubstitution` - Represents `${var}` and `${?var}` substitutions

- **converter.py** - `HOCONConverter` with to_json, to_yaml, to_properties, to_hocon methods

- **period_parser.py / period_serializer.py** - Duration parsing and serialization (e.g., "5 days", "10 seconds")

- **tool.py** - CLI entry point

### Parsing Flow

1. `ConfigFactory.parse_*()` receives input
2. `ConfigParser.parse()` applies pyparsing grammar rules
3. Produces `ConfigTree`/`ConfigList` with unresolved `ConfigSubstitution` tokens
4. `resolve_substitutions()` replaces `${var}` references from config or environment variables
5. Returns resolved `ConfigTree`

### Key Features

- Substitutions: `${key}` (required) and `${?key}` (optional, no error if undefined); both fall back to environment variables when the key is not defined in the config
- Includes: `include "file.conf"`, `include url("http://...")`, `include required(file("..."))`, glob patterns
- Value access: `config['a.b.c']` or `config['a']['b']['c']` or `config.get_string('a.b.c')`
- Type-safe getters: `get_string()`, `get_int()`, `get_float()`, `get_bool()`, `get_list()`, `get_config()`

## Dependencies

- **pyparsing** (>=3, <4 on Python 3; ~=2.0 on Python 2) - Grammar parsing
- **python-dateutil** (>=2.8.0, optional) - For months/years in duration parsing

## Test Dependencies

- pytest
- mock (pinned to 3.0.5 in setup.py)
- python-dateutil
- coveralls (for CI coverage reporting)
48 changes: 17 additions & 31 deletions pyhocon/config_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,28 +7,14 @@
import socket
import sys

import pyparsing
from pyparsing import (Forward, Group, Keyword, Literal, Optional,
ParserElement, ParseSyntaxException, QuotedString,
Regex, SkipTo, StringEnd, Suppress, TokenConverter,
Word, ZeroOrMore, alphanums, alphas8bit, col, lineno,
replaceWith)
replace_with)

from pyhocon.period_parser import get_period_expr

# Fix deepcopy issue with pyparsing
if sys.version_info >= (3, 8):
def fixed_get_attr(self, item):
if item == '__deepcopy__':
raise AttributeError(item)
try:
return self[item]
except KeyError:
return ""


pyparsing.ParseResults.__getattr__ = fixed_get_attr

from pyhocon.config_tree import (ConfigInclude, ConfigList, ConfigQuotedString,
ConfigSubstitution, ConfigTree,
ConfigUnquotedString, ConfigValues, NoneValue)
Expand Down Expand Up @@ -142,7 +128,7 @@ def parse_file(cls, filename, encoding='utf-8', required=True, resolve=True, unr
except IOError as e:
if required:
raise e
logger.warn('Cannot include file %s. File does not exist or cannot be read.', filename)
logger.warning('Cannot include file %s. File does not exist or cannot be read.', filename)
return []

@classmethod
Expand All @@ -167,7 +153,7 @@ def parse_URL(cls, url, timeout=None, resolve=True, required=False, unresolved_v
content = fd.read() if use_urllib2 else fd.read().decode('utf-8')
return cls.parse_string(content, os.path.dirname(url), resolve, unresolved_value)
except (HTTPError, URLError) as e:
logger.warn('Cannot include url %s. Resource is inaccessible.', url)
logger.warning('Cannot include url %s. Resource is inaccessible.', url)
if required:
raise e
else:
Expand Down Expand Up @@ -376,38 +362,38 @@ def _merge(a, b):
@contextlib.contextmanager
def set_default_white_spaces():
default = ParserElement.DEFAULT_WHITE_CHARS
ParserElement.setDefaultWhitespaceChars(' \t')
ParserElement.set_default_whitespace_chars(' \t')
yield
ParserElement.setDefaultWhitespaceChars(default)
ParserElement.set_default_whitespace_chars(default)

with set_default_white_spaces():
assign_expr = Forward()
true_expr = Keyword("true", caseless=True).setParseAction(replaceWith(True))
false_expr = Keyword("false", caseless=True).setParseAction(replaceWith(False))
null_expr = Keyword("null", caseless=True).setParseAction(replaceWith(NoneValue()))
key = QuotedString('"""', escChar='\\', unquoteResults=False) | \
QuotedString('"', escChar='\\', unquoteResults=False) | Word(alphanums + alphas8bit + '._- /')
true_expr = Keyword("true", caseless=True).set_parse_action(replace_with(True))
false_expr = Keyword("false", caseless=True).set_parse_action(replace_with(False))
null_expr = Keyword("null", caseless=True).set_parse_action(replace_with(NoneValue()))
key = QuotedString('"""', esc_char='\\', unquote_results=False) | \
QuotedString('"', esc_char='\\', unquote_results=False) | Word(alphanums + alphas8bit + '._- /')

eol = Word('\n\r').suppress()
eol_comma = Word('\n\r,').suppress()
comment = (Literal('#') | Literal('//')) - SkipTo(eol | StringEnd())
comment_eol = Suppress(Optional(eol_comma) + comment)
comment_no_comma_eol = (comment | eol).suppress()
number_expr = Regex(r'[+-]?(\d*\.\d+|\d+(\.\d+)?)([eE][+\-]?\d+)?(?=$|[ \t]*([\$\}\],#\n\r]|//))',
re.DOTALL).setParseAction(convert_number)
re.DOTALL).set_parse_action(convert_number)
# multi line string using """
# Using fix described in http://pyparsing.wikispaces.com/share/view/3778969
multiline_string = Regex('""".*?"*"""', re.DOTALL | re.UNICODE).setParseAction(parse_multi_string)
multiline_string = Regex('""".*?"*"""', re.DOTALL | re.UNICODE).set_parse_action(parse_multi_string)
# single quoted line string
quoted_string = Regex(r'"(?:[^"\\\n]|\\.)*"[ \t]*', re.UNICODE).setParseAction(create_quoted_string)
quoted_string = Regex(r'"(?:[^"\\\n]|\\.)*"[ \t]*', re.UNICODE).set_parse_action(create_quoted_string)
# unquoted string that takes the rest of the line until an optional comment
# we support .properties multiline support which is like this:
# line1 \
# line2 \
# so a backslash precedes the \n
unquoted_string = Regex(r'(?:[^^`+?!@*&"\[\{\s\]\}#,=\$\\]|\\.)+[ \t]*', re.UNICODE).setParseAction(
unquoted_string = Regex(r'(?:[^^`+?!@*&"\[\{\s\]\}#,=\$\\]|\\.)+[ \t]*', re.UNICODE).set_parse_action(
unescape_string)
substitution_expr = Regex(r'[ \t]*\$\{[^\}]+\}[ \t]*').setParseAction(create_substitution)
substitution_expr = Regex(r'[ \t]*\$\{[^\}]+\}[ \t]*').set_parse_action(create_substitution)
string_expr = multiline_string | quoted_string | unquoted_string

value_expr = get_period_expr() | number_expr | true_expr | false_expr | null_expr | string_expr
Expand All @@ -422,7 +408,7 @@ def set_default_white_spaces():
Keyword("required") - Literal('(').suppress() - include_content - Literal(')').suppress()
)
)
).setParseAction(include_config)
).set_parse_action(include_config)

root_dict_expr = Forward()
dict_expr = Forward()
Expand Down Expand Up @@ -451,7 +437,7 @@ def set_default_white_spaces():
config_expr = ZeroOrMore(comment_eol | eol) + (
list_expr | root_dict_expr | inside_root_dict_expr) + ZeroOrMore(
comment_eol | eol_comma)
config = config_expr.parseString(content, parseAll=True)[0]
config = config_expr.parse_string(content, parse_all=True)[0]

if resolve:
allow_unresolved = resolve and unresolved_value is not DEFAULT_SUBSTITUTION \
Expand Down
4 changes: 2 additions & 2 deletions pyhocon/period_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,8 +64,8 @@ def get_period_expr():
return Combine(
Word(nums)('value') + ZeroOrMore(Literal(" ")).suppress() + Or(period_types)('unit') + WordEnd(
alphanums).suppress()
).setParseAction(convert_period)
).set_parse_action(convert_period)


def parse_period(content):
return get_period_expr().parseString(content, parseAll=True)[0]
return get_period_expr().parse_string(content, parse_all=True)[0]
5 changes: 3 additions & 2 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,10 +54,11 @@ def run_tests(self):
],
install_requires=[
'pyparsing~=2.0;python_version<"3.0"',
'pyparsing>=2,<4;python_version>="3.0"',
'pyparsing>=3,<4;python_version>="3.0"',
],
extras_require={
'Duration': ['python-dateutil>=2.8.0']
'Duration': ['python-dateutil>=2.8.0'],
'test': ['pytest', 'mock==3.0.5']
},
tests_require=['pytest', 'mock==3.0.5'],
entry_points={
Expand Down