From 38523b3da77b542b7b2502c9474f37d05a56beb9 Mon Sep 17 00:00:00 2001 From: Florian Weyandt Date: Fri, 14 May 2021 12:42:42 +0200 Subject: [PATCH 1/3] add prometheus format status output --- src/plotman/_tests/reporting_test.py | 18 ++++++++++++ src/plotman/plotman.py | 8 +++++- src/plotman/reporting.py | 42 ++++++++++++++++++++++++++++ 3 files changed, 67 insertions(+), 1 deletion(-) diff --git a/src/plotman/_tests/reporting_test.py b/src/plotman/_tests/reporting_test.py index 87c8a5e2..afc235f9 100644 --- a/src/plotman/_tests/reporting_test.py +++ b/src/plotman/_tests/reporting_test.py @@ -60,3 +60,21 @@ def test_job_viz_counts(): ] assert(reporting.job_viz(jobs) == '1 2 .:;! 3 ! 4 ') + +def test_to_prometheus_format(): + prom_stati = [ + ('foo="bar",baz="2"', {'metric1': 1, 'metric2': 2}), + ('foo="blubb",baz="3"', {'metric1': 2, 'metric2': 3}) + ] + metrics = {'metric1': 'This is foo', 'metric2': 'In a parallel universe this is foo'} + expected = [ + '# HELP metric1 This is foo.', + '# TYPE metric1 gauge', + 'metric1{foo="bar",baz="2"} 1', + 'metric1{foo="blubb",baz="3"} 2', + '# HELP metric2 In a parallel universe this is foo.', + '# TYPE metric2 gauge', + 'metric2{foo="bar",baz="2"} 2','metric2{foo="blubb",baz="3"} 3' + ] + result = reporting.to_prometheus_format(metrics, prom_stati) + assert(result == expected) diff --git a/src/plotman/plotman.py b/src/plotman/plotman.py index d3468735..27fd4c60 100755 --- a/src/plotman/plotman.py +++ b/src/plotman/plotman.py @@ -27,7 +27,9 @@ def parse_args(self): sp.add_parser('version', help='print the version') sp.add_parser('status', help='show current plotting status') - + + sp.add_parser('prometheus', help='show current plotting status in prometheus readable format') + sp.add_parser('dirs', help='show directories info') sp.add_parser('interactive', help='run interactive control/monitoring mode') @@ -165,6 +167,10 @@ def main(): if args.cmd == 'status': print(reporting.status_report(jobs, get_term_width())) + # Prometheus report + if args.cmd == 'prometheus': + print(reporting.prometheus_report(jobs)) + # Directories report elif args.cmd == 'dirs': print(reporting.dirs_report(jobs, cfg.directories, cfg.scheduling, get_term_width())) diff --git a/src/plotman/reporting.py b/src/plotman/reporting.py index 3142b978..aa5ab987 100644 --- a/src/plotman/reporting.py +++ b/src/plotman/reporting.py @@ -125,6 +125,48 @@ def status_report(jobs, width, height=None, tmp_prefix='', dst_prefix=''): # return ('tmp dir prefix: %s ; dst dir prefix: %s\n' % (tmp_prefix, dst_prefix) return tab.draw() +def to_prometheus_format(metrics, prom_stati): + prom_str_list = [] + for metric_name, metric_desc in metrics.items(): + prom_str_list.append(f'# HELP {metric_name} {metric_desc}.') + prom_str_list.append(f'# TYPE {metric_name} gauge') + for label_str, values in prom_stati: + prom_str_list.append('%s{%s} %s' % (metric_name, label_str, values[metric_name])) + return prom_str_list + +def prometheus_report(jobs, tmp_prefix='', dst_prefix=''): + metrics = { + 'plotman_plot_phase_major': 'The phase the plot is currently in', + 'plotman_plot_phase_minor': 'The part of the phase the plot is currently in', + 'plotman_plot_tmp_usage': 'Tmp dir usage in bytes', + 'plotman_plot_mem_usage': 'Memory usage in bytes', + 'plotman_plot_user_time': 'Processor time (user) in s', + 'plotman_plot_sys_time': 'Processor time (sys) in s', + 'plotman_plot_iowait_time': 'Processor time (iowait) in s', + } + prom_stati = [] + for j in jobs: + labels = { + 'plot_id': j.plot_id[:8], + 'tmp_dir': abbr_path(j.tmpdir, tmp_prefix), + 'dst_dir': abbr_path(j.dstdir, dst_prefix), + 'run_status': j.get_run_status(), + 'phase': phase_str(j.progress()) + } + label_str = ','.join([f'{k}="{v}"' for k, v in labels.items()]) + values = { + 'plotman_plot_phase_major': j.progress().major, + 'plotman_plot_phase_minor': j.progress().minor, + 'plotman_plot_tmp_usage': j.get_tmp_usage(), + 'plotman_plot_mem_usage': j.get_mem_usage(), + 'plotman_plot_user_time': j.get_time_user(), + 'plotman_plot_sys_time': j.get_time_sys(), + 'plotman_plot_iowait_time': j.get_time_iowait(), + } + prom_stati += [(label_str, values)] + return '\n'.join(to_prometheus_format(metrics, prom_stati)) + + def tmp_dir_report(jobs, dir_cfg, sched_cfg, width, start_row=None, end_row=None, prefix=''): '''start_row, end_row let you split the table up if you want''' tab = tt.Texttable() From d2c9f081adb1b57668e13e41478051f9b825ade2 Mon Sep 17 00:00:00 2001 From: Kyle Altendorf Date: Fri, 18 Jun 2021 22:11:22 -0400 Subject: [PATCH 2/3] add type hints --- src/plotman/_tests/reporting_test.py | 2 +- src/plotman/reporting.py | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/plotman/_tests/reporting_test.py b/src/plotman/_tests/reporting_test.py index e566f3e6..5ce0ffa9 100644 --- a/src/plotman/_tests/reporting_test.py +++ b/src/plotman/_tests/reporting_test.py @@ -62,7 +62,7 @@ def test_job_viz_counts() -> None: assert(reporting.job_viz(jobs) == '1 2 .:;! 3 ! 4 ') # type: ignore[arg-type] -def test_to_prometheus_format(): +def test_to_prometheus_format() -> None: prom_stati = [ ('foo="bar",baz="2"', {'metric1': 1, 'metric2': 2}), ('foo="blubb",baz="3"', {'metric1': 2, 'metric2': 3}) diff --git a/src/plotman/reporting.py b/src/plotman/reporting.py index 46ef98eb..0a74d84d 100644 --- a/src/plotman/reporting.py +++ b/src/plotman/reporting.py @@ -124,7 +124,7 @@ def status_report(jobs: typing.List[job.Job], width: int, height: typing.Optiona return tab.draw() # type: ignore[no-any-return] -def to_prometheus_format(metrics, prom_stati): +def to_prometheus_format(metrics: typing.Dict[str, str], prom_stati: typing.Sequence[typing.Tuple[str, typing.Mapping[str, typing.Optional[int]]]]) -> typing.List[str]: prom_str_list = [] for metric_name, metric_desc in metrics.items(): prom_str_list.append(f'# HELP {metric_name} {metric_desc}.') @@ -133,7 +133,7 @@ def to_prometheus_format(metrics, prom_stati): prom_str_list.append('%s{%s} %s' % (metric_name, label_str, values[metric_name])) return prom_str_list -def prometheus_report(jobs, tmp_prefix='', dst_prefix=''): +def prometheus_report(jobs: typing.List[job.Job], tmp_prefix: str = '', dst_prefix: str = '') -> str: metrics = { 'plotman_plot_phase_major': 'The phase the plot is currently in', 'plotman_plot_phase_minor': 'The part of the phase the plot is currently in', @@ -150,7 +150,7 @@ def prometheus_report(jobs, tmp_prefix='', dst_prefix=''): 'tmp_dir': abbr_path(j.tmpdir, tmp_prefix), 'dst_dir': abbr_path(j.dstdir, dst_prefix), 'run_status': j.get_run_status(), - 'phase': phase_str(j.progress()) + 'phase': str(j.progress()), } label_str = ','.join([f'{k}="{v}"' for k, v in labels.items()]) values = { From d92f397f0d673b3759aaaf04c6b07261b97ec03f Mon Sep 17 00:00:00 2001 From: Kyle Altendorf Date: Fri, 18 Jun 2021 22:25:38 -0400 Subject: [PATCH 3/3] add changelog entry --- CHANGELOG.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 0b291813..b03cf8ad 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -13,6 +13,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ([#549](https://github.com/ericaltendorf/plotman/pull/549)) - If the tmp drive selected for a plot is also listed as a dst drive then plotman will use the same drive for both. ([#643](https://github.com/ericaltendorf/plotman/pull/643)) +- `plotman prometheus` command to output status for consumption by [Prometheus](https://prometheus.io/). + ([#430](https://github.com/ericaltendorf/plotman/pull/430)) ## [0.4.1] - 2021-06-11 ### Fixed