From dcd2a98a9d8ab2b2aa071f2382b0d767f0df38f7 Mon Sep 17 00:00:00 2001 From: Konstantin Baierer Date: Sat, 15 Aug 2020 15:48:57 +0200 Subject: [PATCH 1/3] refactor: move ValidationReport to ocrd_models --- ocrd/ocrd/task_sequence.py | 7 ++++++- ocrd_models/ocrd_models/__init__.py | 1 + .../ocrd_validators => ocrd_models/ocrd_models}/report.py | 0 ocrd_validators/ocrd_validators/__init__.py | 2 -- ocrd_validators/ocrd_validators/json_validator.py | 2 +- ocrd_validators/ocrd_validators/ocrd_zip_validator.py | 2 +- ocrd_validators/ocrd_validators/page_validator.py | 2 +- ocrd_validators/ocrd_validators/workspace_validator.py | 2 +- ocrd_validators/ocrd_validators/xsd_validator.py | 4 +++- tests/validator/test_report.py | 2 +- 10 files changed, 15 insertions(+), 9 deletions(-) rename {ocrd_validators/ocrd_validators => ocrd_models/ocrd_models}/report.py (100%) diff --git a/ocrd/ocrd/task_sequence.py b/ocrd/ocrd/task_sequence.py index 9c56c8377a..9cf58066b3 100644 --- a/ocrd/ocrd/task_sequence.py +++ b/ocrd/ocrd/task_sequence.py @@ -5,9 +5,11 @@ from collections import Counter from ocrd_utils import getLogger, parse_json_string_or_file, set_json_key_value_overrides +# from collections import Counter from ocrd.processor.base import run_cli from ocrd.resolver import Resolver -from ocrd_validators import ParameterValidator, WorkspaceValidator, ValidationReport +from ocrd_validators import ParameterValidator, WorkspaceValidator +from ocrd_models import ValidationReport class ProcessorTask(): @@ -85,6 +87,9 @@ def __str__(self): if self.parameters: ret += " -p '%s'" % json.dumps(self.parameters) return ret +from ocrd_validators import WorkspaceValidator +from ocrd_utils import getLogger +from ocrd_models import ValidationReport def validate_tasks(tasks, workspace, page_id=None, overwrite=False): report = ValidationReport() diff --git a/ocrd_models/ocrd_models/__init__.py b/ocrd_models/ocrd_models/__init__.py index cc25df470b..9a31a2d4c7 100644 --- 
a/ocrd_models/ocrd_models/__init__.py +++ b/ocrd_models/ocrd_models/__init__.py @@ -6,3 +6,4 @@ from .ocrd_file import OcrdFile from .ocrd_mets import OcrdMets from .ocrd_xml_base import OcrdXmlDocument +from .report import ValidationReport diff --git a/ocrd_validators/ocrd_validators/report.py b/ocrd_models/ocrd_models/report.py similarity index 100% rename from ocrd_validators/ocrd_validators/report.py rename to ocrd_models/ocrd_models/report.py diff --git a/ocrd_validators/ocrd_validators/__init__.py b/ocrd_validators/ocrd_validators/__init__.py index 3ba0236d81..bbf88323af 100644 --- a/ocrd_validators/ocrd_validators/__init__.py +++ b/ocrd_validators/ocrd_validators/__init__.py @@ -7,13 +7,11 @@ 'PageValidator', 'OcrdToolValidator', 'OcrdZipValidator', - 'ValidationReport', 'XsdValidator', 'XsdMetsValidator', 'XsdPageValidator', ] -from .report import ValidationReport from .parameter_validator import ParameterValidator from .workspace_validator import WorkspaceValidator from .page_validator import PageValidator diff --git a/ocrd_validators/ocrd_validators/json_validator.py b/ocrd_validators/ocrd_validators/json_validator.py index 6b855d409a..57a0a9a37c 100644 --- a/ocrd_validators/ocrd_validators/json_validator.py +++ b/ocrd_validators/ocrd_validators/json_validator.py @@ -5,7 +5,7 @@ from jsonschema import Draft4Validator, validators # pylint: disable=import-error -from .report import ValidationReport +from ocrd_models import ValidationReport # http://python-jsonschema.readthedocs.io/en/latest/faq/ def extend_with_default(validator_class): diff --git a/ocrd_validators/ocrd_validators/ocrd_zip_validator.py b/ocrd_validators/ocrd_validators/ocrd_zip_validator.py index 3786e2d0da..147a3c4849 100644 --- a/ocrd_validators/ocrd_validators/ocrd_zip_validator.py +++ b/ocrd_validators/ocrd_validators/ocrd_zip_validator.py @@ -12,7 +12,7 @@ from bagit_profile import Profile, ProfileValidationError # pylint: disable=no-name-in-module from .constants import 
OCRD_BAGIT_PROFILE, OCRD_BAGIT_PROFILE_URL, TMP_BAGIT_PREFIX -from .report import ValidationReport +from ocrd_models import ValidationReport log = getLogger('ocrd.ocrd_zip_validator') diff --git a/ocrd_validators/ocrd_validators/page_validator.py b/ocrd_validators/ocrd_validators/page_validator.py index 5ae272d166..3421efb15a 100644 --- a/ocrd_validators/ocrd_validators/page_validator.py +++ b/ocrd_validators/ocrd_validators/page_validator.py @@ -29,7 +29,7 @@ UnorderedGroupType, UnorderedGroupIndexedType, ) -from .report import ValidationReport +from ocrd_models import ValidationReport log = getLogger('ocrd.page_validator') diff --git a/ocrd_validators/ocrd_validators/workspace_validator.py b/ocrd_validators/ocrd_validators/workspace_validator.py index 7da5402df7..61ad3e8f0c 100644 --- a/ocrd_validators/ocrd_validators/workspace_validator.py +++ b/ocrd_validators/ocrd_validators/workspace_validator.py @@ -6,10 +6,10 @@ from pathlib import Path from ocrd_utils import getLogger, MIMETYPE_PAGE, pushd_popd, is_local_filename +from ocrd_models import ValidationReport from ocrd_modelfactory import page_from_file from .constants import FILE_GROUP_CATEGORIES, FILE_GROUP_PREFIX -from .report import ValidationReport from .page_validator import PageValidator from .xsd_page_validator import XsdPageValidator from .xsd_mets_validator import XsdMetsValidator diff --git a/ocrd_validators/ocrd_validators/xsd_validator.py b/ocrd_validators/ocrd_validators/xsd_validator.py index 80388bc95c..81b9457564 100644 --- a/ocrd_validators/ocrd_validators/xsd_validator.py +++ b/ocrd_validators/ocrd_validators/xsd_validator.py @@ -4,7 +4,9 @@ from pathlib import Path from lxml import etree as ET -from .report import ValidationReport + +from ocrd_models import ValidationReport + from .constants import XSD_PATHS # diff --git a/tests/validator/test_report.py b/tests/validator/test_report.py index fc7e2cd211..3eccb08f61 100644 --- a/tests/validator/test_report.py +++ 
b/tests/validator/test_report.py @@ -1,5 +1,5 @@ from tests.base import TestCase, main # pylint: disable=import-error,no-name-in-module -from ocrd_validators import ValidationReport +from ocrd_models import ValidationReport class TestValidationReport(TestCase): From 80991ff1c0a8b19c002e3458f32aae05d76fd76a Mon Sep 17 00:00:00 2001 From: Konstantin Baierer Date: Sat, 15 Aug 2020 18:45:52 +0200 Subject: [PATCH 2/3] refactor: decouple task_sequence and validate test --- tests/cli/test_validate.py | 8 ++-- tests/data/wf_testcase.py | 66 +++++++++++++++++++++++++++++++++ tests/test_task_sequence.py | 74 ++++++------------------------------- 3 files changed, 81 insertions(+), 67 deletions(-) create mode 100644 tests/data/wf_testcase.py diff --git a/tests/cli/test_validate.py b/tests/cli/test_validate.py index 155d2e6d9f..c60eb9c8d4 100644 --- a/tests/cli/test_validate.py +++ b/tests/cli/test_validate.py @@ -5,13 +5,13 @@ from click.testing import CliRunner # pylint: disable=import-error, no-name-in-module -from tests.base import TestCase, main, assets, copy_of_directory +from tests.base import main, assets +from tests.data.wf_testcase import TestCase from ocrd_utils import pushd_popd from ocrd.resolver import Resolver from ocrd.cli.validate import validate_cli -from tests.test_task_sequence import TestTaskSequence OCRD_TOOL = ''' { @@ -49,10 +49,10 @@ ''' # inherit from TestTaskSequence for the setUp/tearDown methods -class TestCli(TestTaskSequence): +class TestCli(TestCase): def __init__(self, *args, **kwargs): - super(TestTaskSequence, self).__init__(*args, **kwargs) + super(TestCli, self).__init__(*args, **kwargs) self.runner = CliRunner() def test_validate_ocrd_tool(self): diff --git a/tests/data/wf_testcase.py b/tests/data/wf_testcase.py new file mode 100644 index 0000000000..41e939395e --- /dev/null +++ b/tests/data/wf_testcase.py @@ -0,0 +1,66 @@ +import os +import json +from tempfile import mkdtemp +from os.path import join +from shutil import rmtree +from 
pathlib import Path + +from tests.base import TestCase as BaseTestCase + +SAMPLE_NAME = 'ocrd-sample-processor' +SAMPLE_OCRD_TOOL_JSON = '''{ + "executable": "ocrd-sample-processor", + "description": "Do stuff and things", + "categories": ["Image foobaring"], + "steps": ["preprocessing/optimization/foobarization"], + "input_file_grp": ["OCR-D-IMG"], + "output_file_grp": ["OCR-D-IMG-BIN", "SECOND_OUT"], + "parameters": { + "param1": { + "type": "boolean", + "default": false, + "description": "param1 description" + } + } +}''' + +SAMPLE_NAME_REQUIRED_PARAM = 'sample-processor-required-param' +SAMPLE_OCRD_TOOL_JSON_REQUIRED_PARAM = json.loads(SAMPLE_OCRD_TOOL_JSON) +del SAMPLE_OCRD_TOOL_JSON_REQUIRED_PARAM['parameters']['param1']['default'] +SAMPLE_OCRD_TOOL_JSON_REQUIRED_PARAM['executable'] = 'ocrd-' + SAMPLE_NAME_REQUIRED_PARAM +SAMPLE_OCRD_TOOL_JSON_REQUIRED_PARAM['parameters']['param1']['required'] = True +SAMPLE_OCRD_TOOL_JSON_REQUIRED_PARAM['input_file_grp'] += ['SECOND_IN'] +SAMPLE_OCRD_TOOL_JSON_REQUIRED_PARAM = json.dumps(SAMPLE_OCRD_TOOL_JSON_REQUIRED_PARAM) + +PARAM_JSON = '{"foo": 42}' + +class TestCase(BaseTestCase): + + def tearDown(self): + rmtree(self.tempdir) + + def setUp(self): + self.tempdir = mkdtemp(prefix='ocrd-task-sequence-') + self.param_fname = join(self.tempdir, 'params.json') + with open(self.param_fname, 'w') as f: + f.write(PARAM_JSON) + + p = Path(self.tempdir, SAMPLE_NAME) + p.write_text("""\ +#!/usr/bin/env python +print('''%s''') + """ % SAMPLE_OCRD_TOOL_JSON) + p.chmod(0o777) + + p = Path(self.tempdir, 'ocrd-' + SAMPLE_NAME_REQUIRED_PARAM) + p.write_text("""\ +#!/usr/bin/env python +print('''%s''') + """ % SAMPLE_OCRD_TOOL_JSON_REQUIRED_PARAM) + p.chmod(0o777) + + os.environ['PATH'] = os.pathsep.join([self.tempdir, os.environ['PATH']]) + # from distutils.spawn import find_executable as which # pylint: disable=import-error,no-name-in-module + # self.assertTrue(which('ocrd-sample-processor')) + + diff --git 
a/tests/test_task_sequence.py b/tests/test_task_sequence.py index bf7c9101a0..26ed5ee32e 100644 --- a/tests/test_task_sequence.py +++ b/tests/test_task_sequence.py @@ -1,72 +1,20 @@ -import os import json -from tempfile import mkdtemp, TemporaryDirectory -from shutil import rmtree - +from tempfile import TemporaryDirectory from pathlib import Path -from os.path import join -from tests.base import TestCase, main, assets +from tests.base import main, assets +from tests.data.wf_testcase import ( + TestCase, + + SAMPLE_NAME_REQUIRED_PARAM, + PARAM_JSON, +) from ocrd_utils import pushd_popd, MIMETYPE_PAGE from ocrd.resolver import Resolver -from ocrd.task_sequence import ProcessorTask, validate_tasks, run_tasks - -SAMPLE_NAME = 'ocrd-sample-processor' -SAMPLE_OCRD_TOOL_JSON = '''{ - "executable": "ocrd-sample-processor", - "description": "Do stuff and things", - "categories": ["Image foobaring"], - "steps": ["preprocessing/optimization/foobarization"], - "input_file_grp": ["OCR-D-IMG"], - "output_file_grp": ["OCR-D-IMG-BIN", "SECOND_OUT"], - "parameters": { - "param1": { - "type": "boolean", - "default": false, - "description": "param1 description" - } - } -}''' - -SAMPLE_NAME_REQUIRED_PARAM = 'sample-processor-required-param' -SAMPLE_OCRD_TOOL_JSON_REQUIRED_PARAM = json.loads(SAMPLE_OCRD_TOOL_JSON) -del SAMPLE_OCRD_TOOL_JSON_REQUIRED_PARAM['parameters']['param1']['default'] -SAMPLE_OCRD_TOOL_JSON_REQUIRED_PARAM['executable'] = 'ocrd-' + SAMPLE_NAME_REQUIRED_PARAM -SAMPLE_OCRD_TOOL_JSON_REQUIRED_PARAM['parameters']['param1']['required'] = True -SAMPLE_OCRD_TOOL_JSON_REQUIRED_PARAM['input_file_grp'] += ['SECOND_IN'] -SAMPLE_OCRD_TOOL_JSON_REQUIRED_PARAM = json.dumps(SAMPLE_OCRD_TOOL_JSON_REQUIRED_PARAM) - -PARAM_JSON = '{"foo": 42}' - -class TestTaskSequence(TestCase): - - def tearDown(self): - rmtree(self.tempdir) - - def setUp(self): - self.tempdir = mkdtemp(prefix='ocrd-task-sequence-') - self.param_fname = join(self.tempdir, 'params.json') - with 
open(self.param_fname, 'w') as f: - f.write(PARAM_JSON) - - p = Path(self.tempdir, SAMPLE_NAME) - p.write_text("""\ -#!/usr/bin/env python -print('''%s''') - """ % SAMPLE_OCRD_TOOL_JSON) - p.chmod(0o777) - - p = Path(self.tempdir, 'ocrd-' + SAMPLE_NAME_REQUIRED_PARAM) - p.write_text("""\ -#!/usr/bin/env python -print('''%s''') - """ % SAMPLE_OCRD_TOOL_JSON_REQUIRED_PARAM) - p.chmod(0o777) - - os.environ['PATH'] = os.pathsep.join([self.tempdir, os.environ['PATH']]) - # from distutils.spawn import find_executable as which # pylint: disable=import-error,no-name-in-module - # self.assertTrue(which('ocrd-sample-processor')) +from ocrd.task_sequence import run_tasks, validate_tasks, ProcessorTask + +class TestOcrdWfStep(TestCase): def test_parse_no_in(self): task = ProcessorTask.parse('sample-processor') From a74d29df39236e64ba132cb73bb54eb2f4e53f72 Mon Sep 17 00:00:00 2001 From: Konstantin Baierer Date: Mon, 10 Aug 2020 12:27:35 +0200 Subject: [PATCH 3/3] refactor helpers out of processor.base --- ocrd/ocrd/processor/__init__.py | 2 + ocrd/ocrd/processor/base.py | 171 ++---------------------------- ocrd/ocrd/processor/helpers.py | 178 ++++++++++++++++++++++++++++++++ 3 files changed, 189 insertions(+), 162 deletions(-) create mode 100644 ocrd/ocrd/processor/helpers.py diff --git a/ocrd/ocrd/processor/__init__.py b/ocrd/ocrd/processor/__init__.py index 8273ab6879..f01e2b3c91 100644 --- a/ocrd/ocrd/processor/__init__.py +++ b/ocrd/ocrd/processor/__init__.py @@ -1,5 +1,7 @@ from .base import ( Processor, +) +from .helpers import ( run_cli, run_processor, generate_processor_help diff --git a/ocrd/ocrd/processor/base.py b/ocrd/ocrd/processor/base.py index 483a2c1364..9b0ecd720b 100644 --- a/ocrd/ocrd/processor/base.py +++ b/ocrd/ocrd/processor/base.py @@ -1,171 +1,18 @@ +""" +Processor base class and helper functions +""" + +__all__ = ['Processor', 'generate_processor_help', 'run_cli', 'run_processor'] + import os import json -from click import wrap_text -from time import 
time -import subprocess from ocrd_utils import getLogger, VERSION as OCRD_VERSION, MIMETYPE_PAGE from ocrd_validators import ParameterValidator -log = getLogger('ocrd.processor') - -def _get_workspace(workspace=None, resolver=None, mets_url=None, working_dir=None): - if workspace is None: - if resolver is None: - raise Exception("Need to pass a resolver to create a workspace") - if mets_url is None: - raise Exception("Need to pass mets_url to create a workspace") - workspace = resolver.workspace_from_url(mets_url, dst_dir=working_dir) - return workspace - -def run_processor( - processorClass, - ocrd_tool=None, - mets_url=None, - resolver=None, - workspace=None, - page_id=None, - log_level=None, # TODO actually use this! - input_file_grp=None, - output_file_grp=None, - parameter=None, - parameter_override=None, - working_dir=None, -): # pylint: disable=too-many-locals - """ - Create a workspace for mets_url and run processor through it - - Args: - parameter (string): URL to the parameter - """ - workspace = _get_workspace( - workspace, - resolver, - mets_url, - working_dir - ) - log.debug("Running processor %s", processorClass) - processor = processorClass( - workspace, - ocrd_tool=ocrd_tool, - page_id=page_id, - input_file_grp=input_file_grp, - output_file_grp=output_file_grp, - parameter=parameter - ) - ocrd_tool = processor.ocrd_tool - name = '%s v%s' % (ocrd_tool['executable'], processor.version) - otherrole = ocrd_tool['steps'][0] - logProfile = getLogger('ocrd.process.profile') - log.debug("Processor instance %s (%s doing %s)", processor, name, otherrole) - t0 = time() - processor.process() - t1 = time() - t0 - logProfile.info("Executing processor '%s' took %fs [--input-file-grp='%s' --output-file-grp='%s' --parameter='%s']" % ( - ocrd_tool['executable'], - t1, - input_file_grp if input_file_grp else '', - output_file_grp if output_file_grp else '', - json.dumps(parameter) if parameter else {} - )) - workspace.mets.add_agent( - name=name, - _type='OTHER', - 
othertype='SOFTWARE', - role='OTHER', - otherrole=otherrole - ) - workspace.save_mets() - return processor - -def run_cli( - executable, - mets_url=None, - resolver=None, - workspace=None, - page_id=None, - overwrite=None, - log_level=None, - input_file_grp=None, - output_file_grp=None, - parameter=None, - working_dir=None, -): - """ - Create a workspace for mets_url and run MP CLI through it - """ - workspace = _get_workspace(workspace, resolver, mets_url, working_dir) - args = [executable, '--working-dir', workspace.directory] - args += ['--mets', mets_url] - if log_level: - args += ['--log-level', log_level] - if page_id: - args += ['--page-id', page_id] - if input_file_grp: - args += ['--input-file-grp', input_file_grp] - if output_file_grp: - args += ['--output-file-grp', output_file_grp] - if parameter: - args += ['--parameter', parameter] - if overwrite: - args += ['--overwrite'] - log.debug("Running subprocess '%s'", ' '.join(args)) - return subprocess.call(args) - -def generate_processor_help(ocrd_tool): - parameter_help = '' - if 'parameters' not in ocrd_tool or not ocrd_tool['parameters']: - parameter_help = ' NONE\n' - else: - def wrap(s): - return wrap_text(s, initial_indent=' '*3, - subsequent_indent=' '*4, - width=72, preserve_paragraphs=True) - for param_name, param in ocrd_tool['parameters'].items(): - parameter_help += wrap('"%s" [%s%s]' % ( - param_name, - param['type'], - ' - REQUIRED' if 'required' in param and param['required'] else - ' - %s' % json.dumps(param['default']) if 'default' in param else '')) - parameter_help += '\n ' + wrap(param['description']) - if 'enum' in param: - parameter_help += '\n ' + wrap('Possible values: %s' % json.dumps(param['enum'])) - parameter_help += "\n" - return ''' -Usage: %s [OPTIONS] - - %s - -Options: - -I, --input-file-grp USE File group(s) used as input - -O, --output-file-grp USE File group(s) used as output - -g, --page-id ID Physical page ID(s) to process - --overwrite Remove existing output 
pages/images - (with --page-id, remove only those) - -p, --parameter JSON-PATH Parameters, either verbatim JSON string - or JSON file path - -P, --param-override KEY VAL Override a single JSON object key-value pair, - taking precedence over --parameter - -m, --mets URL-PATH URL or file path of METS to process - -w, --working-dir PATH Working directory of local workspace - -l, --log-level [OFF|ERROR|WARN|INFO|DEBUG|TRACE] - Log level - -J, --dump-json Dump tool description as JSON and exit - -h, --help This help message - -V, --version Show version - -Parameters: -%s -Default Wiring: - %s -> %s - -''' % ( - ocrd_tool['executable'], - ocrd_tool['description'], - parameter_help, - ocrd_tool.get('input_file_grp', 'NONE'), - ocrd_tool.get('output_file_grp', 'NONE') -) +# XXX imports must remain for backwards-compatibility +from .helpers import run_cli, run_processor, generate_processor_help # pylint: disable=unused-import +log = getLogger('ocrd.processor') class Processor(): """ diff --git a/ocrd/ocrd/processor/helpers.py b/ocrd/ocrd/processor/helpers.py new file mode 100644 index 0000000000..314b5a7313 --- /dev/null +++ b/ocrd/ocrd/processor/helpers.py @@ -0,0 +1,178 @@ +""" +Helper methods for running and documenting processors +""" +from time import time +import json +import subprocess + +from click import wrap_text +from ocrd_utils import getLogger + +__all__ = [ + 'generate_processor_help', + 'run_cli', + 'run_processor' +] + +log = getLogger('ocrd.processor') + +def _get_workspace(workspace=None, resolver=None, mets_url=None, working_dir=None): + if workspace is None: + if resolver is None: + raise Exception("Need to pass a resolver to create a workspace") + if mets_url is None: + raise Exception("Need to pass mets_url to create a workspace") + workspace = resolver.workspace_from_url(mets_url, dst_dir=working_dir) + return workspace + +def run_processor( + processorClass, + ocrd_tool=None, + mets_url=None, + resolver=None, + workspace=None, + page_id=None, + 
log_level=None, # TODO actually use this! + input_file_grp=None, + output_file_grp=None, + parameter=None, + parameter_override=None, + working_dir=None, +): # pylint: disable=too-many-locals + """ + Create a workspace for mets_url and run processor through it + + Args: + parameter (string): URL to the parameter + """ + workspace = _get_workspace( + workspace, + resolver, + mets_url, + working_dir + ) + log.debug("Running processor %s", processorClass) + processor = processorClass( + workspace, + ocrd_tool=ocrd_tool, + page_id=page_id, + input_file_grp=input_file_grp, + output_file_grp=output_file_grp, + parameter=parameter + ) + ocrd_tool = processor.ocrd_tool + name = '%s v%s' % (ocrd_tool['executable'], processor.version) + otherrole = ocrd_tool['steps'][0] + logProfile = getLogger('ocrd.process.profile') + log.debug("Processor instance %s (%s doing %s)", processor, name, otherrole) + t0 = time() + processor.process() + t1 = time() - t0 + logProfile.info("Executing processor '%s' took %fs [--input-file-grp='%s' --output-file-grp='%s' --parameter='%s']" % ( + ocrd_tool['executable'], + t1, + input_file_grp if input_file_grp else '', + output_file_grp if output_file_grp else '', + json.dumps(parameter) if parameter else {} + )) + workspace.mets.add_agent( + name=name, + _type='OTHER', + othertype='SOFTWARE', + role='OTHER', + otherrole=otherrole + ) + workspace.save_mets() + return processor + +def run_cli( + executable, + mets_url=None, + resolver=None, + workspace=None, + page_id=None, + overwrite=None, + log_level=None, + input_file_grp=None, + output_file_grp=None, + parameter=None, + working_dir=None, +): + """ + Create a workspace for mets_url and run MP CLI through it + """ + workspace = _get_workspace(workspace, resolver, mets_url, working_dir) + args = [executable, '--working-dir', workspace.directory] + args += ['--mets', mets_url] + if log_level: + args += ['--log-level', log_level] + if page_id: + args += ['--page-id', page_id] + if input_file_grp: + 
args += ['--input-file-grp', input_file_grp] + if output_file_grp: + args += ['--output-file-grp', output_file_grp] + if parameter: + args += ['--parameter', parameter] + if overwrite: + args += ['--overwrite'] + log.debug("Running subprocess '%s'", ' '.join(args)) + return subprocess.call(args) + +def generate_processor_help(ocrd_tool): + parameter_help = '' + if 'parameters' not in ocrd_tool or not ocrd_tool['parameters']: + parameter_help = ' NONE\n' + else: + def wrap(s): + return wrap_text(s, initial_indent=' '*3, + subsequent_indent=' '*4, + width=72, preserve_paragraphs=True) + for param_name, param in ocrd_tool['parameters'].items(): + parameter_help += wrap('"%s" [%s%s]' % ( + param_name, + param['type'], + ' - REQUIRED' if 'required' in param and param['required'] else + ' - %s' % json.dumps(param['default']) if 'default' in param else '')) + parameter_help += '\n ' + wrap(param['description']) + if 'enum' in param: + parameter_help += '\n ' + wrap('Possible values: %s' % json.dumps(param['enum'])) + parameter_help += "\n" + return ''' +Usage: %s [OPTIONS] + + %s + +Options: + -I, --input-file-grp USE File group(s) used as input + -O, --output-file-grp USE File group(s) used as output + -g, --page-id ID Physical page ID(s) to process + --overwrite Remove existing output pages/images + (with --page-id, remove only those) + -p, --parameter JSON-PATH Parameters, either verbatim JSON string + or JSON file path + -P, --param-override KEY VAL Override a single JSON object key-value pair, + taking precedence over --parameter + -m, --mets URL-PATH URL or file path of METS to process + -w, --working-dir PATH Working directory of local workspace + -l, --log-level [OFF|ERROR|WARN|INFO|DEBUG|TRACE] + Log level + -J, --dump-json Dump tool description as JSON and exit + -h, --help This help message + -V, --version Show version + +Parameters: +%s +Default Wiring: + %s -> %s + +''' % ( + ocrd_tool['executable'], + ocrd_tool['description'], + parameter_help, + 
ocrd_tool.get('input_file_grp', 'NONE'), + ocrd_tool.get('output_file_grp', 'NONE') +) + + +