From c874dbcd8538122bc442483fb0cef1287299b1aa Mon Sep 17 00:00:00 2001 From: joschrew <91774427+joschrew@users.noreply.github.com> Date: Fri, 5 Sep 2025 16:13:40 +0200 Subject: [PATCH 1/5] Remove the ProcessorServer from ocrd network --- src/ocrd/cli/network.py | 2 - src/ocrd/decorators/__init__.py | 68 +- src/ocrd/decorators/ocrd_cli_options.py | 8 +- src/ocrd/lib.bash | 19 +- src/ocrd/ocrd-all-tool.json | 1743 ++++++++++++++++- src/ocrd/processor/base.py | 20 +- src/ocrd/processor/helpers.py | 3 +- src/ocrd_network/__init__.py | 3 +- src/ocrd_network/cli/__init__.py | 2 - src/ocrd_network/cli/client.py | 7 +- src/ocrd_network/cli/processing_server.py | 3 +- src/ocrd_network/cli/processor_server.py | 31 - src/ocrd_network/constants.py | 8 +- src/ocrd_network/logging_utils.py | 9 +- src/ocrd_network/models/__init__.py | 2 - src/ocrd_network/models/job.py | 7 +- src/ocrd_network/models/ocrd_tool.py | 12 - src/ocrd_network/models/workspace.py | 2 +- src/ocrd_network/processing_server.py | 138 +- src/ocrd_network/processor_server.py | 255 --- src/ocrd_network/rabbitmq_utils/helpers.py | 6 - src/ocrd_network/runtime_data/__init__.py | 3 +- src/ocrd_network/runtime_data/deployer.py | 90 +- src/ocrd_network/runtime_data/hosts.py | 177 +- .../runtime_data/network_agents.py | 36 +- src/ocrd_network/server_utils.py | 44 - src/ocrd_utils/config.py | 9 +- .../processing_server_config.schema.yml | 34 +- tests/network/config.py | 6 +- tests/network/fixtures_processing_requests.py | 2 - .../test_integration_5_processing_server.py | 3 +- tests/network/test_integration_6_client.py | 3 +- tests/network/test_modules_logging_utils.py | 4 - 33 files changed, 1860 insertions(+), 899 deletions(-) delete mode 100644 src/ocrd_network/cli/processor_server.py delete mode 100644 src/ocrd_network/models/ocrd_tool.py delete mode 100644 src/ocrd_network/processor_server.py diff --git a/src/ocrd/cli/network.py b/src/ocrd/cli/network.py index 72ecefae49..116f51cac8 100644 --- 
a/src/ocrd/cli/network.py +++ b/src/ocrd/cli/network.py @@ -12,7 +12,6 @@ client_cli, processing_server_cli, processing_worker_cli, - processor_server_cli, ) @@ -27,4 +26,3 @@ def network_cli(): network_cli.add_command(client_cli) network_cli.add_command(processing_server_cli) network_cli.add_command(processing_worker_cli) -network_cli.add_command(processor_server_cli) diff --git a/src/ocrd/decorators/__init__.py b/src/ocrd/decorators/__init__.py index 553b6fa57d..7e0e1815aa 100644 --- a/src/ocrd/decorators/__init__.py +++ b/src/ocrd/decorators/__init__.py @@ -41,7 +41,6 @@ def ocrd_cli_wrap_processor( list_resources=False, # ocrd_network params start # subcommand=None, - address=None, queue=None, log_filename=None, database=None, @@ -88,9 +87,8 @@ def ocrd_cli_wrap_processor( if list_resources: processor.list_resources() sys.exit() - if subcommand or address or queue or database: - # Used for checking/starting network agents for the WebAPI architecture - check_and_run_network_agent(processorClass, subcommand, address, database, queue) + if subcommand == "worker" or queue or database: + check_and_run_processing_worker(processorClass, database, queue) if 'parameter' in kwargs: # Disambiguate parameter file/literal, and resolve file @@ -160,54 +158,26 @@ def goexit(): run_processor(processorClass, mets_url=mets, workspace=workspace, **kwargs) -def check_and_run_network_agent(ProcessorClass, subcommand: str, address: str, database: str, queue: str): +def check_and_run_processing_worker(ProcessorClass, database: str, queue: str): + """ Check/start Processing Worker for the WebAPI architecture """ - """ - from ocrd_network import ProcessingWorker, ProcessorServer, AgentType - SUBCOMMANDS = [AgentType.PROCESSING_WORKER, AgentType.PROCESSOR_SERVER] - - if not subcommand: - raise ValueError("Subcommand options --address --queue and --database " - f"are only valid for subcommands: {SUBCOMMANDS}") - if subcommand not in SUBCOMMANDS: - raise ValueError(f"SUBCOMMAND can only 
be one of {SUBCOMMANDS}") + from ocrd_network import ProcessingWorker if not database: - raise ValueError(f"Option '--database' is invalid for subcommand {subcommand}") - - if subcommand == AgentType.PROCESSOR_SERVER: - if not address: - raise ValueError(f"Option '--address' required for subcommand {subcommand}") - if queue: - raise ValueError(f"Option '--queue' invalid for subcommand {subcommand}") - if subcommand == AgentType.PROCESSING_WORKER: - if address: - raise ValueError(f"Option '--address' invalid for subcommand {subcommand}") - if not queue: - raise ValueError(f"Option '--queue' required for subcommand {subcommand}") + raise ValueError("Option '--database' is required for the Processing Worker") + if not queue: + raise ValueError("Option '--queue' is required for the Processing Worker") processor = ProcessorClass(workspace=None) - if subcommand == AgentType.PROCESSING_WORKER: - processing_worker = ProcessingWorker( - rabbitmq_addr=queue, - mongodb_addr=database, - processor_name=processor.ocrd_tool['executable'], - ocrd_tool=processor.ocrd_tool, - processor_class=ProcessorClass, - ) - # The RMQConsumer is initialized and a connection to the RabbitMQ is performed - processing_worker.connect_consumer() - # Start consuming from the queue with name `processor_name` - processing_worker.start_consuming() - elif subcommand == AgentType.PROCESSOR_SERVER: - # TODO: Better validate that inside the ProcessorServer itself - host, port = address.split(':') - processor_server = ProcessorServer( - mongodb_addr=database, - processor_name=processor.ocrd_tool['executable'], - processor_class=ProcessorClass, - ) - processor_server.run_server(host=host, port=int(port)) - else: - raise ValueError(f"Unknown network agent type, must be one of: {SUBCOMMANDS}") + processing_worker = ProcessingWorker( + rabbitmq_addr=queue, + mongodb_addr=database, + processor_name=processor.ocrd_tool['executable'], + ocrd_tool=processor.ocrd_tool, + processor_class=ProcessorClass, + ) + # The 
RMQConsumer is initialized and a connection to the RabbitMQ is performed + processing_worker.connect_consumer() + # Start consuming from the queue with name `processor_name` + processing_worker.start_consuming() sys.exit(0) diff --git a/src/ocrd/decorators/ocrd_cli_options.py b/src/ocrd/decorators/ocrd_cli_options.py index e8c3d86854..cf676ad0b1 100644 --- a/src/ocrd/decorators/ocrd_cli_options.py +++ b/src/ocrd/decorators/ocrd_cli_options.py @@ -1,12 +1,10 @@ import click from click import option, Path, argument from ocrd_utils import DEFAULT_METS_BASENAME -from ocrd_network import AgentType from .parameter_option import parameter_option, parameter_override_option from .loglevel_option import loglevel_option from ocrd_network import ( DatabaseParamType, - ServerAddressParamType, QueueServerParamType ) @@ -40,7 +38,6 @@ def cli(**kwargs): parameter_override_option, loglevel_option, option('--log-filename', default=None), - option('--address', type=ServerAddressParamType()), option('--queue', type=QueueServerParamType()), option('--database', type=DatabaseParamType()), option('-R', '--resolve-resource'), @@ -50,13 +47,12 @@ def cli(**kwargs): option('-D', '--dump-module-dir', is_flag=True, default=False), option('-h', '--help', is_flag=True, default=False), option('-V', '--version', is_flag=True, default=False), - # Subcommand, only used for 'worker'/'server'. Cannot be handled in + # Subcommand, only used for 'worker'. Cannot be handled in # click because processors use the @command decorator and even if they # were using `group`, you cannot combine have a command with # subcommands. 
So we have to work around that by creating a # pseudo-subcommand handled in ocrd_cli_wrap_processor - argument('subcommand', nargs=1, required=False, - type=click.Choice(list(map(str, AgentType)))), + argument('subcommand', nargs=1, required=False, type=click.Choice(["worker"])), ] for param in params: param(f) diff --git a/src/ocrd/lib.bash b/src/ocrd/lib.bash index 52bde30258..20c3228d30 100644 --- a/src/ocrd/lib.bash +++ b/src/ocrd/lib.bash @@ -183,30 +183,23 @@ ocrd__parse_argv () { -V|--version) ocrd ocrd-tool "$OCRD_TOOL_JSON" version; exit ;; --queue) ocrd__worker_queue="$2" ; shift ;; --database) ocrd__worker_database="$2" ; shift ;; - --address) ocrd__worker_address="$2" ; shift ;; *) ocrd__raise "Unknown option '$1'" ;; esac shift done - if [ -v ocrd__worker_queue -o -v ocrd__worker_database -o -v ocrd__subcommand -o -v ocrd__worker_address ]; then + if [ -v ocrd__worker_queue -o -v ocrd__worker_database -o -v ocrd__subcommand ]; then if ! [ -v ocrd__subcommand ] ; then - ocrd__raise "Provide subcommand 'worker' or 'server' for Processing Worker / Processor Server" + ocrd__raise "Provide subcommand 'worker' for Processing Worker" elif ! [ -v ocrd__worker_database ]; then - ocrd__raise "For the Processing Worker / Processor Server --database is required" + ocrd__raise "For the Processing Worker --database is required" + elif ! [ -v ocrd__worker_queue ]; then + ocrd__raise "For the Processing Worker --queue is required" fi if [ ${ocrd__subcommand} = "worker" ]; then - if ! [ -v ocrd__worker_queue ]; then - ocrd__raise "For the Processing Worker --queue is required" - fi ocrd network processing-worker $OCRD_TOOL_NAME --queue "${ocrd__worker_queue}" --database "${ocrd__worker_database}" - elif [ ${ocrd__subcommand} = "server" ]; then - if ! 
[ -v ocrd__worker_address ]; then - ocrd__raise "For the Processor Server --address is required" - fi - ocrd network processor-server $OCRD_TOOL_NAME --database "${ocrd__worker_database}" --address "${ocrd__worker_address}" else - ocrd__raise "subcommand must be either 'worker' or 'server' not '${ocrd__subcommand}'" + ocrd__raise "subcommand must be 'worker' not '${ocrd__subcommand}'" fi exit fi diff --git a/src/ocrd/ocrd-all-tool.json b/src/ocrd/ocrd-all-tool.json index 5f27d9a1ae..1a8b1cec41 100644 --- a/src/ocrd/ocrd-all-tool.json +++ b/src/ocrd/ocrd-all-tool.json @@ -1,45 +1,1704 @@ { - "ocrd-dummy": { - "executable": "ocrd-dummy", - "description": "Bare-bones processor creates PAGE-XML and optionally copies file from input group to output group", - "steps": [ - "preprocessing/optimization" - ], - "categories": [ - "Image preprocessing" - ], - "input_file_grp_cardinality": 1, - "output_file_grp_cardinality": 1, - "parameters": { - "copy_files": { - "type": "boolean", - "default": false, - "description": "Whether to actually copy files (true) or just create PAGE-XML as a side effect (false)" - } + "ocrd-dummy": { + "executable": "ocrd-dummy", + "description": "Bare-bones processor creates PAGE-XML and optionally copies file from input group to output group", + "steps": [ + "preprocessing/optimization" + ], + "categories": [ + "Image preprocessing" + ], + "input_file_grp_cardinality": 1, + "output_file_grp_cardinality": 1, + "parameters": { + "copy_files": { + "type": "boolean", + "default": false, + "description": "Whether to actually copy files (true) or just create PAGE-XML as a side effect (false)" + } + } + }, + "ocrd-filter": { + "executable": "ocrd-filter", + "description": "Bare-bones processor can be dynamically configured to remove segments based on XPath queries", + "steps": [ + "recognition/post-correction" + ], + "categories": [ + "Quality assurance" + ], + "input_file_grp_cardinality": 1, + "output_file_grp_cardinality": 1, + "parameters": { + 
"select": { + "type": "string", + "default": "//*[ends-with(local-name(),'Region')]", + "description": "Which segments to select for removal. An XPath 2.0 query expression (path and optional predicates), with 'pc' as namespace prefix for PAGE-XML and our extension functions (see help text). Only selection of segment hierarchy elements is allowed (so e.g. `*` would be equivalent to `pc:NoiseRegion|pc:LineDrawingRegion|pc:AdvertRegion|pc:ImageRegion|pc:ChartRegion|pc:MusicRegion|pc:GraphicRegion|pc:UnknownRegion|pc:CustomRegion|pc:SeparatorRegion|pc:MathsRegion|pc:TextRegion|pc:MapRegion|pc:ChemRegion|pc:TableRegion|pc:TextLine|pc:Word|pc:Glyph`, but `pc:MetadataItem` or `pc:Border` or `pc:Coords` would not match).\nFor example, to remove words or glyphs with low text confidence, select '(pc:Word|pc:Glyph)[pc:TextEquiv/@conf < 0.7]'. Or low layout confidence, '*[pc:Coords/@conf < 0.7]'.\nTo remove high pixel-to-character rate, select '*[pc:pixelarea(.) div string-length(pc:textequiv(.)) > 10000]'." + }, + "plot": { + "type": "boolean", + "default": false, + "description": "Whether to extract an image for each filtered segment and write to the output fileGrp." 
+ } + } + }, + "ocrd-tesserocr-deskew": { + "executable": "ocrd-tesserocr-deskew", + "categories": [ + "Image preprocessing" + ], + "description": "Detect script, orientation and skew angle for pages or regions", + "input_file_grp_cardinality": 1, + "output_file_grp_cardinality": 1, + "steps": [ + "preprocessing/optimization/deskewing" + ], + "parameters": { + "dpi": { + "type": "number", + "format": "float", + "description": "pixel density in dots per inch (overrides any meta-data in the images)", + "default": 0 + }, + "operation_level": { + "type": "string", + "enum": [ + "page", + "region", + "line" + ], + "default": "region", + "description": "PAGE XML hierarchy level to operate on" + }, + "min_orientation_confidence": { + "type": "number", + "format": "float", + "default": 1.5, + "description": "Minimum confidence score to apply orientation as detected by OSD" + } + } + }, + "ocrd-tesserocr-fontshape": { + "executable": "ocrd-tesserocr-fontshape", + "categories": [ + "Text recognition and optimization" + ], + "description": "Recognize font shapes (family/monospace/bold/italic) and size in segmented words with Tesseract (using annotated derived images, or masking and cropping images from coordinate polygons), annotating TextStyle", + "input_file_grp_cardinality": 1, + "output_file_grp_cardinality": 1, + "steps": [ + "recognition/font-identification" + ], + "parameters": { + "dpi": { + "type": "number", + "format": "float", + "description": "pixel density in dots per inch (overrides any meta-data in the images)", + "default": 0 + }, + "padding": { + "type": "number", + "format": "integer", + "default": 0, + "description": "Number of background-filled pixels to add around the word image (i.e. the annotated AlternativeImage if it exists or the higher-level image cropped to the bounding box and masked by the polygon otherwise) on each side before recognition." 
+ }, + "model": { + "type": "string", + "format": "uri", + "content-type": "application/octet-stream", + "default": "osd", + "description": "tessdata model to apply (an ISO 639-3 language specification or some other basename, e.g. deu-frak or osd); must be an old (pre-LSTM) model" + } + } + }, + "ocrd-tesserocr-recognize": { + "executable": "ocrd-tesserocr-recognize", + "categories": [ + "Text recognition and optimization" + ], + "description": "Segment and/or recognize text with Tesseract (using annotated derived images, or masking and cropping images from coordinate polygons) on any level of the PAGE hierarchy.", + "input_file_grp_cardinality": 1, + "output_file_grp_cardinality": 1, + "steps": [ + "layout/segmentation/region", + "layout/segmentation/line", + "recognition/text-recognition" + ], + "parameters": { + "dpi": { + "type": "number", + "format": "float", + "description": "pixel density in dots per inch (overrides any meta-data in the images)", + "default": 0 + }, + "padding": { + "type": "number", + "format": "integer", + "default": 0, + "description": "Extend detected region/cell/line/word rectangles by this many (true) pixels, or extend existing region/line/word images (i.e. the annotated AlternativeImage if it exists or the higher-level image cropped to the bounding box and masked by the polygon otherwise) by this many (background/white) pixels on each side before recognition." + }, + "segmentation_level": { + "type": "string", + "enum": [ + "region", + "cell", + "line", + "word", + "glyph", + "none" + ], + "default": "word", + "description": "Highest PAGE XML hierarchy level to remove existing annotation from and detect segments for (before iterating downwards); if ``none``, does not attempt any new segmentation; if ``cell``, starts at table regions, detecting text regions (cells). Ineffective when lower than ``textequiv_level``." 
+ }, + "textequiv_level": { + "type": "string", + "enum": [ + "region", + "cell", + "line", + "word", + "glyph", + "none" + ], + "default": "word", + "description": "Lowest PAGE XML hierarchy level to re-use or detect segments for and add the TextEquiv results to (before projecting upwards); if ``none``, adds segmentation down to the glyph level, but does not attempt recognition at all; if ``cell``, stops short before text lines, adding text of text regions inside tables (cells) or on page level only." + }, + "overwrite_segments": { + "type": "boolean", + "default": false, + "description": "If ``segmentation_level`` is not none, but an element already contains segments, remove them and segment again. Otherwise use the existing segments of that element." + }, + "overwrite_text": { + "type": "boolean", + "default": true, + "description": "If ``textequiv_level`` is not none, but a segment already contains TextEquivs, remove them and replace with recognised text. Otherwise add new text as alternative. (Only the first entry is projected upwards.)" + }, + "shrink_polygons": { + "type": "boolean", + "default": false, + "description": "When detecting any segments, annotate polygon coordinates instead of bounding box rectangles by projecting the convex hull of all symbols." + }, + "block_polygons": { + "type": "boolean", + "default": false, + "description": "When detecting regions, annotate polygon coordinates instead of bounding box rectangles by querying Tesseract accordingly." + }, + "find_tables": { + "type": "boolean", + "default": true, + "description": "When detecting regions, recognise tables as table regions (Tesseract's ``textord_tabfind_find_tables=1``)." + }, + "find_staves": { + "type": "boolean", + "default": false, + "description": "When detecting regions, recognize music staves as non-text, suppressing it in the binary image (Tesseract's ``pageseg_apply_music_mask``). Note that this might wrongly detect tables as staves." 
+ }, + "sparse_text": { + "type": "boolean", + "default": false, + "description": "When detecting regions, use 'sparse text' page segmentation mode (finding as much text as possible in no particular order): only text regions, single lines without vertical or horizontal space." + }, + "raw_lines": { + "type": "boolean", + "default": false, + "description": "When detecting lines, do not attempt additional segmentation (baseline+xheight+ascenders/descenders prediction) on line images. Can increase accuracy for certain workflows. Disable when line segments/images may contain components of more than 1 line, or larger gaps/white-spaces." + }, + "char_whitelist": { + "type": "string", + "default": "", + "description": "When recognizing text, enumeration of character hypotheses (from the model) to allow exclusively; overruled by blacklist if set." + }, + "char_blacklist": { + "type": "string", + "default": "", + "description": "When recognizing text, enumeration of character hypotheses (from the model) to suppress; overruled by unblacklist if set." + }, + "char_unblacklist": { + "type": "string", + "default": "", + "description": "When recognizing text, enumeration of character hypotheses (from the model) to allow inclusively." + }, + "tesseract_parameters": { + "type": "object", + "default": {}, + "description": "Dictionary of additional Tesseract runtime variables (cf. tesseract --print-parameters), string values." + }, + "xpath_parameters": { + "type": "object", + "default": {}, + "description": "Set additional Tesseract runtime variables according to results of XPath queries into the segment. (As a convenience, `@language` and `@script` also match their upwards `@primary*` and `@secondary*` variants where applicable.) 
(Example: {'ancestor::TextRegion/@type=\"page-number\"': {'char_whitelist': '0123456789-'}, 'contains(@custom,\"ISBN\")': {'char_whitelist': '0123456789-'}})" + }, + "xpath_model": { + "type": "object", + "default": {}, + "description": "Prefer models mapped according to results of XPath queries into the segment. (As a convenience, `@language` and `@script` also match their upwards `@primary*` and `@secondary*` variants where applicable.) If no queries / mappings match (or under the default empty parameter), then fall back to `model`. If there are multiple matches, combine their results. (Example: {'starts-with(@script,\"Latn\")': 'Latin', 'starts-with(@script,\"Grek\")': 'Greek', '@language=\"Latin\"': 'lat', '@language=\"Greek\"': 'grc+ell', 'ancestor::TextRegion/@type=\"page-number\"': 'eng'})" + }, + "auto_model": { + "type": "boolean", + "default": false, + "description": "Prefer models performing best (by confidence) per segment (if multiple given in `model`). Repeats the OCR of the best model once (i.e. slower). (Use as a fallback to xpath_model if you do not trust script/language detection.)" + }, + "model": { + "type": "string", + "format": "uri", + "content-type": "application/octet-stream", + "description": "The tessdata text recognition model to apply (an ISO 639-3 language specification or some other basename, e.g. deu-frak or Fraktur)." + }, + "oem": { + "type": "string", + "enum": [ + "TESSERACT_ONLY", + "LSTM_ONLY", + "TESSERACT_LSTM_COMBINED", + "DEFAULT" + ], + "default": "DEFAULT", + "description": "Tesseract OCR engine mode to use:\n* Run Tesseract only - fastest,\n* Run just the LSTM line recognizer. (>=v4.00),\n*Run the LSTM recognizer, but allow fallback to Tesseract when things get difficult. (>=v4.00),\n*Run both and combine results - best accuracy." 
+ } + }, + "resource_locations": [ + "module" + ], + "resources": [ + { + "url": "https://ub-backup.bib.uni-mannheim.de/~stweil/ocrd-train/data/Fraktur_5000000/tessdata_best/Fraktur_50000000.334_450937.traineddata", + "name": "Fraktur_GT4HistOCR.traineddata", + "parameter_usage": "without-extension", + "description": "Tesseract LSTM model trained on GT4HistOCR", + "size": 1058487 + }, + { + "url": "https://ub-backup.bib.uni-mannheim.de/~stweil/ocrd-train/data/ONB/tessdata_best/ONB_1.195_300718_989100.traineddata", + "name": "ONB.traineddata", + "parameter_usage": "without-extension", + "description": "Tesseract LSTM model based on Austrian National Library newspaper data", + "size": 4358948 + }, + { + "url": "https://ub-backup.bib.uni-mannheim.de/~stweil/tesstrain/frak2021/tessdata_best/frak2021-0.905.traineddata", + "name": "frak2021.traineddata", + "parameter_usage": "without-extension", + "description": "Tesseract LSTM model based on a mix of mostly German and Latin ground truth data", + "size": 3421140 + }, + { + "url": "https://github.com/tesseract-ocr/tessdata_best/raw/main/equ.traineddata", + "name": "equ.traineddata", + "parameter_usage": "without-extension", + "description": "Tesseract legacy model for mathematical equations", + "size": 2251950 + }, + { + "url": "https://github.com/tesseract-ocr/tessdata_best/raw/main/osd.traineddata", + "name": "osd.traineddata", + "parameter_usage": "without-extension", + "description": "Tesseract legacy model for orientation and script detection", + "size": 10562727 + }, + { + "url": "https://github.com/tesseract-ocr/tessdata_best/raw/main/eng.traineddata", + "name": "eng.traineddata", + "parameter_usage": "without-extension", + "description": "Tesseract LSTM model for contemporary (computer typesetting and offset printing) English", + "size": 15400601 + }, + { + "url": "https://github.com/tesseract-ocr/tessdata_best/raw/main/deu.traineddata", + "name": "deu.traineddata", + "parameter_usage": "without-extension", + 
"description": "Tesseract LSTM model for contemporary (computer typesetting and offset printing) German", + "size": 8628461 + }, + { + "url": "https://github.com/tesseract-ocr/tessdata_best/raw/main/deu_latf.traineddata", + "name": "deu_latf.traineddata", + "parameter_usage": "without-extension", + "description": "Tesseract LSTM model for historical (Fraktur typesetting and letterpress printing) German", + "size": 6423052 + }, + { + "url": "https://github.com/tesseract-ocr/tessdata_best/raw/main/deu_latf.traineddata", + "name": "frk.traineddata", + "parameter_usage": "without-extension", + "description": "Tesseract LSTM model for historical German (deprecated, replaced by deu_latf)", + "size": 6423052 + }, + { + "url": "https://github.com/tesseract-ocr/tessdata_best/raw/main/script/Fraktur.traineddata", + "name": "Fraktur.traineddata", + "parameter_usage": "without-extension", + "description": "Tesseract LSTM model for historical Latin script with Fraktur typesetting (~125 characters with precomposed diacritics)", + "size": 17613343 + }, + { + "url": "https://github.com/tesseract-ocr/tessdata_best/raw/main/script/Latin.traineddata", + "name": "Latin.traineddata", + "parameter_usage": "without-extension", + "description": "Tesseract LSTM model for Latin script (contemporary and historical, ~250 characters with precomposed diacritics)", + "size": 101402885 + }, + { + "url": "https://github.com/tesseract-ocr/tessdata_best/raw/main/script/Arabic.traineddata", + "name": "Arabic.traineddata", + "parameter_usage": "without-extension", + "description": "Tesseract LSTM model for Arabic script", + "size": 17095279 + }, + { + "url": "https://github.com/tesseract-ocr/tessdata_best/raw/main/script/Armenian.traineddata", + "name": "Armenian.traineddata", + "parameter_usage": "without-extension", + "description": "Tesseract LSTM model for Armenian script", + "size": 18908681 + }, + { + "url": "https://github.com/tesseract-ocr/tessdata_best/raw/main/script/Bengali.traineddata", + 
"name": "Bengali.traineddata", + "parameter_usage": "without-extension", + "description": "Tesseract LSTM model for Bengali script (a Brahmic script)", + "size": 16711376 + }, + { + "url": "https://github.com/tesseract-ocr/tessdata_best/raw/main/script/Canadian_Aboriginal.traineddata", + "name": "Canadian_Aboriginal.traineddata", + "parameter_usage": "without-extension", + "description": "Tesseract LSTM model for Canadian Aboriginal script", + "size": 15184388 + }, + { + "url": "https://github.com/tesseract-ocr/tessdata_best/raw/main/script/Cherokee.traineddata", + "name": "Cherokee.traineddata", + "parameter_usage": "without-extension", + "description": "Tesseract LSTM model for Cherokee script", + "size": 7126553 + }, + { + "url": "https://github.com/tesseract-ocr/tessdata_best/raw/main/script/Cyrillic.traineddata", + "name": "Cyrillic.traineddata", + "parameter_usage": "without-extension", + "description": "Tesseract LSTM model for Cyrillic script", + "size": 36730735 + }, + { + "url": "https://github.com/tesseract-ocr/tessdata_best/raw/main/script/Devanagari.traineddata", + "name": "Devanagari.traineddata", + "parameter_usage": "without-extension", + "description": "Tesseract LSTM model for Devanagari script (a Brahmic script)", + "size": 28561664 + }, + { + "url": "https://github.com/tesseract-ocr/tessdata_best/raw/main/script/Ethiopic.traineddata", + "name": "Ethiopic.traineddata", + "parameter_usage": "without-extension", + "description": "Tesseract LSTM model for Ethiopic script (Ge\u02bdez script)", + "size": 11211460 + }, + { + "url": "https://github.com/tesseract-ocr/tessdata_best/raw/main/script/Georgian.traineddata", + "name": "Georgian.traineddata", + "parameter_usage": "without-extension", + "description": "Tesseract LSTM model for Georgian script", + "size": 13806109 + }, + { + "url": "https://github.com/tesseract-ocr/tessdata_best/raw/main/script/Greek.traineddata", + "name": "Greek.traineddata", + "parameter_usage": "without-extension", + 
"description": "Tesseract LSTM model for Greek script (~210 characters + basic Latin)", + "size": 10634749 + }, + { + "url": "https://github.com/tesseract-ocr/tessdata_best/raw/main/script/Gujarati.traineddata", + "name": "Gujarati.traineddata", + "parameter_usage": "without-extension", + "description": "Tesseract LSTM model for Gujarati script (a Brahmic script)", + "size": 7074537 + }, + { + "url": "https://github.com/tesseract-ocr/tessdata_best/raw/main/script/Gurmukhi.traineddata", + "name": "Gurmukhi.traineddata", + "parameter_usage": "without-extension", + "description": "Tesseract LSTM model for Gurmukhi script (a Brahmic script)", + "size": 11642032 + }, + { + "url": "https://github.com/tesseract-ocr/tessdata_best/raw/main/script/Hangul.traineddata", + "name": "Hangul.traineddata", + "parameter_usage": "without-extension", + "description": "Tesseract LSTM model for Hangul script (horizontal writing)", + "size": 12225308 + }, + { + "url": "https://github.com/tesseract-ocr/tessdata_best/raw/main/script/Hangul_vert.traineddata", + "name": "Hangul_vert.traineddata", + "parameter_usage": "without-extension", + "description": "Tesseract LSTM model for Hangul script (vertical writing)", + "size": 16522140 + }, + { + "url": "https://github.com/tesseract-ocr/tessdata_best/raw/main/script/HanS.traineddata", + "name": "HanS.traineddata", + "parameter_usage": "without-extension", + "description": "Tesseract LSTM model for Han script (simplified, horizontal writing)", + "size": 16633038 + }, + { + "url": "https://github.com/tesseract-ocr/tessdata_best/raw/main/script/HanS_vert.traineddata", + "name": "HanS_vert.traineddata", + "parameter_usage": "without-extension", + "description": "Tesseract LSTM model for Han script (simplified, vertical writing)", + "size": 12320913 + }, + { + "url": "https://github.com/tesseract-ocr/tessdata_best/raw/main/script/HanT.traineddata", + "name": "HanT.traineddata", + "parameter_usage": "without-extension", + "description": "Tesseract 
LSTM model for Han script (traditional, horizontal writing)", + "size": 12344619 + }, + { + "url": "https://github.com/tesseract-ocr/tessdata_best/raw/main/script/HanT_vert.traineddata", + "name": "HanT_vert.traineddata", + "parameter_usage": "without-extension", + "description": "Tesseract LSTM model for Han script (traditional, vertical writing)", + "size": 12344866 + }, + { + "url": "https://github.com/tesseract-ocr/tessdata_best/raw/main/script/Hebrew.traineddata", + "name": "Hebrew.traineddata", + "parameter_usage": "without-extension", + "description": "Tesseract LSTM model for Hebrew script", + "size": 12218204 + }, + { + "url": "https://github.com/tesseract-ocr/tessdata_best/raw/main/script/Japanese.traineddata", + "name": "Japanese.traineddata", + "parameter_usage": "without-extension", + "description": "Tesseract LSTM model for Japanese script (Hiragana + Katakana + 2.4k Kanji + basic Latin, horizontal writing)", + "size": 17789735 + }, + { + "url": "https://github.com/tesseract-ocr/tessdata_best/raw/main/script/Japanese_vert.traineddata", + "name": "Japanese_vert.traineddata", + "parameter_usage": "without-extension", + "description": "Tesseract LSTM model for Japanese script (Hiragana + Katakana + 2.4k Kanji + basic Latin, vertical writing)", + "size": 17789844 + }, + { + "url": "https://github.com/tesseract-ocr/tessdata_best/raw/main/script/Kannada.traineddata", + "name": "Kannada.traineddata", + "parameter_usage": "without-extension", + "description": "Tesseract LSTM model for Kannada script (a Brahmic script)", + "size": 14163058 + }, + { + "url": "https://github.com/tesseract-ocr/tessdata_best/raw/main/script/Khmer.traineddata", + "name": "Khmer.traineddata", + "parameter_usage": "without-extension", + "description": "Tesseract LSTM model for Khmer script", + "size": 12025463 + }, + { + "url": "https://github.com/tesseract-ocr/tessdata_best/raw/main/script/Lao.traineddata", + "name": "Lao.traineddata", + "parameter_usage": "without-extension", + 
"description": "Tesseract LSTM model for Lao script", + "size": 17479398 + }, + { + "url": "https://github.com/tesseract-ocr/tessdata_best/raw/main/script/Malayalam.traineddata", + "name": "Malayalam.traineddata", + "parameter_usage": "without-extension", + "description": "Tesseract LSTM model for Malayalam script (a Brahmic script)", + "size": 11073689 + }, + { + "url": "https://github.com/tesseract-ocr/tessdata_best/raw/main/script/Myanmar.traineddata", + "name": "Myanmar.traineddata", + "parameter_usage": "without-extension", + "description": "Tesseract LSTM model for Myanmar script (Burmese alphabet)", + "size": 14664489 + }, + { + "url": "https://github.com/tesseract-ocr/tessdata_best/raw/main/script/Oriya.traineddata", + "name": "Oriya.traineddata", + "parameter_usage": "without-extension", + "description": "Tesseract LSTM model for Oriya script (or Odia, a Brahmic script)", + "size": 16265327 + }, + { + "url": "https://github.com/tesseract-ocr/tessdata_best/raw/main/script/Sinhala.traineddata", + "name": "Sinhala.traineddata", + "parameter_usage": "without-extension", + "description": "Tesseract LSTM model for Sinhala script (a Brahmic script)", + "size": 6842699 + }, + { + "url": "https://github.com/tesseract-ocr/tessdata_best/raw/main/script/Syriac.traineddata", + "name": "Syriac.traineddata", + "parameter_usage": "without-extension", + "description": "Tesseract LSTM model for Syriac script (Syriac alphabet)", + "size": 15352617 + }, + { + "url": "https://github.com/tesseract-ocr/tessdata_best/raw/main/script/Tamil.traineddata", + "name": "Tamil.traineddata", + "parameter_usage": "without-extension", + "description": "Tesseract LSTM model for Tamil script (a Brahmic script)", + "size": 18607472 + }, + { + "url": "https://github.com/tesseract-ocr/tessdata_best/raw/main/script/Telugu.traineddata", + "name": "Telugu.traineddata", + "parameter_usage": "without-extension", + "description": "Tesseract LSTM model for Telugu script (a Brahmic script)", + "size": 
13981001 + }, + { + "url": "https://github.com/tesseract-ocr/tessdata_best/raw/main/script/Thaana.traineddata", + "name": "Thaana.traineddata", + "parameter_usage": "without-extension", + "description": "Tesseract LSTM model for Thaana script", + "size": 12783652 + }, + { + "url": "https://github.com/tesseract-ocr/tessdata_best/raw/main/script/Thai.traineddata", + "name": "Thai.traineddata", + "parameter_usage": "without-extension", + "description": "Tesseract LSTM model for Thai script (a Brahmic script)", + "size": 11544984 + }, + { + "url": "https://github.com/tesseract-ocr/tessdata_best/raw/main/script/Tibetan.traineddata", + "name": "Tibetan.traineddata", + "parameter_usage": "without-extension", + "description": "Tesseract LSTM model for Tibetan script (a Brahmic script)", + "size": 12523531 + }, + { + "url": "https://github.com/tesseract-ocr/tessdata_best/raw/main/script/Vietnamese.traineddata", + "name": "Vietnamese.traineddata", + "parameter_usage": "without-extension", + "description": "Tesseract LSTM model for Vietnamese script (Vietnamese alphabet)", + "size": 12435419 + }, + { + "url": "https://github.com/tesseract-ocr/tessdata_best/raw/main/afr.traineddata", + "name": "afr.traineddata", + "parameter_usage": "without-extension", + "description": "Tesseract LSTM model for Afrikaans", + "size": 12800552 + }, + { + "url": "https://github.com/tesseract-ocr/tessdata_best/raw/main/amh.traineddata", + "name": "amh.traineddata", + "parameter_usage": "without-extension", + "description": "Tesseract LSTM model for Amharic", + "size": 8389639 + }, + { + "url": "https://github.com/tesseract-ocr/tessdata_best/raw/main/ara.traineddata", + "name": "ara.traineddata", + "parameter_usage": "without-extension", + "description": "Tesseract LSTM model for Arabic", + "size": 12603724 + }, + { + "url": "https://github.com/tesseract-ocr/tessdata_best/raw/main/asm.traineddata", + "name": "asm.traineddata", + "parameter_usage": "without-extension", + "description": "Tesseract 
LSTM model for Assamese", + "size": 11315350 + }, + { + "url": "https://github.com/tesseract-ocr/tessdata_best/raw/main/aze_cyrl.traineddata", + "name": "aze_cyrl.traineddata", + "parameter_usage": "without-extension", + "description": "Tesseract LSTM model for Azerbaijani (in Cyrillic script)", + "size": 4700277 + }, + { + "url": "https://github.com/tesseract-ocr/tessdata_best/raw/main/aze.traineddata", + "name": "aze.traineddata", + "parameter_usage": "without-extension", + "description": "Tesseract LSTM model for Azerbaijani (in Latin script)", + "size": 6281404 + }, + { + "url": "https://github.com/tesseract-ocr/tessdata_best/raw/main/bel.traineddata", + "name": "bel.traineddata", + "parameter_usage": "without-extension", + "description": "Tesseract LSTM model for Belarusian", + "size": 10870278 + }, + { + "url": "https://github.com/tesseract-ocr/tessdata_best/raw/main/ben.traineddata", + "name": "ben.traineddata", + "parameter_usage": "without-extension", + "description": "Tesseract LSTM model for Bengali", + "size": 11045427 + }, + { + "url": "https://github.com/tesseract-ocr/tessdata_best/raw/main/bod.traineddata", + "name": "bod.traineddata", + "parameter_usage": "without-extension", + "description": "Tesseract LSTM model for Tibetan", + "size": 8623846 + }, + { + "url": "https://github.com/tesseract-ocr/tessdata_best/raw/main/bos.traineddata", + "name": "bos.traineddata", + "parameter_usage": "without-extension", + "description": "Tesseract LSTM model for Bosnian", + "size": 5264248 + }, + { + "url": "https://github.com/tesseract-ocr/tessdata_best/raw/main/bre.traineddata", + "name": "bre.traineddata", + "parameter_usage": "without-extension", + "description": "Tesseract LSTM model for Breton", + "size": 15640760 + }, + { + "url": "https://github.com/tesseract-ocr/tessdata_best/raw/main/bul.traineddata", + "name": "bul.traineddata", + "parameter_usage": "without-extension", + "description": "Tesseract LSTM model for Bulgarian", + "size": 8844613 + }, + { + 
"url": "https://github.com/tesseract-ocr/tessdata_best/raw/main/cat.traineddata", + "name": "cat.traineddata", + "parameter_usage": "without-extension", + "description": "Tesseract LSTM model for Catalan", + "size": 3802329 + }, + { + "url": "https://github.com/tesseract-ocr/tessdata_best/raw/main/ceb.traineddata", + "name": "ceb.traineddata", + "parameter_usage": "without-extension", + "description": "Tesseract LSTM model for Cebuano", + "size": 3452674 + }, + { + "url": "https://github.com/tesseract-ocr/tessdata_best/raw/main/ces.traineddata", + "name": "ces.traineddata", + "parameter_usage": "without-extension", + "description": "Tesseract LSTM model for Czech", + "size": 10918912 + }, + { + "url": "https://github.com/tesseract-ocr/tessdata_best/raw/main/chi_sim.traineddata", + "name": "chi_sim.traineddata", + "parameter_usage": "without-extension", + "description": "Tesseract LSTM model for Mandarin Chinese (simplified, horizontal writing)", + "size": 13077423 + }, + { + "url": "https://github.com/tesseract-ocr/tessdata_best/raw/main/chi_sim_vert.traineddata", + "name": "chi_sim_vert.traineddata", + "parameter_usage": "without-extension", + "description": "Tesseract LSTM model for Mandarin Chinese (simplified, vertical writing)", + "size": 13077507 + }, + { + "url": "https://github.com/tesseract-ocr/tessdata_best/raw/main/chi_tra.traineddata", + "name": "chi_tra.traineddata", + "parameter_usage": "without-extension", + "description": "Tesseract LSTM model for Mandarin Chinese (traditional, horizontal writing)", + "size": 12985735 + }, + { + "url": "https://github.com/tesseract-ocr/tessdata_best/raw/main/chi_tra_vert.traineddata", + "name": "chi_tra_vert.traineddata", + "parameter_usage": "without-extension", + "description": "Tesseract LSTM model for Mandarin Chinese (traditional, vertical writing)", + "size": 12985521 + }, + { + "url": "https://github.com/tesseract-ocr/tessdata_best/raw/main/chr.traineddata", + "name": "chr.traineddata", + "parameter_usage": 
"without-extension", + "description": "Tesseract LSTM model for Cherokee", + "size": 2258703 + }, + { + "url": "https://github.com/tesseract-ocr/tessdata_best/raw/main/cos.traineddata", + "name": "cos.traineddata", + "parameter_usage": "without-extension", + "description": "Tesseract LSTM model for Corsican", + "size": 8830216 + }, + { + "url": "https://github.com/tesseract-ocr/tessdata_best/raw/main/cym.traineddata", + "name": "cym.traineddata", + "parameter_usage": "without-extension", + "description": "Tesseract LSTM model for Welsh", + "size": 8750784 + }, + { + "url": "https://github.com/tesseract-ocr/tessdata_best/raw/main/dan.traineddata", + "name": "dan.traineddata", + "parameter_usage": "without-extension", + "description": "Tesseract LSTM model for Danish", + "size": 9758142 + }, + { + "url": "https://github.com/tesseract-ocr/tessdata_best/raw/main/div.traineddata", + "name": "div.traineddata", + "parameter_usage": "without-extension", + "description": "Tesseract LSTM model for Dhivehi", + "size": 4574116 + }, + { + "url": "https://github.com/tesseract-ocr/tessdata_best/raw/main/dzo.traineddata", + "name": "dzo.traineddata", + "parameter_usage": "without-extension", + "description": "Tesseract LSTM model for Dzongkha", + "size": 3243805 + }, + { + "url": "https://github.com/tesseract-ocr/tessdata_best/raw/main/ell.traineddata", + "name": "ell.traineddata", + "parameter_usage": "without-extension", + "description": "Tesseract LSTM model for Modern Greek (1453-)", + "size": 8945021 + }, + { + "url": "https://github.com/tesseract-ocr/tessdata_best/raw/main/enm.traineddata", + "name": "enm.traineddata", + "parameter_usage": "without-extension", + "description": "Tesseract LSTM model for Middle English (1100-1500)", + "size": 13281564 + }, + { + "url": "https://github.com/tesseract-ocr/tessdata_best/raw/main/epo.traineddata", + "name": "epo.traineddata", + "parameter_usage": "without-extension", + "description": "Tesseract LSTM model for Esperanto", + "size": 
7402169 + }, + { + "url": "https://github.com/tesseract-ocr/tessdata_best/raw/main/est.traineddata", + "name": "est.traineddata", + "parameter_usage": "without-extension", + "description": "Tesseract LSTM model for Estonian", + "size": 15833749 + }, + { + "url": "https://github.com/tesseract-ocr/tessdata_best/raw/main/eus.traineddata", + "name": "eus.traineddata", + "parameter_usage": "without-extension", + "description": "Tesseract LSTM model for Basque", + "size": 7933869 + }, + { + "url": "https://github.com/tesseract-ocr/tessdata_best/raw/main/fao.traineddata", + "name": "fao.traineddata", + "parameter_usage": "without-extension", + "description": "Tesseract LSTM model for Faroese", + "size": 10030003 + }, + { + "url": "https://github.com/tesseract-ocr/tessdata_best/raw/main/fas.traineddata", + "name": "fas.traineddata", + "parameter_usage": "without-extension", + "description": "Tesseract LSTM model for Persian", + "size": 3325955 + }, + { + "url": "https://github.com/tesseract-ocr/tessdata_best/raw/main/fil.traineddata", + "name": "fil.traineddata", + "parameter_usage": "without-extension", + "description": "Tesseract LSTM model for Filipino", + "size": 8978743 + }, + { + "url": "https://github.com/tesseract-ocr/tessdata_best/raw/main/fin.traineddata", + "name": "fin.traineddata", + "parameter_usage": "without-extension", + "description": "Tesseract LSTM model for Finnish", + "size": 14369979 + }, + { + "url": "https://github.com/tesseract-ocr/tessdata_best/raw/main/fra.traineddata", + "name": "fra.traineddata", + "parameter_usage": "without-extension", + "description": "Tesseract LSTM model for French", + "size": 3972885 + }, + { + "url": "https://github.com/tesseract-ocr/tessdata_best/raw/main/frm.traineddata", + "name": "frm.traineddata", + "parameter_usage": "without-extension", + "description": "Tesseract LSTM model for Middle French (ca. 
1400-1600)", + "size": 4043005 + }, + { + "url": "https://github.com/tesseract-ocr/tessdata_best/raw/main/fry.traineddata", + "name": "fry.traineddata", + "parameter_usage": "without-extension", + "description": "Tesseract LSTM model for Western Frisian", + "size": 8442509 + }, + { + "url": "https://github.com/tesseract-ocr/tessdata_best/raw/main/gla.traineddata", + "name": "gla.traineddata", + "parameter_usage": "without-extension", + "description": "Tesseract LSTM model for Scottish Gaelic", + "size": 9599424 + }, + { + "url": "https://github.com/tesseract-ocr/tessdata_best/raw/main/gle.traineddata", + "name": "gle.traineddata", + "parameter_usage": "without-extension", + "description": "Tesseract LSTM model for Irish", + "size": 3942458 + }, + { + "url": "https://github.com/tesseract-ocr/tessdata_best/raw/main/glg.traineddata", + "name": "glg.traineddata", + "parameter_usage": "without-extension", + "description": "Tesseract LSTM model for Galician", + "size": 12709487 + }, + { + "url": "https://github.com/tesseract-ocr/tessdata_best/raw/main/grc.traineddata", + "name": "grc.traineddata", + "parameter_usage": "without-extension", + "description": "Tesseract LSTM model for Ancient Greek (to 1453)", + "size": 5168122 + }, + { + "url": "https://github.com/tesseract-ocr/tessdata_best/raw/main/guj.traineddata", + "name": "guj.traineddata", + "parameter_usage": "without-extension", + "description": "Tesseract LSTM model for Gujarati", + "size": 8515761 + }, + { + "url": "https://github.com/tesseract-ocr/tessdata_best/raw/main/hat.traineddata", + "name": "hat.traineddata", + "parameter_usage": "without-extension", + "description": "Tesseract LSTM model for Haitian", + "size": 12128251 + }, + { + "url": "https://github.com/tesseract-ocr/tessdata_best/raw/main/heb.traineddata", + "name": "heb.traineddata", + "parameter_usage": "without-extension", + "description": "Tesseract LSTM model for Hebrew", + "size": 3704077 + }, + { + "url": 
"https://github.com/tesseract-ocr/tessdata_best/raw/main/hin.traineddata", + "name": "hin.traineddata", + "parameter_usage": "without-extension", + "description": "Tesseract LSTM model for Hindi", + "size": 11895564 + }, + { + "url": "https://github.com/tesseract-ocr/tessdata_best/raw/main/hrv.traineddata", + "name": "hrv.traineddata", + "parameter_usage": "without-extension", + "description": "Tesseract LSTM model for Croatian", + "size": 11195424 + }, + { + "url": "https://github.com/tesseract-ocr/tessdata_best/raw/main/hun.traineddata", + "name": "hun.traineddata", + "parameter_usage": "without-extension", + "description": "Tesseract LSTM model for Hungarian", + "size": 12350405 + }, + { + "url": "https://github.com/tesseract-ocr/tessdata_best/raw/main/hye.traineddata", + "name": "hye.traineddata", + "parameter_usage": "without-extension", + "description": "Tesseract LSTM model for Armenian", + "size": 6372242 + }, + { + "url": "https://github.com/tesseract-ocr/tessdata_best/raw/main/iku.traineddata", + "name": "iku.traineddata", + "parameter_usage": "without-extension", + "description": "Tesseract LSTM model for Inuktitut", + "size": 6139484 + }, + { + "url": "https://github.com/tesseract-ocr/tessdata_best/raw/main/ind.traineddata", + "name": "ind.traineddata", + "parameter_usage": "without-extension", + "description": "Tesseract LSTM model for Indonesian", + "size": 8253606 + }, + { + "url": "https://github.com/tesseract-ocr/tessdata_best/raw/main/isl.traineddata", + "name": "isl.traineddata", + "parameter_usage": "without-extension", + "description": "Tesseract LSTM model for Icelandic", + "size": 9486436 + }, + { + "url": "https://github.com/tesseract-ocr/tessdata_best/raw/main/ita_old.traineddata", + "name": "ita_old.traineddata", + "parameter_usage": "without-extension", + "description": "Tesseract LSTM model for historical Italian", + "size": 9852171 + }, + { + "url": "https://github.com/tesseract-ocr/tessdata_best/raw/main/ita.traineddata", + "name": 
"ita.traineddata", + "parameter_usage": "without-extension", + "description": "Tesseract LSTM model for modern Italian", + "size": 8863635 + }, + { + "url": "https://github.com/tesseract-ocr/tessdata_best/raw/main/jav.traineddata", + "name": "jav.traineddata", + "parameter_usage": "without-extension", + "description": "Tesseract LSTM model for Javanese", + "size": 8650382 + }, + { + "url": "https://github.com/tesseract-ocr/tessdata_best/raw/main/jpn.traineddata", + "name": "jpn.traineddata", + "parameter_usage": "without-extension", + "description": "Tesseract LSTM model for Japanese (horizontal writing)", + "size": 14330109 + }, + { + "url": "https://github.com/tesseract-ocr/tessdata_best/raw/main/jpn_vert.traineddata", + "name": "jpn_vert.traineddata", + "parameter_usage": "without-extension", + "description": "Tesseract LSTM model for Japanese (vertical writing)", + "size": 14330809 + }, + { + "url": "https://github.com/tesseract-ocr/tessdata_best/raw/main/kan.traineddata", + "name": "kan.traineddata", + "parameter_usage": "without-extension", + "description": "Tesseract LSTM model for Kannada", + "size": 10233763 + }, + { + "url": "https://github.com/tesseract-ocr/tessdata_best/raw/main/kat_old.traineddata", + "name": "kat_old.traineddata", + "parameter_usage": "without-extension", + "description": "Tesseract LSTM model for historical Georgian", + "size": 3174400 + }, + { + "url": "https://github.com/tesseract-ocr/tessdata_best/raw/main/kat.traineddata", + "name": "kat.traineddata", + "parameter_usage": "without-extension", + "description": "Tesseract LSTM model for modern Georgian", + "size": 4487336 + }, + { + "url": "https://github.com/tesseract-ocr/tessdata_best/raw/main/kaz.traineddata", + "name": "kaz.traineddata", + "parameter_usage": "without-extension", + "description": "Tesseract LSTM model for Kazakh", + "size": 7528853 + }, + { + "url": "https://github.com/tesseract-ocr/tessdata_best/raw/main/khm.traineddata", + "name": "khm.traineddata", + 
"parameter_usage": "without-extension", + "description": "Tesseract LSTM model for Central Khmer", + "size": 8104332 + }, + { + "url": "https://github.com/tesseract-ocr/tessdata_best/raw/main/kir.traineddata", + "name": "kir.traineddata", + "parameter_usage": "without-extension", + "description": "Tesseract LSTM model for Kirghiz", + "size": 11948344 + }, + { + "url": "https://github.com/tesseract-ocr/tessdata_best/raw/main/kmr.traineddata", + "name": "kmr.traineddata", + "parameter_usage": "without-extension", + "description": "Tesseract LSTM model for Northern Kurdish", + "size": 10196464 + }, + { + "url": "https://github.com/tesseract-ocr/tessdata_best/raw/main/kor.traineddata", + "name": "kor.traineddata", + "parameter_usage": "without-extension", + "description": "Tesseract LSTM model for Korean (horizontal writing)", + "size": 12528128 + }, + { + "url": "https://github.com/tesseract-ocr/tessdata_best/raw/main/kor_vert.traineddata", + "name": "kor_vert.traineddata", + "parameter_usage": "without-extension", + "description": "Tesseract LSTM model for Korean (vertical writing)", + "size": 3964469 + }, + { + "url": "https://github.com/tesseract-ocr/tessdata_best/raw/main/lao.traineddata", + "name": "lao.traineddata", + "parameter_usage": "without-extension", + "description": "Tesseract LSTM model for Lao", + "size": 13532551 + }, + { + "url": "https://github.com/tesseract-ocr/tessdata_best/raw/main/lat.traineddata", + "name": "lat.traineddata", + "parameter_usage": "without-extension", + "description": "Tesseract LSTM model for Latin", + "size": 9705145 + }, + { + "url": "https://github.com/tesseract-ocr/tessdata_best/raw/main/lav.traineddata", + "name": "lav.traineddata", + "parameter_usage": "without-extension", + "description": "Tesseract LSTM model for Latvian", + "size": 5623473 + }, + { + "url": "https://github.com/tesseract-ocr/tessdata_best/raw/main/lit.traineddata", + "name": "lit.traineddata", + "parameter_usage": "without-extension", + "description": 
"Tesseract LSTM model for Lithuanian", + "size": 10252680 + }, + { + "url": "https://github.com/tesseract-ocr/tessdata_best/raw/main/ltz.traineddata", + "name": "ltz.traineddata", + "parameter_usage": "without-extension", + "description": "Tesseract LSTM model for Luxembourgish", + "size": 12721945 + }, + { + "url": "https://github.com/tesseract-ocr/tessdata_best/raw/main/mal.traineddata", + "name": "mal.traineddata", + "parameter_usage": "without-extension", + "description": "Tesseract LSTM model for Malayalam", + "size": 12524967 + }, + { + "url": "https://github.com/tesseract-ocr/tessdata_best/raw/main/mar.traineddata", + "name": "mar.traineddata", + "parameter_usage": "without-extension", + "description": "Tesseract LSTM model for Marathi", + "size": 13437670 + }, + { + "url": "https://github.com/tesseract-ocr/tessdata_best/raw/main/mkd.traineddata", + "name": "mkd.traineddata", + "parameter_usage": "without-extension", + "description": "Tesseract LSTM model for Macedonian", + "size": 3453054 + }, + { + "url": "https://github.com/tesseract-ocr/tessdata_best/raw/main/mlt.traineddata", + "name": "mlt.traineddata", + "parameter_usage": "without-extension", + "description": "Tesseract LSTM model for Maltese", + "size": 5060029 + }, + { + "url": "https://github.com/tesseract-ocr/tessdata_best/raw/main/mon.traineddata", + "name": "mon.traineddata", + "parameter_usage": "without-extension", + "description": "Tesseract LSTM model for Mongolian", + "size": 8646663 + }, + { + "url": "https://github.com/tesseract-ocr/tessdata_best/raw/main/mri.traineddata", + "name": "mri.traineddata", + "parameter_usage": "without-extension", + "description": "Tesseract LSTM model for Maori", + "size": 3610177 + }, + { + "url": "https://github.com/tesseract-ocr/tessdata_best/raw/main/msa.traineddata", + "name": "msa.traineddata", + "parameter_usage": "without-extension", + "description": "Tesseract LSTM model for Malay (macrolanguage)", + "size": 8230552 + }, + { + "url": 
"https://github.com/tesseract-ocr/tessdata_best/raw/main/mya.traineddata", + "name": "mya.traineddata", + "parameter_usage": "without-extension", + "description": "Tesseract LSTM model for Burmese", + "size": 14971060 + }, + { + "url": "https://github.com/tesseract-ocr/tessdata_best/raw/main/nep.traineddata", + "name": "nep.traineddata", + "parameter_usage": "without-extension", + "description": "Tesseract LSTM model for Nepali (macrolanguage)", + "size": 12387399 + }, + { + "url": "https://github.com/tesseract-ocr/tessdata_best/raw/main/nld.traineddata", + "name": "nld.traineddata", + "parameter_usage": "without-extension", + "description": "Tesseract LSTM model for Dutch", + "size": 8903736 + }, + { + "url": "https://github.com/tesseract-ocr/tessdata_best/raw/main/nor.traineddata", + "name": "nor.traineddata", + "parameter_usage": "without-extension", + "description": "Tesseract LSTM model for Norwegian", + "size": 14312333 + }, + { + "url": "https://github.com/tesseract-ocr/tessdata_best/raw/main/oci.traineddata", + "name": "oci.traineddata", + "parameter_usage": "without-extension", + "description": "Tesseract LSTM model for Occitan (post 1500)", + "size": 12917692 + }, + { + "url": "https://github.com/tesseract-ocr/tessdata_best/raw/main/ori.traineddata", + "name": "ori.traineddata", + "parameter_usage": "without-extension", + "description": "Tesseract LSTM model for Oriya (macrolanguage)", + "size": 8110602 + }, + { + "url": "https://github.com/tesseract-ocr/tessdata_best/raw/main/pan.traineddata", + "name": "pan.traineddata", + "parameter_usage": "without-extension", + "description": "Tesseract LSTM model for Panjabi", + "size": 11893154 + }, + { + "url": "https://github.com/tesseract-ocr/tessdata_best/raw/main/pol.traineddata", + "name": "pol.traineddata", + "parameter_usage": "without-extension", + "description": "Tesseract LSTM model for Polish", + "size": 11978867 + }, + { + "url": 
"https://github.com/tesseract-ocr/tessdata_best/raw/main/por.traineddata", + "name": "por.traineddata", + "parameter_usage": "without-extension", + "description": "Tesseract LSTM model for Portuguese", + "size": 8159939 + }, + { + "url": "https://github.com/tesseract-ocr/tessdata_best/raw/main/pus.traineddata", + "name": "pus.traineddata", + "parameter_usage": "without-extension", + "description": "Tesseract LSTM model for Pushto", + "size": 11987930 + }, + { + "url": "https://github.com/tesseract-ocr/tessdata_best/raw/main/que.traineddata", + "name": "que.traineddata", + "parameter_usage": "without-extension", + "description": "Tesseract LSTM model for Quechua", + "size": 10774587 + }, + { + "url": "https://github.com/tesseract-ocr/tessdata_best/raw/main/ron.traineddata", + "name": "ron.traineddata", + "parameter_usage": "without-extension", + "description": "Tesseract LSTM model for Romanian", + "size": 9595755 + }, + { + "url": "https://github.com/tesseract-ocr/tessdata_best/raw/main/rus.traineddata", + "name": "rus.traineddata", + "parameter_usage": "without-extension", + "description": "Tesseract LSTM model for Russian", + "size": 15301764 + }, + { + "url": "https://github.com/tesseract-ocr/tessdata_best/raw/main/san.traineddata", + "name": "san.traineddata", + "parameter_usage": "without-extension", + "description": "Tesseract LSTM model for Sanskrit", + "size": 15136202 + }, + { + "url": "https://github.com/tesseract-ocr/tessdata_best/raw/main/sin.traineddata", + "name": "sin.traineddata", + "parameter_usage": "without-extension", + "description": "Tesseract LSTM model for Sinhala", + "size": 8282713 + }, + { + "url": "https://github.com/tesseract-ocr/tessdata_best/raw/main/slk.traineddata", + "name": "slk.traineddata", + "parameter_usage": "without-extension", + "description": "Tesseract LSTM model for Slovak", + "size": 11542252 + }, + { + "url": "https://github.com/tesseract-ocr/tessdata_best/raw/main/slv.traineddata", + "name": "slv.traineddata", + 
"parameter_usage": "without-extension", + "description": "Tesseract LSTM model for Slovenian", + "size": 5879151 + }, + { + "url": "https://github.com/tesseract-ocr/tessdata_best/raw/main/snd.traineddata", + "name": "snd.traineddata", + "parameter_usage": "without-extension", + "description": "Tesseract LSTM model for Sindhi", + "size": 11981538 + }, + { + "url": "https://github.com/tesseract-ocr/tessdata_best/raw/main/spa_old.traineddata", + "name": "spa_old.traineddata", + "parameter_usage": "without-extension", + "description": "Tesseract LSTM model for historical Spanish", + "size": 9476925 + }, + { + "url": "https://github.com/tesseract-ocr/tessdata_best/raw/main/spa.traineddata", + "name": "spa.traineddata", + "parameter_usage": "without-extension", + "description": "Tesseract LSTM model for modern Spanish", + "size": 13570187 + }, + { + "url": "https://github.com/tesseract-ocr/tessdata_best/raw/main/sqi.traineddata", + "name": "sqi.traineddata", + "parameter_usage": "without-extension", + "description": "Tesseract LSTM model for Albanian", + "size": 4631498 + }, + { + "url": "https://github.com/tesseract-ocr/tessdata_best/raw/main/srp_latn.traineddata", + "name": "srp_latn.traineddata", + "parameter_usage": "without-extension", + "description": "Tesseract LSTM model for Serbian (in Latin script)", + "size": 9831713 + }, + { + "url": "https://github.com/tesseract-ocr/tessdata_best/raw/main/srp.traineddata", + "name": "srp.traineddata", + "parameter_usage": "without-extension", + "description": "Tesseract LSTM model for Serbian (in Cyrillic script)", + "size": 9345851 + }, + { + "url": "https://github.com/tesseract-ocr/tessdata_best/raw/main/sun.traineddata", + "name": "sun.traineddata", + "parameter_usage": "without-extension", + "description": "Tesseract LSTM model for Sundanese", + "size": 4132820 + }, + { + "url": "https://github.com/tesseract-ocr/tessdata_best/raw/main/swa.traineddata", + "name": "swa.traineddata", + "parameter_usage": 
"without-extension", + "description": "Tesseract LSTM model for Swahili (macrolanguage)", + "size": 4914855 + }, + { + "url": "https://github.com/tesseract-ocr/tessdata_best/raw/main/swe.traineddata", + "name": "swe.traineddata", + "parameter_usage": "without-extension", + "description": "Tesseract LSTM model for Swedish", + "size": 14325549 + }, + { + "url": "https://github.com/tesseract-ocr/tessdata_best/raw/main/syr.traineddata", + "name": "syr.traineddata", + "parameter_usage": "without-extension", + "description": "Tesseract LSTM model for Syriac", + "size": 12498294 + }, + { + "url": "https://github.com/tesseract-ocr/tessdata_best/raw/main/tam.traineddata", + "name": "tam.traineddata", + "parameter_usage": "without-extension", + "description": "Tesseract LSTM model for Tamil", + "size": 6023201 + }, + { + "url": "https://github.com/tesseract-ocr/tessdata_best/raw/main/tat.traineddata", + "name": "tat.traineddata", + "parameter_usage": "without-extension", + "description": "Tesseract LSTM model for Tatar", + "size": 7585204 + }, + { + "url": "https://github.com/tesseract-ocr/tessdata_best/raw/main/tel.traineddata", + "name": "tel.traineddata", + "parameter_usage": "without-extension", + "description": "Tesseract LSTM model for Telugu", + "size": 9098795 + }, + { + "url": "https://github.com/tesseract-ocr/tessdata_best/raw/main/tgk.traineddata", + "name": "tgk.traineddata", + "parameter_usage": "without-extension", + "description": "Tesseract LSTM model for Tajik", + "size": 4602842 + }, + { + "url": "https://github.com/tesseract-ocr/tessdata_best/raw/main/tha.traineddata", + "name": "tha.traineddata", + "parameter_usage": "without-extension", + "description": "Tesseract LSTM model for Thai", + "size": 7614571 + }, + { + "url": "https://github.com/tesseract-ocr/tessdata_best/raw/main/tir.traineddata", + "name": "tir.traineddata", + "parameter_usage": "without-extension", + "description": "Tesseract LSTM model for Tigrinya", + "size": 2410256 + }, + { + "url": 
"https://github.com/tesseract-ocr/tessdata_best/raw/main/ton.traineddata", + "name": "ton.traineddata", + "parameter_usage": "without-extension", + "description": "Tesseract LSTM model for Tonga (Tonga Islands)", + "size": 3729371 + }, + { + "url": "https://github.com/tesseract-ocr/tessdata_best/raw/main/tur.traineddata", + "name": "tur.traineddata", + "parameter_usage": "without-extension", + "description": "Tesseract LSTM model for Turkish", + "size": 7456265 + }, + { + "url": "https://github.com/tesseract-ocr/tessdata_best/raw/main/uig.traineddata", + "name": "uig.traineddata", + "parameter_usage": "without-extension", + "description": "Tesseract LSTM model for Uighur", + "size": 13074609 + }, + { + "url": "https://github.com/tesseract-ocr/tessdata_best/raw/main/ukr.traineddata", + "name": "ukr.traineddata", + "parameter_usage": "without-extension", + "description": "Tesseract LSTM model for Ukrainian", + "size": 10859081 + }, + { + "url": "https://github.com/tesseract-ocr/tessdata_best/raw/main/urd.traineddata", + "name": "urd.traineddata", + "parameter_usage": "without-extension", + "description": "Tesseract LSTM model for Urdu", + "size": 7994323 + }, + { + "url": "https://github.com/tesseract-ocr/tessdata_best/raw/main/uzb_cyrl.traineddata", + "name": "uzb_cyrl.traineddata", + "parameter_usage": "without-extension", + "description": "Tesseract LSTM model for Uzbek (in Cyrillic script)", + "size": 4325478 + }, + { + "url": "https://github.com/tesseract-ocr/tessdata_best/raw/main/uzb.traineddata", + "name": "uzb.traineddata", + "parameter_usage": "without-extension", + "description": "Tesseract LSTM model for Uzbek (in Latin script)", + "size": 12953454 + }, + { + "url": "https://github.com/tesseract-ocr/tessdata_best/raw/main/vie.traineddata", + "name": "vie.traineddata", + "parameter_usage": "without-extension", + "description": "Tesseract LSTM model for Vietnamese", + "size": 12435550 + }, + { + "url": 
"https://github.com/tesseract-ocr/tessdata_best/raw/main/yid.traineddata", + "name": "yid.traineddata", + "parameter_usage": "without-extension", + "description": "Tesseract LSTM model for Yiddish", + "size": 3278995 + }, + { + "url": "https://github.com/tesseract-ocr/tessdata_best/raw/main/yor.traineddata", + "name": "yor.traineddata", + "parameter_usage": "without-extension", + "description": "Tesseract LSTM model for Yoruba", + "size": 3736121 + } + ] + }, + "ocrd-tesserocr-segment": { + "executable": "ocrd-tesserocr-segment", + "categories": [ + "Layout analysis" + ], + "description": "Segment page into regions and lines with Tesseract", + "input_file_grp_cardinality": 1, + "output_file_grp_cardinality": 1, + "steps": [ + "layout/segmentation/region", + "layout/segmentation/line" + ], + "parameters": { + "dpi": { + "type": "number", + "format": "float", + "description": "pixel density in dots per inch (overrides any meta-data in the images)", + "default": 0 + }, + "padding": { + "type": "number", + "format": "integer", + "description": "extend detected region rectangles by this many (true) pixels", + "default": 4 + }, + "shrink_polygons": { + "type": "boolean", + "default": false, + "description": "annotate polygon coordinates instead of bounding box rectangles by projecting the convex hull of all symbols" + }, + "block_polygons": { + "type": "boolean", + "default": false, + "description": "annotate polygon coordinates instead of bounding box rectangles" + }, + "find_tables": { + "type": "boolean", + "default": true, + "description": "recognise tables as table regions (textord_tabfind_find_tables)" + }, + "find_staves": { + "type": "boolean", + "default": false, + "description": "When detecting regions, recognize music staves as non-text, suppressing it in the binary image (Tesseract's ``pageseg_apply_music_mask``). Note that this might wrongly detect tables as staves." 
+ }, + "sparse_text": { + "type": "boolean", + "default": false, + "description": "use 'sparse text' page segmentation mode (find as much text as possible in no particular order): only text regions, single lines without vertical or horizontal space" + } + } + }, + "ocrd-tesserocr-segment-region": { + "executable": "ocrd-tesserocr-segment-region", + "categories": [ + "Layout analysis" + ], + "description": "Segment page into regions with Tesseract", + "input_file_grp_cardinality": 1, + "output_file_grp_cardinality": 1, + "steps": [ + "layout/segmentation/region" + ], + "parameters": { + "dpi": { + "type": "number", + "format": "float", + "description": "pixel density in dots per inch (overrides any meta-data in the images)", + "default": 0 + }, + "overwrite_regions": { + "type": "boolean", + "default": true, + "description": "Remove existing layout and text annotation below the Page level (otherwise skip page; no incremental annotation yet)." + }, + "padding": { + "type": "number", + "format": "integer", + "description": "extend detected region rectangles by this many (true) pixels", + "default": 0 + }, + "shrink_polygons": { + "type": "boolean", + "default": false, + "description": "annotate polygon coordinates instead of bounding box rectangles by projecting the convex hull of all symbols" + }, + "crop_polygons": { + "type": "boolean", + "default": false, + "description": "annotate polygon coordinates instead of bounding box rectangles" + }, + "find_tables": { + "type": "boolean", + "default": true, + "description": "recognise tables as table regions (textord_tabfind_find_tables)" + }, + "find_staves": { + "type": "boolean", + "default": false, + "description": "When detecting regions, recognize music staves as non-text, suppressing it in the binary image (Tesseract's ``pageseg_apply_music_mask``). Note that this might wrongly detect tables as staves." 
+ }, + "sparse_text": { + "type": "boolean", + "default": false, + "description": "use 'sparse text' page segmentation mode (find as much text as possible in no particular order): only text regions, single lines without vertical or horizontal space" + } + } + }, + "ocrd-tesserocr-segment-table": { + "executable": "ocrd-tesserocr-segment-table", + "categories": [ + "Layout analysis" + ], + "description": "Segment table regions into cell text regions with Tesseract", + "input_file_grp_cardinality": 1, + "output_file_grp_cardinality": 1, + "steps": [ + "layout/segmentation/region" + ], + "parameters": { + "dpi": { + "type": "number", + "format": "float", + "description": "pixel density in dots per inch (overrides any meta-data in the images)", + "default": 0 + }, + "overwrite_cells": { + "type": "boolean", + "default": true, + "description": "Remove existing layout and text annotation below the TableRegion level (otherwise skip table; no incremental annotation yet)." + }, + "padding": { + "type": "number", + "format": "integer", + "description": "extend detected cell rectangles by this many (true) pixels", + "default": 0 + }, + "shrink_polygons": { + "type": "boolean", + "default": false, + "description": "annotate polygon coordinates instead of bounding box rectangles by projecting the convex hull of all symbols" + } + } + }, + "ocrd-tesserocr-segment-line": { + "executable": "ocrd-tesserocr-segment-line", + "categories": [ + "Layout analysis" + ], + "description": "Segment regions into lines with Tesseract", + "input_file_grp_cardinality": 1, + "output_file_grp_cardinality": 1, + "steps": [ + "layout/segmentation/line" + ], + "parameters": { + "dpi": { + "type": "number", + "format": "float", + "description": "pixel density in dots per inch (overrides any meta-data in the images)", + "default": 0 + }, + "overwrite_lines": { + "type": "boolean", + "default": true, + "description": "Remove existing layout and text annotation below the TextRegion level (otherwise skip 
region; no incremental annotation yet)." + }, + "padding": { + "type": "number", + "format": "integer", + "description": "extend detected line rectangles by this many (true) pixels", + "default": 0 + }, + "shrink_polygons": { + "type": "boolean", + "default": false, + "description": "annotate polygon coordinates instead of bounding box rectangles by projecting the convex hull of all symbols" + } + } + }, + "ocrd-tesserocr-segment-word": { + "executable": "ocrd-tesserocr-segment-word", + "categories": [ + "Layout analysis" + ], + "description": "Segment lines into words with Tesseract", + "input_file_grp_cardinality": 1, + "output_file_grp_cardinality": 1, + "steps": [ + "layout/segmentation/word" + ], + "parameters": { + "dpi": { + "type": "number", + "format": "float", + "description": "pixel density in dots per inch (overrides any meta-data in the images)", + "default": 0 + }, + "overwrite_words": { + "type": "boolean", + "default": true, + "description": "Remove existing layout and text annotation below the TextLine level (otherwise skip line; no incremental annotation yet)." 
+ }, + "padding": { + "type": "number", + "format": "integer", + "description": "extend detected cell rectangles by this many (true) pixels", + "default": 0 + }, + "shrink_polygons": { + "type": "boolean", + "default": false, + "description": "annotate polygon coordinates instead of bounding box rectangles by projecting the convex hull of all symbols" + } + } + }, + "ocrd-tesserocr-crop": { + "executable": "ocrd-tesserocr-crop", + "categories": [ + "Image preprocessing" + ], + "description": "Poor man's cropping via region segmentation", + "input_file_grp_cardinality": 1, + "output_file_grp_cardinality": 1, + "steps": [ + "preprocessing/optimization/cropping" + ], + "parameters": { + "dpi": { + "type": "number", + "format": "float", + "description": "pixel density in dots per inch (overrides any meta-data in the images)", + "default": 0 + }, + "padding": { + "type": "number", + "format": "integer", + "description": "extend detected border by this many (true) pixels on every side", + "default": 4 + } + } + }, + "ocrd-tesserocr-binarize": { + "executable": "ocrd-tesserocr-binarize", + "categories": [ + "Image preprocessing" + ], + "description": "Binarize regions or lines with Tesseract's global Otsu", + "input_file_grp_cardinality": 1, + "output_file_grp_cardinality": 1, + "steps": [ + "preprocessing/optimization/binarization" + ], + "parameters": { + "dpi": { + "type": "number", + "format": "float", + "description": "pixel density in dots per inch (overrides any meta-data in the images)", + "default": 0 + }, + "operation_level": { + "type": "string", + "enum": [ + "page", + "region", + "line" + ], + "default": "page", + "description": "PAGE XML hierarchy level to operate on" + }, + "tiseg": { + "type": "boolean", + "default": false, + "description": "also separate text vs image by detecting and suppressing photo+sepline mask" + } + } } - }, - "ocrd-filter": { - "executable": "ocrd-filter", - "description": "Bare-bones processor can be dynamically configured to 
remove segments based on XPath queries", - "steps": [ - "recognition/post-correction" - ], - "categories": [ - "Quality assurance" - ], - "input_file_grp_cardinality": 1, - "output_file_grp_cardinality": 1, - "parameters": { - "select": { - "type": "string", - "default": "//*[ends-with(local-name(),'Region')]", - "description": "Which segments to select for removal. An XPath 2.0 query expression (path and optional predicates), with 'pc' as namespace prefix for PAGE-XML and our extension functions (see help text). Only selection of segment hierarchy elements is allowed (so e.g. `*` would be equivalent to `pc:NoiseRegion|pc:LineDrawingRegion|pc:AdvertRegion|pc:ImageRegion|pc:ChartRegion|pc:MusicRegion|pc:GraphicRegion|pc:UnknownRegion|pc:CustomRegion|pc:SeparatorRegion|pc:MathsRegion|pc:TextRegion|pc:MapRegion|pc:ChemRegion|pc:TableRegion|pc:TextLine|pc:Word|pc:Glyph`, but `pc:MetadataItem` or `pc:Border` or `pc:Coords` would not match).\nFor example, to remove words or glyphs with low text confidence, select '(pc:Word|pc:Glyph)[pc:TextEquiv/@conf < 0.7]'. Or low layout confidence, '*[pc:Coords/@conf < 0.7]'.\nTo remove high pixel-to-character rate, select '*[pc:pixelarea(.) div string-length(pc:textequiv(.)) > 10000]'." - }, - "plot": { - "type": "boolean", - "default": false, - "description": "Whether to extract an image for each filtered segment and write to the output fileGrp." 
- } - } - } } diff --git a/src/ocrd/processor/base.py b/src/ocrd/processor/base.py index 8b46b1f49b..1fd810d1cd 100644 --- a/src/ocrd/processor/base.py +++ b/src/ocrd/processor/base.py @@ -1209,7 +1209,7 @@ def generate_processor_help(ocrd_tool, processor_instance=None, subcommand=None) ocrd_tool (dict): this processor's ``tools`` section of the module's ``ocrd-tool.json`` processor_instance (object, optional): the processor implementation (for adding any module/class/function docstrings) - subcommand (string): 'worker' or 'server' + subcommand (string, optional): 'worker' """ doc_help = '' if processor_instance: @@ -1235,7 +1235,6 @@ def generate_processor_help(ocrd_tool, processor_instance=None, subcommand=None) preserve_paragraphs=True) subcommands = '''\ worker Start a processing worker rather than do local processing - server Start a processor server rather than do local processing ''' processing_worker_options = '''\ @@ -1250,8 +1249,6 @@ def generate_processor_help(ocrd_tool, processor_instance=None, subcommand=None) ''' processing_server_options = '''\ - --address The Processor server address in format - "{host}:{port}" --database The MongoDB server address in format "mongodb://{host}:{port}" [mongodb://localhost:27018] @@ -1296,8 +1293,8 @@ def generate_processor_help(ocrd_tool, processor_instance=None, subcommand=None) parameter_help = ' NONE\n' else: def wrap(s): - return wrap_text(s, initial_indent=' '*3, - subsequent_indent=' '*4, + return wrap_text(s, initial_indent=' ' * 3, + subsequent_indent=' ' * 4, width=72, preserve_paragraphs=True) for param_name, param in ocrd_tool['parameters'].items(): parameter_help += wrap('"%s" [%s%s]' % ( @@ -1335,17 +1332,6 @@ def wrap(s): Options: {processing_worker_options} -''' - elif subcommand == 'server': - return f'''\ -Usage: {ocrd_tool['executable']} server [OPTIONS] - - Run {ocrd_tool['executable']} as a processor sever. 
- - {ocrd_tool['description']}{doc_help} - -Options: -{processing_server_options} ''' else: pass diff --git a/src/ocrd/processor/helpers.py b/src/ocrd/processor/helpers.py index 188e627e4f..431cb6e452 100644 --- a/src/ocrd/processor/helpers.py +++ b/src/ocrd/processor/helpers.py @@ -66,8 +66,7 @@ def run_processor( when a match occurs - as long as the program is being run. They only get deleted (and their resources freed) when as many as :py:data:`~ocrd_utils.config.OCRD_MAX_PROCESSOR_CACHE` instances have already been cached while this particular parameter set was re-used - least frequently. (See :py:class:`~ocrd_network.ProcessingWorker` and - :py:class:`~ocrd_network.ProcessorServer` for use-cases.) + least frequently. (See :py:class:`~ocrd_network.ProcessingWorker` for use-cases.) Args: processorClass (object): Python class of the module processor. diff --git a/src/ocrd_network/__init__.py b/src/ocrd_network/__init__.py index 189a48100a..e7f472fd10 100644 --- a/src/ocrd_network/__init__.py +++ b/src/ocrd_network/__init__.py @@ -1,7 +1,6 @@ from .client import Client -from .constants import AgentType, JobState +from .constants import JobState from .processing_server import ProcessingServer from .processing_worker import ProcessingWorker -from .processor_server import ProcessorServer from .param_validators import DatabaseParamType, ServerAddressParamType, QueueServerParamType from .server_cache import CacheLockedPages, CacheProcessingRequests diff --git a/src/ocrd_network/cli/__init__.py b/src/ocrd_network/cli/__init__.py index 1704b2aaf7..281eea77e4 100644 --- a/src/ocrd_network/cli/__init__.py +++ b/src/ocrd_network/cli/__init__.py @@ -1,11 +1,9 @@ from .client import client_cli from .processing_server import processing_server_cli from .processing_worker import processing_worker_cli -from .processor_server import processor_server_cli __all__ = [ 'client_cli', 'processing_server_cli', 'processing_worker_cli', - 'processor_server_cli' ] diff --git 
a/src/ocrd_network/cli/client.py b/src/ocrd_network/cli/client.py index b75dcbd580..3cd8c8ce60 100644 --- a/src/ocrd_network/cli/client.py +++ b/src/ocrd_network/cli/client.py @@ -49,7 +49,7 @@ def discovery_cli(): @click.option('--address', type=URL, help=ADDRESS_HELP) def check_deployed_processors(address: Optional[str]): """ - Get a list of deployed processing workers/processor servers. + Get a list of deployed processing workers. Each processor is shown only once regardless of the amount of deployed instances. """ client = Client(server_addr_processing=address) @@ -113,7 +113,6 @@ def check_processing_job_status(address: Optional[str], processing_job_id: str): @parameter_override_option @click.option('--result-queue-name') @click.option('--callback-url') -@click.option('--agent-type', default='worker') @click.option('-b', '--block', default=False, is_flag=True, help='If set, the client will block till job timeout, fail or success.') @click.option('-p', '--print-state', default=False, is_flag=True, @@ -129,9 +128,6 @@ def send_processing_job_request( parameter_override: List[Tuple[str, str]], result_queue_name: Optional[str], callback_url: Optional[str], - # TODO: This is temporally available to toggle - # between the ProcessingWorker/ProcessorServer - agent_type: Optional[str], block: Optional[bool], print_state: Optional[bool] ): @@ -142,7 +138,6 @@ def send_processing_job_request( "path_to_mets": mets, "description": "OCR-D Network client request", "input_file_grps": input_file_grp.split(','), - "agent_type": agent_type } if output_file_grp: req_params["output_file_grps"] = output_file_grp.split(',') diff --git a/src/ocrd_network/cli/processing_server.py b/src/ocrd_network/cli/processing_server.py index 50a42887c6..5fb611c0c5 100644 --- a/src/ocrd_network/cli/processing_server.py +++ b/src/ocrd_network/cli/processing_server.py @@ -12,8 +12,7 @@ def processing_server_cli(path_to_config, address: str): """ Start the Processing Server - (proxy between the user 
and the - Processing Worker(s) / Processor Server(s)) + (proxy between the user and the Processing Worker(s)) """ # Note, the address is already validated with the type field diff --git a/src/ocrd_network/cli/processor_server.py b/src/ocrd_network/cli/processor_server.py deleted file mode 100644 index 50529adda3..0000000000 --- a/src/ocrd_network/cli/processor_server.py +++ /dev/null @@ -1,31 +0,0 @@ -import click -from ocrd_network import DatabaseParamType, ProcessorServer, ServerAddressParamType - - -@click.command('processor-server') -@click.argument('processor_name', required=True, type=click.STRING) -@click.option('-a', '--address', - help='The URL of the processor server, format: host:port', - type=ServerAddressParamType(), - required=True) -@click.option('-d', '--database', - default="mongodb://localhost:27018", - help='The URL of the MongoDB, format: mongodb://host:port', - type=DatabaseParamType(), - required=True) -def processor_server_cli(processor_name: str, address: str, database: str): - """ - Start Processor Server - (standalone REST API OCR-D processor) - """ - try: - # Note, the address is already validated with the type field - host, port = address.split(':') - processor_server = ProcessorServer( - mongodb_addr=database, - processor_name=processor_name, - processor_class=None # For readability purposes assigned here - ) - processor_server.run_server(host=host, port=int(port)) - except Exception as e: - raise Exception("Processor server has failed with error") from e diff --git a/src/ocrd_network/constants.py b/src/ocrd_network/constants.py index 089e321dff..00661801d8 100644 --- a/src/ocrd_network/constants.py +++ b/src/ocrd_network/constants.py @@ -16,11 +16,6 @@ def __str__(self): return self.value -class AgentType(StrEnum): - PROCESSING_WORKER = "worker" - PROCESSOR_SERVER = "server" - - class DeployType(StrEnum): # Deployed by the Processing Server config file DOCKER = "docker" @@ -40,7 +35,7 @@ class JobState(StrEnum): failed = "FAILED" # The 
processing job is queued inside the RabbitMQ queued = "QUEUED" - # Processing job is currently running in a Worker or Processor Server + # Processing job is currently running on a Worker running = "RUNNING" # Processing job finished successfully success = "SUCCESS" @@ -53,7 +48,6 @@ class NetworkLoggingDirs(StrEnum): PROCESSING_JOBS = "processing_jobs" PROCESSING_SERVERS = "processing_servers" PROCESSING_WORKERS = "processing_workers" - PROCESSOR_SERVERS = "processor_servers" class ServerApiTags(StrEnum): diff --git a/src/ocrd_network/logging_utils.py b/src/ocrd_network/logging_utils.py index 2b9bffa1d0..bf979b0d86 100644 --- a/src/ocrd_network/logging_utils.py +++ b/src/ocrd_network/logging_utils.py @@ -2,7 +2,7 @@ from pathlib import Path from ocrd_utils import config, LOG_FORMAT, safe_filename -from .constants import AgentType, NetworkLoggingDirs +from .constants import NetworkLoggingDirs def configure_file_handler_with_formatter(logger: Logger, log_file: Path, mode: str = "a") -> None: @@ -54,10 +54,5 @@ def get_processing_server_logging_file_path(pid: int) -> Path: def get_processing_worker_logging_file_path(processor_name: str, pid: int) -> Path: - log_file: str = f"{AgentType.PROCESSING_WORKER}.{pid}.{processor_name}.log" + log_file: str = f"worker.{pid}.{processor_name}.log" return Path(get_root_logging_dir(NetworkLoggingDirs.PROCESSING_WORKERS), log_file) - - -def get_processor_server_logging_file_path(processor_name: str, pid: int) -> Path: - log_file: str = f"{AgentType.PROCESSOR_SERVER}.{pid}.{processor_name}.log" - return Path(get_root_logging_dir(NetworkLoggingDirs.PROCESSOR_SERVERS), log_file) diff --git a/src/ocrd_network/models/__init__.py b/src/ocrd_network/models/__init__.py index 774f8aa130..052900e57d 100644 --- a/src/ocrd_network/models/__init__.py +++ b/src/ocrd_network/models/__init__.py @@ -10,13 +10,11 @@ 'DBWorkflowScript', 'PYJobInput', 'PYJobOutput', - 'PYOcrdTool', 'PYResultMessage', 'PYWorkflowJobOutput' ] from .job import 
DBProcessorJob, DBWorkflowJob, PYJobInput, PYJobOutput, PYWorkflowJobOutput from .messages import PYResultMessage -from .ocrd_tool import PYOcrdTool from .workspace import DBWorkspace from .workflow import DBWorkflowScript diff --git a/src/ocrd_network/models/job.py b/src/ocrd_network/models/job.py index efc6750c48..04c57c8e9c 100644 --- a/src/ocrd_network/models/job.py +++ b/src/ocrd_network/models/job.py @@ -2,7 +2,7 @@ from datetime import datetime from pydantic import BaseModel from typing import Dict, List, Optional -from ..constants import AgentType, JobState +from ..constants import JobState class PYJobInput(BaseModel): @@ -18,9 +18,7 @@ class PYJobInput(BaseModel): parameters: dict = {} # Always set to empty dict when None, otherwise it fails ocr-d-validation result_queue_name: Optional[str] = None callback_url: Optional[str] = None - # Used to toggle between sending requests to different network agents - agent_type: AgentType = AgentType.PROCESSING_WORKER - # Auto generated by the Processing Server when forwarding to the Processor Server + # Auto generated by the Processing Server when forwarding to the Processing-Worker job_id: Optional[str] = None # If set, specifies a list of job ids this job depends on depends_on: Optional[List[str]] = None @@ -32,7 +30,6 @@ class Config: 'description': 'The description of this execution', 'input_file_grps': ['DEFAULT'], 'output_file_grps': ['OCR-D-BIN'], - 'agent_type': AgentType.PROCESSING_WORKER, 'page_id': 'PHYS_0001..PHYS_0003', 'parameters': {} } diff --git a/src/ocrd_network/models/ocrd_tool.py b/src/ocrd_network/models/ocrd_tool.py deleted file mode 100644 index b3e2ceaea8..0000000000 --- a/src/ocrd_network/models/ocrd_tool.py +++ /dev/null @@ -1,12 +0,0 @@ -from pydantic import BaseModel -from typing import List, Optional - - -class PYOcrdTool(BaseModel): - executable: str - categories: List[str] - description: str - input_file_grp: List[str] - output_file_grp: Optional[List[str]] - steps: List[str] - 
parameters: Optional[dict] = None diff --git a/src/ocrd_network/models/workspace.py b/src/ocrd_network/models/workspace.py index 670cb14b58..f323b1b424 100644 --- a/src/ocrd_network/models/workspace.py +++ b/src/ocrd_network/models/workspace.py @@ -17,7 +17,7 @@ class DBWorkspace(Document): key-value-pairs which are saved here deleted the document is deleted if set, however, the record is still preserved pages_locked a data structure that holds output `fileGrp`s and their respective locked `page_id` - that are currently being processed by an OCR-D processor (server or worker). + that are currently being processed by an OCR-D Processing-Worker. If no `page_id` field is set, an identifier "all_pages" will be used. mets_server_url If set, the reading from and writing to the mets file happens through the METS Server """ diff --git a/src/ocrd_network/processing_server.py b/src/ocrd_network/processing_server.py index ba90bf86a0..e89eef89ab 100644 --- a/src/ocrd_network/processing_server.py +++ b/src/ocrd_network/processing_server.py @@ -10,7 +10,7 @@ from ocrd.task_sequence import ProcessorTask from ocrd_utils import initLogging, getLogger -from .constants import AgentType, JobState, ServerApiTags +from .constants import JobState, ServerApiTags from .database import ( initiate_database, db_get_processing_job, @@ -34,14 +34,12 @@ from .rabbitmq_utils import ( check_if_queue_exists, connect_rabbitmq_publisher, - create_message_queues, OcrdProcessingMessage ) from .server_cache import CacheLockedPages, CacheProcessingRequests from .server_utils import ( create_processing_message, create_workspace_if_not_exists, - forward_job_to_processor_server, _get_processor_job, _get_processor_job_log, get_page_ids_list, @@ -51,7 +49,6 @@ kill_mets_server_zombies, parse_workflow_tasks, raise_http_exception, - request_processor_server_tool_json, validate_and_return_mets_path, validate_first_task_input_file_groups_existence, validate_job_input, @@ -104,7 +101,7 @@ def __init__(self, 
config_path: str, host: str, port: int) -> None: self.mets_server_proxy = MetsServerProxy() self.use_tcp_mets = self.deployer.use_tcp_mets # If set, all Mets Server UDS requests are multiplexed over TCP - # Used by processing workers and/or processor servers to report back the results + # Used by processing workers to report back the results if self.deployer.internal_callback_url: host = self.deployer.internal_callback_url self.internal_job_callback_url = f"{host.rstrip('/')}/result_callback" @@ -153,16 +150,10 @@ def start(self) -> None: # The RMQPublisher is initialized and a connection to the RabbitMQ is performed self.rmq_publisher = connect_rabbitmq_publisher(self.log, self.rmq_data, enable_acks=True) - queue_names = self.deployer.find_matching_network_agents( - worker_only=True, str_names_only=True, unique_only=True - ) - self.log.info(f"Creating message queues on RabbitMQ instance url: {self.rabbitmq_url}") - create_message_queues(logger=self.log, rmq_publisher=self.rmq_publisher, queue_names=queue_names) - - self.deployer.deploy_network_agents(mongodb_url=self.mongodb_url, rabbitmq_url=self.rabbitmq_url) + self.deployer.deploy_workers(mongodb_url=self.mongodb_url, rabbitmq_url=self.rabbitmq_url) except Exception as error: self.log.exception(f"Failed to start the Processing Server, error: {error}") - self.log.warning("Trying to stop previously deployed services and network agents.") + self.log.warning("Trying to stop previously deployed services and workers.") self.deployer.stop_all() raise uvicorn_run(self, host=self.hostname, port=int(self.port)) @@ -225,7 +216,7 @@ def add_api_routes_processing(self): ) processing_router.add_api_route( path="/processor/info/{processor_name}", - endpoint=self.get_network_agent_ocrd_tool, + endpoint=self.get_worker_ocrd_tool, methods=["GET"], tags=[ServerApiTags.PROCESSING, ServerApiTags.DISCOVERY], status_code=status.HTTP_200_OK, @@ -233,7 +224,7 @@ def add_api_routes_processing(self): ) processing_router.add_api_route( 
path="/processor/run/{processor_name}", - endpoint=self.validate_and_forward_job_to_network_agent, + endpoint=self.validate_and_forward_job_to_worker, methods=["POST"], tags=[ServerApiTags.PROCESSING], status_code=status.HTTP_200_OK, @@ -267,7 +258,7 @@ def add_api_routes_processing(self): methods=["POST"], tags=[ServerApiTags.PROCESSING], status_code=status.HTTP_200_OK, - summary="Callback used by a worker or processor server for reporting result of a processing request" + summary="Callback used by a worker for reporting result of a processing request" ) self.include_router(processing_router) @@ -351,68 +342,38 @@ async def home_page(self): async def stop_deployed_agents(self) -> None: self.deployer.stop_all() - def query_ocrd_tool_json_from_server(self, processor_name: str) -> Dict: - processor_server_base_url = self.deployer.resolve_processor_server_url(processor_name) - if processor_server_base_url == '': - message = f"Processor Server URL of '{processor_name}' not found" - raise_http_exception(self.log, status.HTTP_404_NOT_FOUND, message=message) - return request_processor_server_tool_json(self.log, processor_server_base_url=processor_server_base_url) - - async def get_network_agent_ocrd_tool( - self, processor_name: str, agent_type: AgentType = AgentType.PROCESSING_WORKER - ) -> Dict: + async def get_worker_ocrd_tool(self, processor_name: str) -> Dict: ocrd_tool = {} - error_message = f"Network agent of type '{agent_type}' for processor '{processor_name}' not found." 
- if agent_type != AgentType.PROCESSING_WORKER and agent_type != AgentType.PROCESSOR_SERVER: - message = f"Unknown agent type: {agent_type}, {type(agent_type)}" - raise_http_exception(self.log, status_code=status.HTTP_501_NOT_IMPLEMENTED, message=message) - if agent_type == AgentType.PROCESSING_WORKER: - ocrd_tool = self.ocrd_all_tool_json.get(processor_name, None) - if agent_type == AgentType.PROCESSOR_SERVER: - ocrd_tool = self.query_ocrd_tool_json_from_server(processor_name) + ocrd_tool = self.ocrd_all_tool_json.get(processor_name, None) if not ocrd_tool: - raise_http_exception(self.log, status.HTTP_404_NOT_FOUND, error_message) + raise_http_exception(self.log, status.HTTP_404_NOT_FOUND, + f"Processing Worker '{processor_name}' not found.") return ocrd_tool - def network_agent_exists_server(self, processor_name: str) -> bool: - processor_server_url = self.deployer.resolve_processor_server_url(processor_name) - return bool(processor_server_url) - - def network_agent_exists_worker(self, processor_name: str) -> bool: + def exists_worker(self, processor_name: str) -> bool: # TODO: Reconsider and refactor this. # Added ocrd-dummy by default if not available for the integration tests. 
- # A proper Processing Worker / Processor Server registration endpoint - # is needed on the Processing Server side + # A proper Processing Worker registration endpoint is needed on the Processing Server side if processor_name == 'ocrd-dummy': return True return bool(check_if_queue_exists(self.log, self.rmq_data, processor_name=processor_name)) - def validate_agent_type_and_existence(self, processor_name: str, agent_type: AgentType) -> None: - agent_exists = False - if agent_type == AgentType.PROCESSOR_SERVER: - agent_exists = self.network_agent_exists_server(processor_name=processor_name) - elif agent_type == AgentType.PROCESSING_WORKER: - agent_exists = self.network_agent_exists_worker(processor_name=processor_name) - else: - message = f"Unknown agent type: {agent_type}, {type(agent_type)}" - raise_http_exception(self.log, status_code=status.HTTP_501_NOT_IMPLEMENTED, message=message) - if not agent_exists: - message = f"Network agent of type '{agent_type}' for processor '{processor_name}' not found." + def validate_worker_existence(self, processor_name: str) -> None: + worker_exists = self.exists_worker(processor_name=processor_name) + if not worker_exists: + message = f"Processing Worker '{processor_name}' not found." 
raise_http_exception(self.log, status.HTTP_422_UNPROCESSABLE_ENTITY, message) - async def validate_and_forward_job_to_network_agent(self, processor_name: str, data: PYJobInput) -> PYJobOutput: + async def validate_and_forward_job_to_worker(self, processor_name: str, data: PYJobInput) -> PYJobOutput: # Append the processor name to the request itself data.processor_name = processor_name - self.validate_agent_type_and_existence(processor_name=data.processor_name, agent_type=data.agent_type) + self.validate_worker_existence(processor_name=data.processor_name) if data.job_id: message = f"Processing request job id field is set but must not be: {data.job_id}" raise_http_exception(self.log, status.HTTP_422_UNPROCESSABLE_ENTITY, message) # Generate processing job id data.job_id = generate_id() - ocrd_tool = await self.get_network_agent_ocrd_tool( - processor_name=data.processor_name, - agent_type=data.agent_type - ) + ocrd_tool = await self.get_worker_ocrd_tool(processor_name=data.processor_name) validate_job_input(self.log, data.processor_name, ocrd_tool, data) if data.workspace_id: @@ -492,19 +453,13 @@ async def validate_and_forward_job_to_network_agent(self, processor_name: str, d ) await db_queued_job.insert() self.cache_processing_requests.update_request_counter(workspace_key=workspace_key, by_value=1) - job_output = await self.push_job_to_network_agent(data=data, db_job=db_queued_job) + job_output = await self.push_job_to_worker(data=data, db_job=db_queued_job) return job_output - async def push_job_to_network_agent(self, data: PYJobInput, db_job: DBProcessorJob) -> PYJobOutput: - if data.agent_type != AgentType.PROCESSING_WORKER and data.agent_type != AgentType.PROCESSOR_SERVER: - message = f"Unknown agent type: {data.agent_type}, {type(data.agent_type)}" - raise_http_exception(self.log, status_code=status.HTTP_501_NOT_IMPLEMENTED, message=message) + async def push_job_to_worker(self, data: PYJobInput, db_job: DBProcessorJob) -> PYJobOutput: job_output = None - 
self.log.debug(f"Pushing to {data.agent_type}: {data.processor_name}, {data.page_id}, {data.job_id}") - if data.agent_type == AgentType.PROCESSING_WORKER: - job_output = await self.push_job_to_processing_queue(db_job=db_job) - if data.agent_type == AgentType.PROCESSOR_SERVER: - job_output = await self.push_job_to_processor_server(job_input=data) + self.log.debug(f"Pushing to Processing Worker: {data.processor_name}, {data.page_id}, {data.job_id}") + job_output = await self.push_job_to_processing_queue(db_job=db_job) if not job_output: message = f"Failed to create job output for job input: {data}" raise_http_exception(self.log, status.HTTP_500_INTERNAL_SERVER_ERROR, message) @@ -526,12 +481,6 @@ async def push_job_to_processing_queue(self, db_job: DBProcessorJob) -> PYJobOut raise_http_exception(self.log, status.HTTP_500_INTERNAL_SERVER_ERROR, message, error) return db_job.to_job_output() - async def push_job_to_processor_server(self, job_input: PYJobInput) -> PYJobOutput: - processor_server_base_url = self.deployer.resolve_processor_server_url(job_input.processor_name) - return await forward_job_to_processor_server( - self.log, job_input=job_input, processor_server_base_url=processor_server_base_url - ) - async def get_processor_job(self, job_id: str) -> PYJobOutput: return await _get_processor_job(self.log, job_id) @@ -557,7 +506,7 @@ async def _unlock_pages_of_workspace( page_ids=page_ids ) - async def push_cached_jobs_to_agents(self, processing_jobs: List[PYJobInput]) -> None: + async def push_cached_jobs_to_workers(self, processing_jobs: List[PYJobInput]) -> None: if not len(processing_jobs): self.log.debug("No processing jobs were consumed from the requests cache") return @@ -574,7 +523,7 @@ async def push_cached_jobs_to_agents(self, processing_jobs: List[PYJobInput]) -> ) self.cache_processing_requests.update_request_counter(workspace_key=workspace_key, by_value=1) - job_output = await self.push_job_to_network_agent(data=data, db_job=db_consumed_job) + 
job_output = await self.push_job_to_worker(data=data, db_job=db_consumed_job) if not job_output: self.log.exception(f"Failed to create job output for job input data: {data}") @@ -654,22 +603,19 @@ async def remove_job_from_request_cache(self, result_message: PYResultMessage): consumed_cached_jobs = await self._consume_cached_jobs_of_workspace( workspace_key=workspace_key, mets_server_url=mets_server_url, path_to_mets=path_to_mets ) - await self.push_cached_jobs_to_agents(processing_jobs=consumed_cached_jobs) + await self.push_cached_jobs_to_workers(processing_jobs=consumed_cached_jobs) async def list_processors(self) -> List[str]: # There is no caching on the Processing Server side - processor_names_list = self.deployer.find_matching_network_agents( - docker_only=False, native_only=False, worker_only=False, server_only=False, - str_names_only=True, unique_only=True, sort=True - ) - return processor_names_list + # TODO: Implement: Get all existing queues (every time new request to get them) and derive + # correct ProcessorNames + return [] async def task_sequence_to_processing_jobs( self, tasks: List[ProcessorTask], mets_path: str, page_id: str, - agent_type: AgentType = AgentType.PROCESSING_WORKER ) -> List[PYJobOutput]: temp_file_group_cache = {} responses = [] @@ -688,10 +634,9 @@ async def task_sequence_to_processing_jobs( output_file_grps=task.output_file_grps, page_id=page_id, parameters=task.parameters, - agent_type=agent_type, depends_on=dependent_jobs, ) - response = await self.validate_and_forward_job_to_network_agent( + response = await self.validate_and_forward_job_to_worker( processor_name=job_input_data.processor_name, data=job_input_data ) @@ -700,18 +645,18 @@ async def task_sequence_to_processing_jobs( responses.append(response) return responses - def validate_tasks_agents_existence(self, tasks: List[ProcessorTask], agent_type: AgentType) -> None: - missing_agents = [] + def validate_tasks_worker_existence(self, tasks: List[ProcessorTask]) -> None: 
+ missing_workers = [] for task in tasks: try: - self.validate_agent_type_and_existence(processor_name=task.executable, agent_type=agent_type) + self.validate_worker_existence(processor_name=task.executable) except HTTPException: # catching the error is not relevant here - missing_agents.append({task.executable, agent_type}) - if missing_agents: + missing_workers.append({task.executable}) + if missing_workers: message = ( - "Workflow validation has failed. The desired network agents not found. " - f"Missing processing agents: {missing_agents}" + "Workflow validation has failed. The desired Processing Worker was not found. " + f"Missing Processing Workers: {missing_workers}" ) raise_http_exception(self.log, status.HTTP_406_NOT_ACCEPTABLE, message) @@ -720,7 +665,6 @@ async def run_workflow( mets_path: str, workflow: Union[UploadFile, str, None] = File(None), workflow_id: str = None, - agent_type: AgentType = AgentType.PROCESSING_WORKER, page_id: str = None, page_wise: bool = False, workflow_callback_url: str = None @@ -732,9 +676,9 @@ async def run_workflow( # Validate the input file groups of the first task in the workflow validate_first_task_input_file_groups_existence(self.log, mets_path, processing_tasks[0].input_file_grps) - # Validate existence of agents (processing workers/processor servers) + # Validate existence of Processing Workers # for the ocr-d processors referenced inside tasks - self.validate_tasks_agents_existence(processing_tasks, agent_type) + self.validate_tasks_worker_existence(processing_tasks) # for page_wise mode, we need to expand the list of pages # for the database, it's better to keep a short string @@ -746,7 +690,6 @@ async def run_workflow( tasks=processing_tasks, mets_path=mets_path, page_id=page_id, - agent_type=agent_type ) processing_job_ids = [response.job_id for response in responses] db_workflow_job = DBWorkflowJob( @@ -766,7 +709,6 @@ async def run_workflow( tasks=processing_tasks, mets_path=mets_path, page_id=current_page, - 
agent_type=agent_type ) processing_job_ids = [response.job_id for response in responses] all_pages_job_ids[current_page] = processing_job_ids diff --git a/src/ocrd_network/processor_server.py b/src/ocrd_network/processor_server.py deleted file mode 100644 index f873d2857a..0000000000 --- a/src/ocrd_network/processor_server.py +++ /dev/null @@ -1,255 +0,0 @@ -from datetime import datetime -from os import getpid -from subprocess import run as subprocess_run, PIPE -from uvicorn import run - -from fastapi import APIRouter, BackgroundTasks, FastAPI, status -from fastapi.responses import FileResponse - -from ocrd_utils import ( - initLogging, - get_ocrd_tool_json, - getLogger, - parse_json_string_with_comments -) -from .constants import JobState, ServerApiTags -from .database import ( - DBProcessorJob, - db_get_workspace, - db_update_processing_job, - db_get_processing_job, - initiate_database -) -from .logging_utils import ( - configure_file_handler_with_formatter, - get_processor_server_logging_file_path, - get_processing_job_logging_file_path -) -from .models import PYJobInput, PYJobOutput, PYOcrdTool -from .process_helpers import invoke_processor -from .rabbitmq_utils import OcrdResultMessage -from .server_utils import ( - _get_processor_job, - _get_processor_job_log, - raise_http_exception, - validate_and_return_mets_path, - validate_job_input -) -from .utils import calculate_execution_time, post_to_callback_url, generate_id - - -class ProcessorServer(FastAPI): - def __init__(self, mongodb_addr: str, processor_name: str = "", processor_class=None): - if not (processor_name or processor_class): - raise ValueError("Either 'processor_name' or 'processor_class' must be provided") - super().__init__( - on_startup=[self.on_startup], - on_shutdown=[self.on_shutdown], - title="Network agent - Processor Server", - description="Network agent - Processor Server" - ) - initLogging() - self.log = getLogger("ocrd_network.processor_server") - log_file = 
get_processor_server_logging_file_path(processor_name=processor_name, pid=getpid()) - configure_file_handler_with_formatter(self.log, log_file=log_file, mode="a") - - self.db_url = mongodb_addr - self.processor_name = processor_name - self.processor_class = processor_class - self.ocrd_tool = None - self.version = None - - self.version = self.get_version() - self.ocrd_tool = self.get_ocrd_tool() - - if not self.ocrd_tool: - raise Exception("The ocrd_tool is empty or missing") - - if not self.processor_name: - self.processor_name = self.ocrd_tool["executable"] - - self.add_api_routes_processing() - self.log.info(f"Initialized processor server: {processor_name}") - - async def on_startup(self): - await initiate_database(db_url=self.db_url) - - async def on_shutdown(self) -> None: - """ - TODO: Perform graceful shutdown operations here - """ - pass - - def add_api_routes_processing(self): - processing_router = APIRouter() - processing_router.add_api_route( - path="/info", - endpoint=self.get_processor_info, - methods=["GET"], - tags=[ServerApiTags.PROCESSING], - status_code=status.HTTP_200_OK, - summary="Get information about this processor.", - response_model=PYOcrdTool, - response_model_exclude_unset=True, - response_model_exclude_none=True - ) - processing_router.add_api_route( - path="/run", - endpoint=self.create_processor_task, - methods=["POST"], - tags=[ServerApiTags.PROCESSING], - status_code=status.HTTP_202_ACCEPTED, - summary="Submit a job to this processor.", - response_model=PYJobOutput, - response_model_exclude_unset=True, - response_model_exclude_none=True - ) - processing_router.add_api_route( - path="/job/{job_id}", - endpoint=self.get_processor_job, - methods=["GET"], - tags=[ServerApiTags.PROCESSING], - status_code=status.HTTP_200_OK, - summary="Get information about a job based on its ID", - response_model=PYJobOutput, - response_model_exclude_unset=True, - response_model_exclude_none=True - ) - processing_router.add_api_route( - 
path="/log/{job_id}", - endpoint=self.get_processor_job_log, - methods=["GET"], - tags=[ServerApiTags.PROCESSING], - status_code=status.HTTP_200_OK, - summary="Get the log file of a job id" - ) - - async def get_processor_info(self): - if not self.ocrd_tool: - message = "Empty or missing ocrd tool json." - raise_http_exception(self.log, status.HTTP_500_INTERNAL_SERVER_ERROR, message) - return self.ocrd_tool - - # Note: The Processing server pushes to a queue, while - # the Processor Server creates (pushes to) a background task - async def create_processor_task(self, job_input: PYJobInput, background_tasks: BackgroundTasks): - validate_job_input(self.log, self.processor_name, self.ocrd_tool, job_input) - job_input.path_to_mets = await validate_and_return_mets_path(self.log, job_input) - - # The request is not forwarded from the Processing Server, assign a job_id - if not job_input.job_id: - job_id = generate_id() - # Create a DB entry - job = DBProcessorJob( - **job_input.dict(exclude_unset=True, exclude_none=True), - job_id=job_id, - processor_name=self.processor_name, - state=JobState.queued - ) - await job.insert() - else: - job = await db_get_processing_job(job_input.job_id) - # await self.run_processor_task(job=job) - background_tasks.add_task(self.run_processor_task, job) - return job.to_job_output() - - async def run_processor_task(self, job: DBProcessorJob): - execution_failed = False - start_time = datetime.now() - job_log_file = get_processing_job_logging_file_path(job_id=job.job_id) - await db_update_processing_job( - job_id=job.job_id, - state=JobState.running, - start_time=start_time, - log_file_path=job_log_file - ) - - mets_server_url = (await db_get_workspace(workspace_mets_path=job.path_to_mets)).mets_server_url - try: - invoke_processor( - processor_class=self.processor_class, - executable=self.processor_name, - abs_path_to_mets=job.path_to_mets, - input_file_grps=job.input_file_grps, - output_file_grps=job.output_file_grps, - page_id=job.page_id, 
- parameters=job.parameters, - mets_server_url=mets_server_url, - log_filename=job_log_file, - ) - except Exception as error: - self.log.debug(f"processor_name: {self.processor_name}, path_to_mets: {job.path_to_mets}, " - f"input_grps: {job.input_file_grps}, output_file_grps: {job.output_file_grps}, " - f"page_id: {job.page_id}, parameters: {job.parameters}") - self.log.exception(error) - execution_failed = True - end_time = datetime.now() - exec_duration = calculate_execution_time(start_time, end_time) - job_state = JobState.success if not execution_failed else JobState.failed - await db_update_processing_job( - job_id=job.job_id, - state=job_state, - end_time=end_time, - exec_time=f"{exec_duration} ms" - ) - result_message = OcrdResultMessage( - job_id=job.job_id, - state=job_state.value, - path_to_mets=job.path_to_mets, - # May not be always available - workspace_id=job.workspace_id if job.workspace_id else '' - ) - self.log.info(f"Result message: {result_message}") - if job.callback_url: - # If the callback_url field is set, - # post the result message (callback to a user defined endpoint) - post_to_callback_url(self.log, job.callback_url, result_message) - if job.internal_callback_url: - # If the internal callback_url field is set, - # post the result message (callback to Processing Server endpoint) - post_to_callback_url(self.log, job.internal_callback_url, result_message) - - def get_ocrd_tool(self): - if self.ocrd_tool: - return self.ocrd_tool - if self.processor_class: - # The way of accessing ocrd tool like in the line below may be problematic - # ocrd_tool = self.processor_class(workspace=None, version=True).ocrd_tool - ocrd_tool = parse_json_string_with_comments( - subprocess_run( - [self.processor_name, "--dump-json"], - stdout=PIPE, - check=True, - universal_newlines=True - ).stdout - ) - else: - ocrd_tool = get_ocrd_tool_json(self.processor_name) - return ocrd_tool - - def get_version(self) -> str: - if self.version: - return self.version - - """ - 
if self.processor_class: - # The way of accessing the version like in the line below may be problematic - # version_str = self.processor_class(workspace=None, version=True).version - return version_str - """ - version_str = subprocess_run( - [self.processor_name, "--version"], - stdout=PIPE, - check=True, - universal_newlines=True - ).stdout - return version_str - - def run_server(self, host, port): - run(self, host=host, port=port) - - async def get_processor_job(self, job_id: str) -> PYJobOutput: - return await _get_processor_job(self.log, job_id) - - async def get_processor_job_log(self, job_id: str) -> FileResponse: - return await _get_processor_job_log(self.log, job_id) diff --git a/src/ocrd_network/rabbitmq_utils/helpers.py b/src/ocrd_network/rabbitmq_utils/helpers.py index 5dc6dae779..9109385c5d 100644 --- a/src/ocrd_network/rabbitmq_utils/helpers.py +++ b/src/ocrd_network/rabbitmq_utils/helpers.py @@ -68,12 +68,6 @@ def check_if_queue_exists(logger: Logger, rmq_data: Dict, processor_name: str) - def create_message_queues(logger: Logger, rmq_publisher: RMQPublisher, queue_names: List[str]) -> None: - # TODO: Reconsider and refactor this. - # Added ocrd-dummy by default if not available for the integration tests. - # A proper Processing Worker / Processor Server registration endpoint is needed on the Processing Server side - if "ocrd-dummy" not in queue_names: - queue_names.append("ocrd-dummy") - for queue_name in queue_names: # The existence/validity of the worker.name is not tested. 
# Even if an ocr-d processor does not exist, the queue is created diff --git a/src/ocrd_network/runtime_data/__init__.py b/src/ocrd_network/runtime_data/__init__.py index e43be7ae3c..1e658c1305 100644 --- a/src/ocrd_network/runtime_data/__init__.py +++ b/src/ocrd_network/runtime_data/__init__.py @@ -5,10 +5,9 @@ "DataNetworkAgent", "DataRabbitMQ", "DataProcessingWorker", - "DataProcessorServer" ] from .deployer import Deployer from .hosts import DataHost -from .network_agents import DataNetworkAgent, DataProcessingWorker, DataProcessorServer +from .network_agents import DataNetworkAgent, DataProcessingWorker from .network_services import DataMongoDB, DataRabbitMQ diff --git a/src/ocrd_network/runtime_data/deployer.py b/src/ocrd_network/runtime_data/deployer.py index 4578e7eb85..9b85571c15 100644 --- a/src/ocrd_network/runtime_data/deployer.py +++ b/src/ocrd_network/runtime_data/deployer.py @@ -9,7 +9,7 @@ from __future__ import annotations from pathlib import Path import psutil -from typing import Dict, List, Union +from typing import Dict, List from ocrd import OcrdMetsServer from ocrd_utils import getLogger @@ -33,89 +33,15 @@ def __init__(self, config_path: str) -> None: self.mets_servers_paths: Dict = {} # {"ws_dir_path": "mets_server_url"} self.use_tcp_mets = ps_config.get("use_tcp_mets", False) - # TODO: Reconsider this. - def find_matching_network_agents( - self, worker_only: bool = False, server_only: bool = False, docker_only: bool = False, - native_only: bool = False, str_names_only: bool = False, unique_only: bool = False, sort: bool = False - ) -> Union[List[str], List[object]]: - """Finds and returns a list of matching data objects of type: - `DataProcessingWorker` and `DataProcessorServer`. 
- - :py:attr:`worker_only` match only worker network agents (DataProcessingWorker) - :py:attr:`server_only` match only server network agents (DataProcessorServer) - :py:attr:`docker_only` match only docker network agents (DataProcessingWorker and DataProcessorServer) - :py:attr:`native_only` match only native network agents (DataProcessingWorker and DataProcessorServer) - :py:attr:`str_names_only` returns the processor_name filed instead of the Data* object - :py:attr:`unique_only` remove duplicate names from the matches - :py:attr:`sort` sort the result - - `worker_only` and `server_only` are mutually exclusive to each other - `docker_only` and `native_only` are mutually exclusive to each other - `unique_only` is allowed only together with `str_names_only` - """ - - if worker_only and server_only: - msg = "Only 'worker_only' or 'server_only' is allowed, not both." - self.log.exception(msg) - raise ValueError(msg) - if docker_only and native_only: - msg = "Only 'docker_only' or 'native_only' is allowed, not both." 
- self.log.exception(msg) - raise ValueError(msg) - if not str_names_only and unique_only: - msg = "Value 'unique_only' is allowed only together with 'str_names_only'" - self.log.exception(msg) - raise ValueError(msg) - if sort and not str_names_only: - msg = "Value 'sort' is allowed only together with 'str_names_only'" - self.log.exception(msg) - raise ValueError(msg) - - # Find all matching objects of type DataProcessingWorker or DataProcessorServer - matched_objects = [] - for data_host in self.data_hosts: - if not server_only: - if not docker_only: - for data_worker in data_host.network_agents_worker_native: - matched_objects.append(data_worker) - if not native_only: - for data_worker in data_host.network_agents_worker_docker: - matched_objects.append(data_worker) - if not worker_only: - if not docker_only: - for data_server in data_host.network_agents_server_native: - matched_objects.append(data_server) - if not native_only: - for data_server in data_host.network_agents_server_docker: - matched_objects.append(data_server) - if not str_names_only: - return matched_objects - # Gets only the processor names of the matched objects - matched_names = [match.processor_name for match in matched_objects] - if not unique_only: - return matched_names - list_matched = list(dict.fromkeys(matched_names)) - if not sort: - # Removes any duplicate entries from matched names - return list_matched - list_matched.sort() - return list_matched - - def resolve_processor_server_url(self, processor_name) -> str: - processor_server_url = '' - for data_host in self.data_hosts: - processor_server_url = data_host.resolve_processor_server_url(processor_name=processor_name) - return processor_server_url - - def deploy_network_agents(self, mongodb_url: str, rabbitmq_url: str) -> None: - self.log.debug("Deploying processing workers/processor servers...") + def deploy_workers(self, mongodb_url: str, rabbitmq_url: str) -> None: + self.log.debug("Deploying processing workers...") for host_data 
in self.data_hosts: - host_data.deploy_network_agents(logger=self.log, mongodb_url=mongodb_url, rabbitmq_url=rabbitmq_url) + host_data.deploy_workers(logger=self.log, mongodb_url=mongodb_url, rabbitmq_url=rabbitmq_url) - def stop_network_agents(self) -> None: - self.log.debug("Stopping processing workers/processor servers...") + def stop_workers(self) -> None: + self.log.debug("Stopping processing workers...") for host_data in self.data_hosts: - host_data.stop_network_agents(logger=self.log) + host_data.stop_workers(logger=self.log) def deploy_rabbitmq(self) -> str: self.data_queue.deploy_rabbitmq(self.log) @@ -137,7 +63,7 @@ def stop_all(self) -> None: If RabbitMQ server is stopped before stopping Processing Workers that may have a bad outcome and leave Processing Workers in an unpredictable state. """ - self.stop_network_agents() + self.stop_workers() self.stop_mongodb() self.stop_rabbitmq() diff --git a/src/ocrd_network/runtime_data/hosts.py b/src/ocrd_network/runtime_data/hosts.py index 176afb991d..847d94de84 100644 --- a/src/ocrd_network/runtime_data/hosts.py +++ b/src/ocrd_network/runtime_data/hosts.py @@ -1,9 +1,9 @@ from logging import Logger from time import sleep -from typing import Dict, List, Union +from typing import Dict, List from .connection_clients import create_docker_client, create_ssh_client -from .network_agents import AgentType, DataNetworkAgent, DataProcessingWorker, DataProcessorServer, DeployType +from .network_agents import DataProcessingWorker, DeployType class DataHost: @@ -24,68 +24,39 @@ def __init__( self.ssh_client = None self.docker_client = None - # Time to wait between deploying agents - self.wait_between_agent_deploys: float = 0.3 + # Time to wait between deploying single workers + self.wait_between_deploys: float = 0.3 - # Lists of network agents based on their agent and deployment type - self.network_agents_worker_native = [] - self.network_agents_worker_docker = [] - self.network_agents_server_native = [] - 
self.network_agents_server_docker = [] + # Lists of Processing Workers based on their deployment type + self.workers_native = [] + self.workers_docker = [] if not workers: workers = [] if not servers: servers = [] - self.__parse_network_agents_workers(processing_workers=workers) - self.__parse_network_agents_servers(processor_servers=servers) + self.__parse_workers(processing_workers=workers) - # Used for caching deployed Processor Servers' ports on the current host - # Key: processor_name, Value: list of ports - self.processor_servers_ports: dict = {} + def __append_workers_to_lists(self, worker_data: DataProcessingWorker) -> None: + if worker_data.deploy_type != DeployType.DOCKER and worker_data.deploy_type != DeployType.NATIVE: + raise ValueError(f"Processing Worker deploy type is unknown: {worker_data.deploy_type}") - def __add_deployed_agent_server_port_to_cache(self, processor_name: str, port: int) -> None: - if processor_name not in self.processor_servers_ports: - self.processor_servers_ports[processor_name] = [port] - return - self.processor_servers_ports[processor_name] = self.processor_servers_ports[processor_name].append(port) - - def __append_network_agent_to_lists(self, agent_data: DataNetworkAgent) -> None: - if agent_data.deploy_type != DeployType.DOCKER and agent_data.deploy_type != DeployType.NATIVE: - raise ValueError(f"Network agent deploy type is unknown: {agent_data.deploy_type}") - if agent_data.agent_type != AgentType.PROCESSING_WORKER and agent_data.agent_type != AgentType.PROCESSOR_SERVER: - raise ValueError(f"Network agent type is unknown: {agent_data.agent_type}") - - if agent_data.deploy_type == DeployType.NATIVE: + if worker_data.deploy_type == DeployType.NATIVE: self.needs_ssh_connector = True - if agent_data.agent_type == AgentType.PROCESSING_WORKER: - self.network_agents_worker_native.append(agent_data) - if agent_data.agent_type == AgentType.PROCESSOR_SERVER: - self.network_agents_server_native.append(agent_data) - if 
agent_data.deploy_type == DeployType.DOCKER: + self.workers_native.append(worker_data) + if worker_data.deploy_type == DeployType.DOCKER: self.needs_docker_connector = True - if agent_data.agent_type == AgentType.PROCESSING_WORKER: - self.network_agents_worker_docker.append(agent_data) - if agent_data.agent_type == AgentType.PROCESSOR_SERVER: - self.network_agents_server_docker.append(agent_data) - - def __parse_network_agents_servers(self, processor_servers: List[Dict]): - for server in processor_servers: - server_data = DataProcessorServer( - processor_name=server["name"], deploy_type=server["deploy_type"], host=self.host, - port=int(server["port"]), init_by_config=True, pid=None - ) - self.__append_network_agent_to_lists(agent_data=server_data) + self.workers_docker.append(worker_data) - def __parse_network_agents_workers(self, processing_workers: List[Dict]): + def __parse_workers(self, processing_workers: List[Dict]): for worker in processing_workers: worker_data = DataProcessingWorker( processor_name=worker["name"], deploy_type=worker["deploy_type"], host=self.host, init_by_config=True, pid=None ) for _ in range(int(worker["number_of_instance"])): - self.__append_network_agent_to_lists(agent_data=worker_data) + self.__append_workers_to_lists(worker_data=worker_data) def create_connection_client(self, client_type: str): if client_type not in ["docker", "ssh"]: @@ -97,15 +68,14 @@ def create_connection_client(self, client_type: str): self.docker_client = create_docker_client(self.host, self.username, self.password, self.keypath) return self.docker_client - def __deploy_network_agent( - self, logger: Logger, agent_data: Union[DataProcessorServer, DataProcessingWorker], + def __deploy_single_worker( + self, logger: Logger, worker_data: DataProcessingWorker, mongodb_url: str, rabbitmq_url: str ) -> None: - deploy_type = agent_data.deploy_type - agent_type = agent_data.agent_type - name = agent_data.processor_name - agent_info = f"network agent: {agent_type}, 
deploy: {deploy_type}, name: {name}, host: {self.host}" - logger.info(f"Deploying {agent_info}") + deploy_type = worker_data.deploy_type + name = worker_data.processor_name + worker_info = f"Processing Worker, deploy: {deploy_type}, name: {name}, host: {self.host}" + logger.info(f"Deploying {worker_info}") connection_client = None if deploy_type == DeployType.NATIVE: @@ -115,44 +85,29 @@ def __deploy_network_agent( assert self.docker_client, "Docker client connection missing." connection_client = self.docker_client - if agent_type == AgentType.PROCESSING_WORKER: - agent_data.deploy_network_agent(logger, connection_client, mongodb_url, rabbitmq_url) - if agent_type == AgentType.PROCESSOR_SERVER: - agent_data.deploy_network_agent(logger, connection_client, mongodb_url) - - sleep(self.wait_between_agent_deploys) + worker_data.deploy_network_agent(logger, connection_client, mongodb_url, rabbitmq_url) + sleep(self.wait_between_deploys) - def __deploy_network_agents_workers(self, logger: Logger, mongodb_url: str, rabbitmq_url: str): + def __deploy_all_workers(self, logger: Logger, mongodb_url: str, rabbitmq_url: str): logger.info(f"Deploying processing workers on host: {self.host}") - amount_workers = len(self.network_agents_worker_native) + len(self.network_agents_worker_docker) + amount_workers = len(self.workers_native) + len(self.workers_docker) if not amount_workers: logger.info("No processing workers found to be deployed") - for data_worker in self.network_agents_worker_native: - self.__deploy_network_agent(logger, data_worker, mongodb_url, rabbitmq_url) - for data_worker in self.network_agents_worker_docker: - self.__deploy_network_agent(logger, data_worker, mongodb_url, rabbitmq_url) - - def __deploy_network_agents_servers(self, logger: Logger, mongodb_url: str, rabbitmq_url: str): - logger.info(f"Deploying processor servers on host: {self.host}") - amount_servers = len(self.network_agents_server_native) + len(self.network_agents_server_docker) - if not 
amount_servers: - logger.info("No processor servers found to be deployed") - for data_server in self.network_agents_server_native: - self.__deploy_network_agent(logger, data_server, mongodb_url, rabbitmq_url) - self.__add_deployed_agent_server_port_to_cache(data_server.processor_name, data_server.port) - for data_server in self.network_agents_server_docker: - self.__deploy_network_agent(logger, data_server, mongodb_url, rabbitmq_url) - self.__add_deployed_agent_server_port_to_cache(data_server.processor_name, data_server.port) - - def deploy_network_agents(self, logger: Logger, mongodb_url: str, rabbitmq_url: str) -> None: + for data_worker in self.workers_native: + self.__deploy_single_worker(logger, data_worker, mongodb_url, rabbitmq_url) + for data_worker in self.workers_docker: + self.__deploy_single_worker(logger, data_worker, mongodb_url, rabbitmq_url) + + def deploy_workers(self, logger: Logger, mongodb_url: str, rabbitmq_url: str) -> None: if self.needs_ssh_connector and not self.ssh_client: logger.debug("Creating missing ssh connector before deploying") self.ssh_client = self.create_connection_client(client_type="ssh") if self.needs_docker_connector: logger.debug("Creating missing docker connector before deploying") self.docker_client = self.create_connection_client(client_type="docker") - self.__deploy_network_agents_workers(logger=logger, mongodb_url=mongodb_url, rabbitmq_url=rabbitmq_url) - self.__deploy_network_agents_servers(logger=logger, mongodb_url=mongodb_url, rabbitmq_url=rabbitmq_url) + + self.__deploy_all_workers(logger=logger, mongodb_url=mongodb_url, rabbitmq_url=rabbitmq_url) + if self.ssh_client: self.ssh_client.close() self.ssh_client = None @@ -160,13 +115,13 @@ def deploy_network_agents(self, logger: Logger, mongodb_url: str, rabbitmq_url: self.docker_client.close() self.docker_client = None - def __stop_network_agent(self, logger: Logger, name: str, deploy_type: DeployType, agent_type: AgentType, pid: str): - agent_info = f"network 
agent: {agent_type}, deploy: {deploy_type}, name: {name}" + def __stop_worker(self, logger: Logger, name: str, deploy_type: DeployType, pid: str): + worker_info = f"Processing Worker: deploy: {deploy_type}, name: {name}" if not pid: - logger.warning(f"No pid was passed for {agent_info}") + logger.warning(f"No pid was passed for {worker_info}") return - agent_info += f", pid: {pid}" - logger.info(f"Stopping {agent_info}") + worker_info += f", pid: {pid}" + logger.info(f"Stopping {worker_info}") if deploy_type == DeployType.NATIVE: assert self.ssh_client, "SSH client connection missing" self.ssh_client.exec_command(f"kill {pid}") @@ -174,52 +129,28 @@ def __stop_network_agent(self, logger: Logger, name: str, deploy_type: DeployTyp assert self.docker_client, "Docker client connection missing" self.docker_client.containers.get(pid).stop() - def __stop_network_agents_workers(self, logger: Logger): - logger.info(f"Stopping processing workers on host: {self.host}") - amount_workers = len(self.network_agents_worker_native) + len(self.network_agents_worker_docker) - if not amount_workers: - logger.warning("No active processing workers to be stopped.") - for worker in self.network_agents_worker_native: - self.__stop_network_agent(logger, worker.processor_name, worker.deploy_type, worker.agent_type, worker.pid) - self.network_agents_worker_native = [] - for worker in self.network_agents_worker_docker: - self.__stop_network_agent(logger, worker.processor_name, worker.deploy_type, worker.agent_type, worker.pid) - self.network_agents_worker_docker = [] - - def __stop_network_agents_servers(self, logger: Logger): - logger.info(f"Stopping processor servers on host: {self.host}") - amount_servers = len(self.network_agents_server_native) + len(self.network_agents_server_docker) - if not amount_servers: - logger.warning("No active processor servers to be stopped.") - for server in self.network_agents_server_native: - self.__stop_network_agent(logger, server.processor_name, 
server.deploy_type, server.agent_type, server.pid) - self.network_agents_server_native = [] - for server in self.network_agents_server_docker: - self.__stop_network_agent(logger, server.processor_name, server.deploy_type, server.agent_type, server.pid) - self.network_agents_server_docker = [] - - def stop_network_agents(self, logger: Logger): + def stop_workers(self, logger: Logger): if self.needs_ssh_connector and not self.ssh_client: logger.debug("Creating missing ssh connector before stopping") self.ssh_client = self.create_connection_client(client_type="ssh") if self.needs_docker_connector and not self.docker_client: logger.debug("Creating missing docker connector before stopping") self.docker_client = self.create_connection_client(client_type="docker") - self.__stop_network_agents_workers(logger=logger) - self.__stop_network_agents_servers(logger=logger) + + logger.info(f"Stopping processing workers on host: {self.host}") + amount_workers = len(self.workers_native) + len(self.workers_docker) + if not amount_workers: + logger.warning("No active processing workers to be stopped.") + for worker in self.workers_native: + self.__stop_worker(logger, worker.processor_name, worker.deploy_type, worker.pid) + self.workers_native = [] + for worker in self.workers_docker: + self.__stop_worker(logger, worker.processor_name, worker.deploy_type, worker.pid) + self.workers_docker = [] + if self.ssh_client: self.ssh_client.close() self.ssh_client = None if self.docker_client: self.docker_client.close() self.docker_client = None - - def resolve_processor_server_url(self, processor_name: str) -> str: - processor_server_url = '' - for data_server in self.network_agents_server_docker: - if data_server.processor_name == processor_name: - processor_server_url = f"http://{self.host}:{data_server.port}/" - for data_server in self.network_agents_server_native: - if data_server.processor_name == processor_name: - processor_server_url = f"http://{self.host}:{data_server.port}/" - return 
processor_server_url diff --git a/src/ocrd_network/runtime_data/network_agents.py b/src/ocrd_network/runtime_data/network_agents.py index 742f30309d..735b3c2c1d 100644 --- a/src/ocrd_network/runtime_data/network_agents.py +++ b/src/ocrd_network/runtime_data/network_agents.py @@ -2,14 +2,15 @@ from typing import Any from re import search as re_search -from ..constants import AgentType, DeployType +from ..constants import DeployType # TODO: Find appropriate replacement for the hack def deploy_agent_native_get_pid_hack(logger: Logger, ssh_client, start_cmd: str): channel = ssh_client.invoke_shell() stdin, stdout = channel.makefile("wb"), channel.makefile("rb") - logger.debug(f"Executing command: {start_cmd}") + # TODO: set back to debug + logger.info(f"Executing command: {start_cmd}") # TODO: This hack should still be fixed # Note left from @joschrew @@ -40,14 +41,13 @@ def deploy_agent_docker_template(logger: Logger, docker_client, start_cmd: str): class DataNetworkAgent: def __init__( - self, processor_name: str, deploy_type: DeployType, agent_type: AgentType, + self, processor_name: str, deploy_type: DeployType, host: str, init_by_config: bool, pid: Any = None ) -> None: self.processor_name = processor_name self.deploy_type = deploy_type self.host = host self.deployed_by_config = init_by_config - self.agent_type = agent_type # The id is assigned when the agent is deployed self.pid = pid @@ -69,13 +69,13 @@ def __init__( self, processor_name: str, deploy_type: DeployType, host: str, init_by_config: bool, pid: Any = None ) -> None: super().__init__( - processor_name=processor_name, host=host, deploy_type=deploy_type, agent_type=AgentType.PROCESSING_WORKER, + processor_name=processor_name, host=host, deploy_type=deploy_type, init_by_config=init_by_config, pid=pid ) def deploy_network_agent(self, logger: Logger, connector_client, database_url: str, queue_url: str): if self.deploy_type == DeployType.NATIVE: - start_cmd = f"{self.processor_name} {self.agent_type} 
--database {database_url} --queue {queue_url} &" + start_cmd = f"{self.processor_name} --database {database_url} --queue {queue_url} &" self.pid = self._start_native_instance(logger, connector_client, start_cmd) return self.pid if self.deploy_type == DeployType.DOCKER: @@ -84,27 +84,3 @@ def deploy_network_agent(self, logger: Logger, connector_client, database_url: s self.pid = self._start_docker_instance(logger, connector_client, start_cmd) return self.pid raise RuntimeError(f"Unknown deploy type of {self.__dict__}") - - -class DataProcessorServer(DataNetworkAgent): - def __init__( - self, processor_name: str, deploy_type: DeployType, host: str, port: int, init_by_config: bool, pid: Any = None - ) -> None: - super().__init__( - processor_name=processor_name, host=host, deploy_type=deploy_type, agent_type=AgentType.PROCESSOR_SERVER, - init_by_config=init_by_config, pid=pid - ) - self.port = port - - def deploy_network_agent(self, logger: Logger, connector_client, database_url: str): - agent_address = f"{self.host}:{self.port}" - if self.deploy_type == DeployType.NATIVE: - start_cmd = f"{self.processor_name} {self.agent_type} --address {agent_address} --database {database_url} &" - self.pid = self._start_native_instance(logger, connector_client, start_cmd) - return self.pid - if self.deploy_type == DeployType.DOCKER: - # TODO: add real command to start processor server in docker here - start_cmd = "" - self.pid = self._start_docker_instance(logger, connector_client, start_cmd) - return self.pid - raise RuntimeError(f"Unknown deploy type of {self.__dict__}") diff --git a/src/ocrd_network/server_utils.py b/src/ocrd_network/server_utils.py index 7485a65dc2..0f12988571 100644 --- a/src/ocrd_network/server_utils.py +++ b/src/ocrd_network/server_utils.py @@ -124,50 +124,6 @@ async def _get_processor_job_log(logger: Logger, job_id: str) -> FileResponse: return FileResponse(path=log_file_path, filename=log_file_path.name) -def request_processor_server_tool_json(logger: 
Logger, processor_server_base_url: str) -> Dict: - # Request the ocrd tool json from the Processor Server - try: - response = requests_get( - urljoin(base=processor_server_base_url, url="info"), - headers={"Content-Type": "application/json"} - ) - except Exception as error: - message = f"Failed to retrieve ocrd tool json from: {processor_server_base_url}" - raise_http_exception(logger, status.HTTP_404_NOT_FOUND, message, error) - if response.status_code != 200: - message = f"Failed to retrieve tool json from: {processor_server_base_url}, code: {response.status_code}" - raise_http_exception(logger, status.HTTP_404_NOT_FOUND, message) - return response.json() - - -async def forward_job_to_processor_server( - logger: Logger, job_input: PYJobInput, processor_server_base_url: str -) -> PYJobOutput: - try: - json_data = dumps(job_input.dict(exclude_unset=True, exclude_none=True)) - except Exception as error: - message = f"Failed to json dump the PYJobInput: {job_input}" - raise_http_exception(logger, status.HTTP_500_INTERNAL_SERVER_ERROR, message, error) - - # TODO: The amount of pages should come as a request input - # TODO: cf https://github.com/OCR-D/core/pull/1030/files#r1152551161 - # currently, use 200 as a default - request_timeout = calculate_processing_request_timeout(amount_pages=200, timeout_per_page=20.0) - - # Post a processing job to the Processor Server asynchronously - async with AsyncClient(timeout=Timeout(timeout=request_timeout, connect=30.0)) as client: - response = await client.post( - urljoin(base=processor_server_base_url, url="run"), - headers={"Content-Type": "application/json"}, - json=loads(json_data) - ) - if response.status_code != 202: - message = f"Failed to post '{job_input.processor_name}' job to: {processor_server_base_url}" - raise_http_exception(logger, status.HTTP_500_INTERNAL_SERVER_ERROR, message) - job_output = response.json() - return job_output - - async def get_workflow_content(logger: Logger, workflow_id: str, workflow: 
Union[UploadFile, str, None]) -> str: if not workflow and not workflow_id: message = "Either 'workflow' must be uploaded as a file or 'workflow_id' must be provided. Both are missing." diff --git a/src/ocrd_utils/config.py b/src/ocrd_utils/config.py index 402644af4a..c1aeb00c8e 100644 --- a/src/ocrd_utils/config.py +++ b/src/ocrd_utils/config.py @@ -142,15 +142,14 @@ def raw_value(self, name): config.add('OCRD_MAX_PROCESSOR_CACHE', description="Maximum number of processor instances (for each set of parameters) to be kept in memory " - "(including loaded models) for processing workers or processor servers.", + "(including loaded models) for processing workers.", parser=int, default=(True, 128)) config.add('OCRD_MAX_PARALLEL_PAGES', - description="Maximum number of processor workers for page-parallel processing " - "(within each Processor's selected page range, independent of the number " - "of Processing Workers or Processor Servers). If set >1, then a METS Server " - "must be used for METS synchronisation.", + description="Maximum number of processor workers for page-parallel processing (within " + "each Processor's selected page range, independent of the number of Processing " + "Workers). 
If set >1, then a METS Server must be used for METS synchronisation.", parser=int, default=(True, 1)) diff --git a/src/ocrd_validators/processing_server_config.schema.yml b/src/ocrd_validators/processing_server_config.schema.yml index 934ce0c132..d90404d6a3 100644 --- a/src/ocrd_validators/processing_server_config.schema.yml +++ b/src/ocrd_validators/processing_server_config.schema.yml @@ -68,16 +68,12 @@ properties: required: - address - username + - workers oneOf: - required: - password - required: - path_to_privkey - anyOf: - - required: - - workers - - required: - - servers properties: address: description: The IP address or domain name of the target machine @@ -118,34 +114,6 @@ properties: - native - docker default: native - servers: - description: List of processor servers that will be deployed - type: array - minItems: 1 - items: - type: object - additionalProperties: false - required: - - name - - port - properties: - name: - description: Name of the processor - type: string - pattern: "^ocrd-.*$" - examples: - - ocrd-cis-ocropy-binarize - - ocrd-olena-binarize - deploy_type: - description: Should the processor server be deployed natively or with Docker - type: string - enum: - - native - - docker - default: native - port: - description: The port number to be deployed on the host - $ref: "#/$defs/port" $defs: address: diff --git a/tests/network/config.py b/tests/network/config.py index 611ad63821..97ffc63c0d 100644 --- a/tests/network/config.py +++ b/tests/network/config.py @@ -15,8 +15,8 @@ test_config.add( name='OCRD_MAX_PROCESSOR_CACHE', description=""" - Maximum number of processor instances (for each set of parameters) to be kept in memory (including loaded models) - for processing workers or processor servers. 
+ Maximum number of processor instances (for each set of parameters) to be kept in memory (including loaded models) + for processing workers """, parser=int, default=(True, 128) @@ -97,7 +97,7 @@ test_config.add( name="OCRD_NETWORK_RABBITMQ_HEARTBEAT", description=""" - Controls AMQP heartbeat timeout (in seconds) negotiation during connection tuning. An integer value always overrides the value + Controls AMQP heartbeat timeout (in seconds) negotiation during connection tuning. An integer value always overrides the value proposed by broker. Use 0 to deactivate heartbeat. """, parser=int, diff --git a/tests/network/fixtures_processing_requests.py b/tests/network/fixtures_processing_requests.py index 3afaf74711..9a8141f812 100644 --- a/tests/network/fixtures_processing_requests.py +++ b/tests/network/fixtures_processing_requests.py @@ -1,5 +1,4 @@ from pytest import fixture -from src.ocrd_network.constants import AgentType from src.ocrd_network.models import PYJobInput @@ -10,7 +9,6 @@ def fixture_processing_request_1() -> PYJobInput: path_to_mets=workspace_key, input_file_grps=["DEFAULT"], output_file_grps=["OCR-D-BIN"], - agent_type=AgentType.PROCESSING_WORKER, page_id="PHYS_0001..PHYS_0003", parameters={} ) diff --git a/tests/network/test_integration_5_processing_server.py b/tests/network/test_integration_5_processing_server.py index bf5fadee3c..9d14411797 100644 --- a/tests/network/test_integration_5_processing_server.py +++ b/tests/network/test_integration_5_processing_server.py @@ -3,7 +3,7 @@ from src.ocrd_network.client_utils import ( poll_job_status_till_timeout_fail_or_success, poll_wf_status_till_timeout_fail_or_success, post_ps_processing_request, post_ps_workflow_request) -from src.ocrd_network.constants import AgentType, JobState +from src.ocrd_network.constants import JobState from src.ocrd_network.logging_utils import get_processing_job_logging_file_path from tests.base import assets from tests.network.config import test_config @@ -39,7 +39,6 @@ def 
test_processing_server_processing_request(): "path_to_mets": path_to_mets, "input_file_grps": [input_file_grp], "output_file_grps": [output_file_grp], - "agent_type": AgentType.PROCESSING_WORKER, "parameters": {} } test_processor = "ocrd-dummy" diff --git a/tests/network/test_integration_6_client.py b/tests/network/test_integration_6_client.py index 1a693ed0b1..83ff010238 100644 --- a/tests/network/test_integration_6_client.py +++ b/tests/network/test_integration_6_client.py @@ -1,5 +1,5 @@ from pathlib import Path -from src.ocrd_network.constants import AgentType, JobState +from src.ocrd_network.constants import JobState from tests.base import assets from tests.network.config import test_config from ocrd_network.client import Client @@ -19,7 +19,6 @@ def test_client_processing_processor(): "input_file_grps": ["OCR-D-IMG"], "output_file_grps": ["OCR-D-DUMMY-TEST-CLIENT"], "parameters": {}, - "agent_type": AgentType.PROCESSING_WORKER } processing_job_id = client.send_processing_job_request(processor_name="ocrd-dummy", req_params=req_params) assert processing_job_id diff --git a/tests/network/test_modules_logging_utils.py b/tests/network/test_modules_logging_utils.py index 530b501e05..c53d4d864d 100644 --- a/tests/network/test_modules_logging_utils.py +++ b/tests/network/test_modules_logging_utils.py @@ -18,10 +18,6 @@ def test_root_logging_dir_mets_servers(): root_logging_dir(module_name=NetworkLoggingDirs.METS_SERVERS) -def test_root_logging_dir_processor_servers(): - root_logging_dir(module_name=NetworkLoggingDirs.PROCESSOR_SERVERS) - - def test_root_logging_dir_processing_workers(): root_logging_dir(module_name=NetworkLoggingDirs.PROCESSING_WORKERS) From 3b57aa740bdc0cb89412bf586b4f39b7c9388fe4 Mon Sep 17 00:00:00 2001 From: joschrew <91774427+joschrew@users.noreply.github.com> Date: Mon, 8 Sep 2025 10:23:33 +0200 Subject: [PATCH 2/5] Set native as default deploy_type in deployer code In the schema.yml native is declared as default deploy_type which was not 
implemented in the python-code yet --- src/ocrd_network/runtime_data/hosts.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/ocrd_network/runtime_data/hosts.py b/src/ocrd_network/runtime_data/hosts.py index 847d94de84..033521aad4 100644 --- a/src/ocrd_network/runtime_data/hosts.py +++ b/src/ocrd_network/runtime_data/hosts.py @@ -52,8 +52,8 @@ def __append_workers_to_lists(self, worker_data: DataProcessingWorker) -> None: def __parse_workers(self, processing_workers: List[Dict]): for worker in processing_workers: worker_data = DataProcessingWorker( - processor_name=worker["name"], deploy_type=worker["deploy_type"], host=self.host, - init_by_config=True, pid=None + processor_name=worker["name"], deploy_type=worker.get("deploy_type", "native"), + host=self.host, init_by_config=True, pid=None ) for _ in range(int(worker["number_of_instance"])): self.__append_workers_to_lists(worker_data=worker_data) From ec49540a8f5e9e2cb9676dac5f7b9991a708846c Mon Sep 17 00:00:00 2001 From: joschrew <91774427+joschrew@users.noreply.github.com> Date: Mon, 8 Sep 2025 15:03:19 +0200 Subject: [PATCH 3/5] Implement ProcessingServers list-processors --- src/ocrd_network/processing_server.py | 6 ++---- src/ocrd_network/rabbitmq_utils/__init__.py | 2 ++ src/ocrd_network/rabbitmq_utils/helpers.py | 23 +++++++++++++++++++++ 3 files changed, 27 insertions(+), 4 deletions(-) diff --git a/src/ocrd_network/processing_server.py b/src/ocrd_network/processing_server.py index e89eef89ab..bdc16144dd 100644 --- a/src/ocrd_network/processing_server.py +++ b/src/ocrd_network/processing_server.py @@ -34,6 +34,7 @@ from .rabbitmq_utils import ( check_if_queue_exists, connect_rabbitmq_publisher, + get_message_queues, OcrdProcessingMessage ) from .server_cache import CacheLockedPages, CacheProcessingRequests @@ -606,10 +607,7 @@ async def remove_job_from_request_cache(self, result_message: PYResultMessage): await self.push_cached_jobs_to_workers(processing_jobs=consumed_cached_jobs) 
async def list_processors(self) -> List[str]: - # There is no caching on the Processing Server side - # TODO: Implement: Get all existing queues (every time new request to get them) and derive - # correct ProcessorNames - return [] + return get_message_queues(self.log, self.rmq_data) async def task_sequence_to_processing_jobs( self, diff --git a/src/ocrd_network/rabbitmq_utils/__init__.py b/src/ocrd_network/rabbitmq_utils/__init__.py index 93a8249ef6..71df04317b 100644 --- a/src/ocrd_network/rabbitmq_utils/__init__.py +++ b/src/ocrd_network/rabbitmq_utils/__init__.py @@ -3,6 +3,7 @@ "connect_rabbitmq_consumer", "connect_rabbitmq_publisher", "create_message_queues", + "get_message_queues", "verify_and_parse_mq_uri", "verify_rabbitmq_available", "RMQConsumer", @@ -19,6 +20,7 @@ connect_rabbitmq_consumer, connect_rabbitmq_publisher, create_message_queues, + get_message_queues, verify_and_parse_mq_uri, verify_rabbitmq_available ) diff --git a/src/ocrd_network/rabbitmq_utils/helpers.py b/src/ocrd_network/rabbitmq_utils/helpers.py index 9109385c5d..f5e2a538a3 100644 --- a/src/ocrd_network/rabbitmq_utils/helpers.py +++ b/src/ocrd_network/rabbitmq_utils/helpers.py @@ -4,6 +4,9 @@ from re import match as re_match from time import sleep from typing import Dict, List, Union +from requests import get +from requests.auth import HTTPBasicAuth +from requests.exceptions import RequestException, HTTPError from .constants import RABBITMQ_URI_PATTERN, RECONNECT_TRIES, RECONNECT_WAIT from .consumer import RMQConsumer @@ -75,6 +78,26 @@ def create_message_queues(logger: Logger, rmq_publisher: RMQPublisher, queue_nam rmq_publisher.create_queue(queue_name=queue_name) +def get_message_queues(logger: Logger, rmq_data: Dict) -> List: + try: + response = get( + f"http://{rmq_data['host']}:15672/api/queues", + auth=HTTPBasicAuth(rmq_data["username"], rmq_data["password"]) + ) + response.raise_for_status() + queues = response.json() + return [queue['name'] for queue in queues] + except
HTTPError: + logger.warning( + f"Error requesting all queue-names from rabbitmq. Status code: {response.status_code}. " + f"Response-Text: {response.text}" + ) + return [] + except RequestException as e: + logger.warning(f"Error querying RabbitMQ API: {e}") + return [] + + + def verify_and_parse_mq_uri(rabbitmq_address: str): """ Check the full list of available parameters in the docs here: From e7dc54a7714ab8f4848b129cede36790e6fa7a48 Mon Sep 17 00:00:00 2001 From: joschrew <91774427+joschrew@users.noreply.github.com> Date: Tue, 9 Sep 2025 16:08:31 +0200 Subject: [PATCH 4/5] Fix test for list-processors --- tests/network/test_integration_5_processing_server.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/tests/network/test_integration_5_processing_server.py b/tests/network/test_integration_5_processing_server.py index 9d14411797..48ae86af92 100644 --- a/tests/network/test_integration_5_processing_server.py +++ b/tests/network/test_integration_5_processing_server.py @@ -20,14 +20,12 @@ def test_processing_server_connectivity(): assert message.startswith("The home page of"), f"Processing server home page message is corrupted" -# TODO: The processing workers are still not registered when deployed separately. -# Fix that by extending the processing server.
def test_processing_server_deployed_processors(): test_url = f"{PROCESSING_SERVER_URL}/processor" response = request_get(test_url) processors = response.json() assert response.status_code == 200, f"Processing server: {test_url}, {response.status_code}" - assert processors == [], f"Mismatch in deployed processors" + assert "ocrd-dummy" in processors def test_processing_server_processing_request(): From ee87c2fcc5b8c26558a5ad83f7df5a2118b111cc Mon Sep 17 00:00:00 2001 From: joschrew <91774427+joschrew@users.noreply.github.com> Date: Wed, 24 Sep 2025 12:11:37 +0200 Subject: [PATCH 5/5] Reset the accidentally modified ocrd-all-tool.json --- src/ocrd/ocrd-all-tool.json | 1743 +---------------------------------- 1 file changed, 42 insertions(+), 1701 deletions(-) diff --git a/src/ocrd/ocrd-all-tool.json b/src/ocrd/ocrd-all-tool.json index 1a8b1cec41..5f27d9a1ae 100644 --- a/src/ocrd/ocrd-all-tool.json +++ b/src/ocrd/ocrd-all-tool.json @@ -1,1704 +1,45 @@ { - "ocrd-dummy": { - "executable": "ocrd-dummy", - "description": "Bare-bones processor creates PAGE-XML and optionally copies file from input group to output group", - "steps": [ - "preprocessing/optimization" - ], - "categories": [ - "Image preprocessing" - ], - "input_file_grp_cardinality": 1, - "output_file_grp_cardinality": 1, - "parameters": { - "copy_files": { - "type": "boolean", - "default": false, - "description": "Whether to actually copy files (true) or just create PAGE-XML as a side effect (false)" - } - } - }, - "ocrd-filter": { - "executable": "ocrd-filter", - "description": "Bare-bones processor can be dynamically configured to remove segments based on XPath queries", - "steps": [ - "recognition/post-correction" - ], - "categories": [ - "Quality assurance" - ], - "input_file_grp_cardinality": 1, - "output_file_grp_cardinality": 1, - "parameters": { - "select": { - "type": "string", - "default": "//*[ends-with(local-name(),'Region')]", - "description": "Which segments to select for removal. 
An XPath 2.0 query expression (path and optional predicates), with 'pc' as namespace prefix for PAGE-XML and our extension functions (see help text). Only selection of segment hierarchy elements is allowed (so e.g. `*` would be equivalent to `pc:NoiseRegion|pc:LineDrawingRegion|pc:AdvertRegion|pc:ImageRegion|pc:ChartRegion|pc:MusicRegion|pc:GraphicRegion|pc:UnknownRegion|pc:CustomRegion|pc:SeparatorRegion|pc:MathsRegion|pc:TextRegion|pc:MapRegion|pc:ChemRegion|pc:TableRegion|pc:TextLine|pc:Word|pc:Glyph`, but `pc:MetadataItem` or `pc:Border` or `pc:Coords` would not match).\nFor example, to remove words or glyphs with low text confidence, select '(pc:Word|pc:Glyph)[pc:TextEquiv/@conf < 0.7]'. Or low layout confidence, '*[pc:Coords/@conf < 0.7]'.\nTo remove high pixel-to-character rate, select '*[pc:pixelarea(.) div string-length(pc:textequiv(.)) > 10000]'." - }, - "plot": { - "type": "boolean", - "default": false, - "description": "Whether to extract an image for each filtered segment and write to the output fileGrp." 
- } - } - }, - "ocrd-tesserocr-deskew": { - "executable": "ocrd-tesserocr-deskew", - "categories": [ - "Image preprocessing" - ], - "description": "Detect script, orientation and skew angle for pages or regions", - "input_file_grp_cardinality": 1, - "output_file_grp_cardinality": 1, - "steps": [ - "preprocessing/optimization/deskewing" - ], - "parameters": { - "dpi": { - "type": "number", - "format": "float", - "description": "pixel density in dots per inch (overrides any meta-data in the images)", - "default": 0 - }, - "operation_level": { - "type": "string", - "enum": [ - "page", - "region", - "line" - ], - "default": "region", - "description": "PAGE XML hierarchy level to operate on" - }, - "min_orientation_confidence": { - "type": "number", - "format": "float", - "default": 1.5, - "description": "Minimum confidence score to apply orientation as detected by OSD" - } - } - }, - "ocrd-tesserocr-fontshape": { - "executable": "ocrd-tesserocr-fontshape", - "categories": [ - "Text recognition and optimization" - ], - "description": "Recognize font shapes (family/monospace/bold/italic) and size in segmented words with Tesseract (using annotated derived images, or masking and cropping images from coordinate polygons), annotating TextStyle", - "input_file_grp_cardinality": 1, - "output_file_grp_cardinality": 1, - "steps": [ - "recognition/font-identification" - ], - "parameters": { - "dpi": { - "type": "number", - "format": "float", - "description": "pixel density in dots per inch (overrides any meta-data in the images)", - "default": 0 - }, - "padding": { - "type": "number", - "format": "integer", - "default": 0, - "description": "Number of background-filled pixels to add around the word image (i.e. the annotated AlternativeImage if it exists or the higher-level image cropped to the bounding box and masked by the polygon otherwise) on each side before recognition." 
- }, - "model": { - "type": "string", - "format": "uri", - "content-type": "application/octet-stream", - "default": "osd", - "description": "tessdata model to apply (an ISO 639-3 language specification or some other basename, e.g. deu-frak or osd); must be an old (pre-LSTM) model" - } - } - }, - "ocrd-tesserocr-recognize": { - "executable": "ocrd-tesserocr-recognize", - "categories": [ - "Text recognition and optimization" - ], - "description": "Segment and/or recognize text with Tesseract (using annotated derived images, or masking and cropping images from coordinate polygons) on any level of the PAGE hierarchy.", - "input_file_grp_cardinality": 1, - "output_file_grp_cardinality": 1, - "steps": [ - "layout/segmentation/region", - "layout/segmentation/line", - "recognition/text-recognition" - ], - "parameters": { - "dpi": { - "type": "number", - "format": "float", - "description": "pixel density in dots per inch (overrides any meta-data in the images)", - "default": 0 - }, - "padding": { - "type": "number", - "format": "integer", - "default": 0, - "description": "Extend detected region/cell/line/word rectangles by this many (true) pixels, or extend existing region/line/word images (i.e. the annotated AlternativeImage if it exists or the higher-level image cropped to the bounding box and masked by the polygon otherwise) by this many (background/white) pixels on each side before recognition." - }, - "segmentation_level": { - "type": "string", - "enum": [ - "region", - "cell", - "line", - "word", - "glyph", - "none" - ], - "default": "word", - "description": "Highest PAGE XML hierarchy level to remove existing annotation from and detect segments for (before iterating downwards); if ``none``, does not attempt any new segmentation; if ``cell``, starts at table regions, detecting text regions (cells). Ineffective when lower than ``textequiv_level``." 
- }, - "textequiv_level": { - "type": "string", - "enum": [ - "region", - "cell", - "line", - "word", - "glyph", - "none" - ], - "default": "word", - "description": "Lowest PAGE XML hierarchy level to re-use or detect segments for and add the TextEquiv results to (before projecting upwards); if ``none``, adds segmentation down to the glyph level, but does not attempt recognition at all; if ``cell``, stops short before text lines, adding text of text regions inside tables (cells) or on page level only." - }, - "overwrite_segments": { - "type": "boolean", - "default": false, - "description": "If ``segmentation_level`` is not none, but an element already contains segments, remove them and segment again. Otherwise use the existing segments of that element." - }, - "overwrite_text": { - "type": "boolean", - "default": true, - "description": "If ``textequiv_level`` is not none, but a segment already contains TextEquivs, remove them and replace with recognised text. Otherwise add new text as alternative. (Only the first entry is projected upwards.)" - }, - "shrink_polygons": { - "type": "boolean", - "default": false, - "description": "When detecting any segments, annotate polygon coordinates instead of bounding box rectangles by projecting the convex hull of all symbols." - }, - "block_polygons": { - "type": "boolean", - "default": false, - "description": "When detecting regions, annotate polygon coordinates instead of bounding box rectangles by querying Tesseract accordingly." - }, - "find_tables": { - "type": "boolean", - "default": true, - "description": "When detecting regions, recognise tables as table regions (Tesseract's ``textord_tabfind_find_tables=1``)." - }, - "find_staves": { - "type": "boolean", - "default": false, - "description": "When detecting regions, recognize music staves as non-text, suppressing it in the binary image (Tesseract's ``pageseg_apply_music_mask``). Note that this might wrongly detect tables as staves." 
- }, - "sparse_text": { - "type": "boolean", - "default": false, - "description": "When detecting regions, use 'sparse text' page segmentation mode (finding as much text as possible in no particular order): only text regions, single lines without vertical or horizontal space." - }, - "raw_lines": { - "type": "boolean", - "default": false, - "description": "When detecting lines, do not attempt additional segmentation (baseline+xheight+ascenders/descenders prediction) on line images. Can increase accuracy for certain workflows. Disable when line segments/images may contain components of more than 1 line, or larger gaps/white-spaces." - }, - "char_whitelist": { - "type": "string", - "default": "", - "description": "When recognizing text, enumeration of character hypotheses (from the model) to allow exclusively; overruled by blacklist if set." - }, - "char_blacklist": { - "type": "string", - "default": "", - "description": "When recognizing text, enumeration of character hypotheses (from the model) to suppress; overruled by unblacklist if set." - }, - "char_unblacklist": { - "type": "string", - "default": "", - "description": "When recognizing text, enumeration of character hypotheses (from the model) to allow inclusively." - }, - "tesseract_parameters": { - "type": "object", - "default": {}, - "description": "Dictionary of additional Tesseract runtime variables (cf. tesseract --print-parameters), string values." - }, - "xpath_parameters": { - "type": "object", - "default": {}, - "description": "Set additional Tesseract runtime variables according to results of XPath queries into the segment. (As a convenience, `@language` and `@script` also match their upwards `@primary*` and `@secondary*` variants where applicable.) 
(Example: {'ancestor::TextRegion/@type=\"page-number\"': {'char_whitelist': '0123456789-'}, 'contains(@custom,\"ISBN\")': {'char_whitelist': '0123456789-'}})" - }, - "xpath_model": { - "type": "object", - "default": {}, - "description": "Prefer models mapped according to results of XPath queries into the segment. (As a convenience, `@language` and `@script` also match their upwards `@primary*` and `@secondary*` variants where applicable.) If no queries / mappings match (or under the default empty parameter), then fall back to `model`. If there are multiple matches, combine their results. (Example: {'starts-with(@script,\"Latn\")': 'Latin', 'starts-with(@script,\"Grek\")': 'Greek', '@language=\"Latin\"': 'lat', '@language=\"Greek\"': 'grc+ell', 'ancestor::TextRegion/@type=\"page-number\"': 'eng'})" - }, - "auto_model": { - "type": "boolean", - "default": false, - "description": "Prefer models performing best (by confidence) per segment (if multiple given in `model`). Repeats the OCR of the best model once (i.e. slower). (Use as a fallback to xpath_model if you do not trust script/language detection.)" - }, - "model": { - "type": "string", - "format": "uri", - "content-type": "application/octet-stream", - "description": "The tessdata text recognition model to apply (an ISO 639-3 language specification or some other basename, e.g. deu-frak or Fraktur)." - }, - "oem": { - "type": "string", - "enum": [ - "TESSERACT_ONLY", - "LSTM_ONLY", - "TESSERACT_LSTM_COMBINED", - "DEFAULT" - ], - "default": "DEFAULT", - "description": "Tesseract OCR engine mode to use:\n* Run Tesseract only - fastest,\n* Run just the LSTM line recognizer. (>=v4.00),\n*Run the LSTM recognizer, but allow fallback to Tesseract when things get difficult. (>=v4.00),\n*Run both and combine results - best accuracy." 
- } - }, - "resource_locations": [ - "module" - ], - "resources": [ - { - "url": "https://ub-backup.bib.uni-mannheim.de/~stweil/ocrd-train/data/Fraktur_5000000/tessdata_best/Fraktur_50000000.334_450937.traineddata", - "name": "Fraktur_GT4HistOCR.traineddata", - "parameter_usage": "without-extension", - "description": "Tesseract LSTM model trained on GT4HistOCR", - "size": 1058487 - }, - { - "url": "https://ub-backup.bib.uni-mannheim.de/~stweil/ocrd-train/data/ONB/tessdata_best/ONB_1.195_300718_989100.traineddata", - "name": "ONB.traineddata", - "parameter_usage": "without-extension", - "description": "Tesseract LSTM model based on Austrian National Library newspaper data", - "size": 4358948 - }, - { - "url": "https://ub-backup.bib.uni-mannheim.de/~stweil/tesstrain/frak2021/tessdata_best/frak2021-0.905.traineddata", - "name": "frak2021.traineddata", - "parameter_usage": "without-extension", - "description": "Tesseract LSTM model based on a mix of mostly German and Latin ground truth data", - "size": 3421140 - }, - { - "url": "https://github.com/tesseract-ocr/tessdata_best/raw/main/equ.traineddata", - "name": "equ.traineddata", - "parameter_usage": "without-extension", - "description": "Tesseract legacy model for mathematical equations", - "size": 2251950 - }, - { - "url": "https://github.com/tesseract-ocr/tessdata_best/raw/main/osd.traineddata", - "name": "osd.traineddata", - "parameter_usage": "without-extension", - "description": "Tesseract legacy model for orientation and script detection", - "size": 10562727 - }, - { - "url": "https://github.com/tesseract-ocr/tessdata_best/raw/main/eng.traineddata", - "name": "eng.traineddata", - "parameter_usage": "without-extension", - "description": "Tesseract LSTM model for contemporary (computer typesetting and offset printing) English", - "size": 15400601 - }, - { - "url": "https://github.com/tesseract-ocr/tessdata_best/raw/main/deu.traineddata", - "name": "deu.traineddata", - "parameter_usage": "without-extension", - 
"description": "Tesseract LSTM model for contemporary (computer typesetting and offset printing) German", - "size": 8628461 - }, - { - "url": "https://github.com/tesseract-ocr/tessdata_best/raw/main/deu_latf.traineddata", - "name": "deu_latf.traineddata", - "parameter_usage": "without-extension", - "description": "Tesseract LSTM model for historical (Fraktur typesetting and letterpress printing) German", - "size": 6423052 - }, - { - "url": "https://github.com/tesseract-ocr/tessdata_best/raw/main/deu_latf.traineddata", - "name": "frk.traineddata", - "parameter_usage": "without-extension", - "description": "Tesseract LSTM model for historical German (deprecated, replaced by deu_latf)", - "size": 6423052 - }, - { - "url": "https://github.com/tesseract-ocr/tessdata_best/raw/main/script/Fraktur.traineddata", - "name": "Fraktur.traineddata", - "parameter_usage": "without-extension", - "description": "Tesseract LSTM model for historical Latin script with Fraktur typesetting (~125 characters with precomposed diacritics)", - "size": 17613343 - }, - { - "url": "https://github.com/tesseract-ocr/tessdata_best/raw/main/script/Latin.traineddata", - "name": "Latin.traineddata", - "parameter_usage": "without-extension", - "description": "Tesseract LSTM model for Latin script (contemporary and historical, ~250 characters with precomposed diacritics)", - "size": 101402885 - }, - { - "url": "https://github.com/tesseract-ocr/tessdata_best/raw/main/script/Arabic.traineddata", - "name": "Arabic.traineddata", - "parameter_usage": "without-extension", - "description": "Tesseract LSTM model for Arabic script", - "size": 17095279 - }, - { - "url": "https://github.com/tesseract-ocr/tessdata_best/raw/main/script/Armenian.traineddata", - "name": "Armenian.traineddata", - "parameter_usage": "without-extension", - "description": "Tesseract LSTM model for Armenian script", - "size": 18908681 - }, - { - "url": "https://github.com/tesseract-ocr/tessdata_best/raw/main/script/Bengali.traineddata", - 
"name": "Bengali.traineddata", - "parameter_usage": "without-extension", - "description": "Tesseract LSTM model for Bengali script (a Brahmic script)", - "size": 16711376 - }, - { - "url": "https://github.com/tesseract-ocr/tessdata_best/raw/main/script/Canadian_Aboriginal.traineddata", - "name": "Canadian_Aboriginal.traineddata", - "parameter_usage": "without-extension", - "description": "Tesseract LSTM model for Canadian Aboriginal script", - "size": 15184388 - }, - { - "url": "https://github.com/tesseract-ocr/tessdata_best/raw/main/script/Cherokee.traineddata", - "name": "Cherokee.traineddata", - "parameter_usage": "without-extension", - "description": "Tesseract LSTM model for Cherokee script", - "size": 7126553 - }, - { - "url": "https://github.com/tesseract-ocr/tessdata_best/raw/main/script/Cyrillic.traineddata", - "name": "Cyrillic.traineddata", - "parameter_usage": "without-extension", - "description": "Tesseract LSTM model for Cyrillic script", - "size": 36730735 - }, - { - "url": "https://github.com/tesseract-ocr/tessdata_best/raw/main/script/Devanagari.traineddata", - "name": "Devanagari.traineddata", - "parameter_usage": "without-extension", - "description": "Tesseract LSTM model for Devanagari script (a Brahmic script)", - "size": 28561664 - }, - { - "url": "https://github.com/tesseract-ocr/tessdata_best/raw/main/script/Ethiopic.traineddata", - "name": "Ethiopic.traineddata", - "parameter_usage": "without-extension", - "description": "Tesseract LSTM model for Ethiopic script (Ge\u02bdez script)", - "size": 11211460 - }, - { - "url": "https://github.com/tesseract-ocr/tessdata_best/raw/main/script/Georgian.traineddata", - "name": "Georgian.traineddata", - "parameter_usage": "without-extension", - "description": "Tesseract LSTM model for Georgian script", - "size": 13806109 - }, - { - "url": "https://github.com/tesseract-ocr/tessdata_best/raw/main/script/Greek.traineddata", - "name": "Greek.traineddata", - "parameter_usage": "without-extension", - 
"description": "Tesseract LSTM model for Greek script (~210 characters + basic Latin)", - "size": 10634749 - }, - { - "url": "https://github.com/tesseract-ocr/tessdata_best/raw/main/script/Gujarati.traineddata", - "name": "Gujarati.traineddata", - "parameter_usage": "without-extension", - "description": "Tesseract LSTM model for Gujarati script (a Brahmic script)", - "size": 7074537 - }, - { - "url": "https://github.com/tesseract-ocr/tessdata_best/raw/main/script/Gurmukhi.traineddata", - "name": "Gurmukhi.traineddata", - "parameter_usage": "without-extension", - "description": "Tesseract LSTM model for Gurmukhi script (a Brahmic script)", - "size": 11642032 - }, - { - "url": "https://github.com/tesseract-ocr/tessdata_best/raw/main/script/Hangul.traineddata", - "name": "Hangul.traineddata", - "parameter_usage": "without-extension", - "description": "Tesseract LSTM model for Hangul script (horizontal writing)", - "size": 12225308 - }, - { - "url": "https://github.com/tesseract-ocr/tessdata_best/raw/main/script/Hangul_vert.traineddata", - "name": "Hangul_vert.traineddata", - "parameter_usage": "without-extension", - "description": "Tesseract LSTM model for Hangul script (vertical writing)", - "size": 16522140 - }, - { - "url": "https://github.com/tesseract-ocr/tessdata_best/raw/main/script/HanS.traineddata", - "name": "HanS.traineddata", - "parameter_usage": "without-extension", - "description": "Tesseract LSTM model for Han script (simplified, horizontal writing)", - "size": 16633038 - }, - { - "url": "https://github.com/tesseract-ocr/tessdata_best/raw/main/script/HanS_vert.traineddata", - "name": "HanS_vert.traineddata", - "parameter_usage": "without-extension", - "description": "Tesseract LSTM model for Han script (simplified, vertical writing)", - "size": 12320913 - }, - { - "url": "https://github.com/tesseract-ocr/tessdata_best/raw/main/script/HanT.traineddata", - "name": "HanT.traineddata", - "parameter_usage": "without-extension", - "description": "Tesseract 
LSTM model for Han script (traditional, horizontal writing)", - "size": 12344619 - }, - { - "url": "https://github.com/tesseract-ocr/tessdata_best/raw/main/script/HanT_vert.traineddata", - "name": "HanT_vert.traineddata", - "parameter_usage": "without-extension", - "description": "Tesseract LSTM model for Han script (traditional, vertical writing)", - "size": 12344866 - }, - { - "url": "https://github.com/tesseract-ocr/tessdata_best/raw/main/script/Hebrew.traineddata", - "name": "Hebrew.traineddata", - "parameter_usage": "without-extension", - "description": "Tesseract LSTM model for Hebrew script", - "size": 12218204 - }, - { - "url": "https://github.com/tesseract-ocr/tessdata_best/raw/main/script/Japanese.traineddata", - "name": "Japanese.traineddata", - "parameter_usage": "without-extension", - "description": "Tesseract LSTM model for Japanese script (Hiragana + Katakana + 2.4k Kanji + basic Latin, horizontal writing)", - "size": 17789735 - }, - { - "url": "https://github.com/tesseract-ocr/tessdata_best/raw/main/script/Japanese_vert.traineddata", - "name": "Japanese_vert.traineddata", - "parameter_usage": "without-extension", - "description": "Tesseract LSTM model for Japanese script (Hiragana + Katakana + 2.4k Kanji + basic Latin, vertical writing)", - "size": 17789844 - }, - { - "url": "https://github.com/tesseract-ocr/tessdata_best/raw/main/script/Kannada.traineddata", - "name": "Kannada.traineddata", - "parameter_usage": "without-extension", - "description": "Tesseract LSTM model for Kannada script (a Brahmic script)", - "size": 14163058 - }, - { - "url": "https://github.com/tesseract-ocr/tessdata_best/raw/main/script/Khmer.traineddata", - "name": "Khmer.traineddata", - "parameter_usage": "without-extension", - "description": "Tesseract LSTM model for Khmer script", - "size": 12025463 - }, - { - "url": "https://github.com/tesseract-ocr/tessdata_best/raw/main/script/Lao.traineddata", - "name": "Lao.traineddata", - "parameter_usage": "without-extension", - 
"description": "Tesseract LSTM model for Lao script", - "size": 17479398 - }, - { - "url": "https://github.com/tesseract-ocr/tessdata_best/raw/main/script/Malayalam.traineddata", - "name": "Malayalam.traineddata", - "parameter_usage": "without-extension", - "description": "Tesseract LSTM model for Malayalam script (a Brahmic script)", - "size": 11073689 - }, - { - "url": "https://github.com/tesseract-ocr/tessdata_best/raw/main/script/Myanmar.traineddata", - "name": "Myanmar.traineddata", - "parameter_usage": "without-extension", - "description": "Tesseract LSTM model for Myanmar script (Burmese alphabet)", - "size": 14664489 - }, - { - "url": "https://github.com/tesseract-ocr/tessdata_best/raw/main/script/Oriya.traineddata", - "name": "Oriya.traineddata", - "parameter_usage": "without-extension", - "description": "Tesseract LSTM model for Oriya script (or Odia, a Brahmic script)", - "size": 16265327 - }, - { - "url": "https://github.com/tesseract-ocr/tessdata_best/raw/main/script/Sinhala.traineddata", - "name": "Sinhala.traineddata", - "parameter_usage": "without-extension", - "description": "Tesseract LSTM model for Sinhala script (a Brahmic script)", - "size": 6842699 - }, - { - "url": "https://github.com/tesseract-ocr/tessdata_best/raw/main/script/Syriac.traineddata", - "name": "Syriac.traineddata", - "parameter_usage": "without-extension", - "description": "Tesseract LSTM model for Syriac script (Syriac alphabet)", - "size": 15352617 - }, - { - "url": "https://github.com/tesseract-ocr/tessdata_best/raw/main/script/Tamil.traineddata", - "name": "Tamil.traineddata", - "parameter_usage": "without-extension", - "description": "Tesseract LSTM model for Tamil script (a Brahmic script)", - "size": 18607472 - }, - { - "url": "https://github.com/tesseract-ocr/tessdata_best/raw/main/script/Telugu.traineddata", - "name": "Telugu.traineddata", - "parameter_usage": "without-extension", - "description": "Tesseract LSTM model for Telugu script (a Brahmic script)", - "size": 
13981001 - }, - { - "url": "https://github.com/tesseract-ocr/tessdata_best/raw/main/script/Thaana.traineddata", - "name": "Thaana.traineddata", - "parameter_usage": "without-extension", - "description": "Tesseract LSTM model for Thaana script", - "size": 12783652 - }, - { - "url": "https://github.com/tesseract-ocr/tessdata_best/raw/main/script/Thai.traineddata", - "name": "Thai.traineddata", - "parameter_usage": "without-extension", - "description": "Tesseract LSTM model for Thai script (a Brahmic script)", - "size": 11544984 - }, - { - "url": "https://github.com/tesseract-ocr/tessdata_best/raw/main/script/Tibetan.traineddata", - "name": "Tibetan.traineddata", - "parameter_usage": "without-extension", - "description": "Tesseract LSTM model for Tibetan script (a Brahmic script)", - "size": 12523531 - }, - { - "url": "https://github.com/tesseract-ocr/tessdata_best/raw/main/script/Vietnamese.traineddata", - "name": "Vietnamese.traineddata", - "parameter_usage": "without-extension", - "description": "Tesseract LSTM model for Vietnamese script (Vietnamese alphabet)", - "size": 12435419 - }, - { - "url": "https://github.com/tesseract-ocr/tessdata_best/raw/main/afr.traineddata", - "name": "afr.traineddata", - "parameter_usage": "without-extension", - "description": "Tesseract LSTM model for Afrikaans", - "size": 12800552 - }, - { - "url": "https://github.com/tesseract-ocr/tessdata_best/raw/main/amh.traineddata", - "name": "amh.traineddata", - "parameter_usage": "without-extension", - "description": "Tesseract LSTM model for Amharic", - "size": 8389639 - }, - { - "url": "https://github.com/tesseract-ocr/tessdata_best/raw/main/ara.traineddata", - "name": "ara.traineddata", - "parameter_usage": "without-extension", - "description": "Tesseract LSTM model for Arabic", - "size": 12603724 - }, - { - "url": "https://github.com/tesseract-ocr/tessdata_best/raw/main/asm.traineddata", - "name": "asm.traineddata", - "parameter_usage": "without-extension", - "description": "Tesseract 
LSTM model for Assamese", - "size": 11315350 - }, - { - "url": "https://github.com/tesseract-ocr/tessdata_best/raw/main/aze_cyrl.traineddata", - "name": "aze_cyrl.traineddata", - "parameter_usage": "without-extension", - "description": "Tesseract LSTM model for Azerbaijani (in Cyrillic script)", - "size": 4700277 - }, - { - "url": "https://github.com/tesseract-ocr/tessdata_best/raw/main/aze.traineddata", - "name": "aze.traineddata", - "parameter_usage": "without-extension", - "description": "Tesseract LSTM model for Azerbaijani (in Latin script)", - "size": 6281404 - }, - { - "url": "https://github.com/tesseract-ocr/tessdata_best/raw/main/bel.traineddata", - "name": "bel.traineddata", - "parameter_usage": "without-extension", - "description": "Tesseract LSTM model for Belarusian", - "size": 10870278 - }, - { - "url": "https://github.com/tesseract-ocr/tessdata_best/raw/main/ben.traineddata", - "name": "ben.traineddata", - "parameter_usage": "without-extension", - "description": "Tesseract LSTM model for Bengali", - "size": 11045427 - }, - { - "url": "https://github.com/tesseract-ocr/tessdata_best/raw/main/bod.traineddata", - "name": "bod.traineddata", - "parameter_usage": "without-extension", - "description": "Tesseract LSTM model for Tibetan", - "size": 8623846 - }, - { - "url": "https://github.com/tesseract-ocr/tessdata_best/raw/main/bos.traineddata", - "name": "bos.traineddata", - "parameter_usage": "without-extension", - "description": "Tesseract LSTM model for Bosnian", - "size": 5264248 - }, - { - "url": "https://github.com/tesseract-ocr/tessdata_best/raw/main/bre.traineddata", - "name": "bre.traineddata", - "parameter_usage": "without-extension", - "description": "Tesseract LSTM model for Breton", - "size": 15640760 - }, - { - "url": "https://github.com/tesseract-ocr/tessdata_best/raw/main/bul.traineddata", - "name": "bul.traineddata", - "parameter_usage": "without-extension", - "description": "Tesseract LSTM model for Bulgarian", - "size": 8844613 - }, - { - 
"url": "https://github.com/tesseract-ocr/tessdata_best/raw/main/cat.traineddata", - "name": "cat.traineddata", - "parameter_usage": "without-extension", - "description": "Tesseract LSTM model for Catalan", - "size": 3802329 - }, - { - "url": "https://github.com/tesseract-ocr/tessdata_best/raw/main/ceb.traineddata", - "name": "ceb.traineddata", - "parameter_usage": "without-extension", - "description": "Tesseract LSTM model for Cebuano", - "size": 3452674 - }, - { - "url": "https://github.com/tesseract-ocr/tessdata_best/raw/main/ces.traineddata", - "name": "ces.traineddata", - "parameter_usage": "without-extension", - "description": "Tesseract LSTM model for Czech", - "size": 10918912 - }, - { - "url": "https://github.com/tesseract-ocr/tessdata_best/raw/main/chi_sim.traineddata", - "name": "chi_sim.traineddata", - "parameter_usage": "without-extension", - "description": "Tesseract LSTM model for Mandarin Chinese (simplified, horizontal writing)", - "size": 13077423 - }, - { - "url": "https://github.com/tesseract-ocr/tessdata_best/raw/main/chi_sim_vert.traineddata", - "name": "chi_sim_vert.traineddata", - "parameter_usage": "without-extension", - "description": "Tesseract LSTM model for Mandarin Chinese (simplified, vertical writing)", - "size": 13077507 - }, - { - "url": "https://github.com/tesseract-ocr/tessdata_best/raw/main/chi_tra.traineddata", - "name": "chi_tra.traineddata", - "parameter_usage": "without-extension", - "description": "Tesseract LSTM model for Mandarin Chinese (traditional, horizontal writing)", - "size": 12985735 - }, - { - "url": "https://github.com/tesseract-ocr/tessdata_best/raw/main/chi_tra_vert.traineddata", - "name": "chi_tra_vert.traineddata", - "parameter_usage": "without-extension", - "description": "Tesseract LSTM model for Mandarin Chinese (traditional, vertical writing)", - "size": 12985521 - }, - { - "url": "https://github.com/tesseract-ocr/tessdata_best/raw/main/chr.traineddata", - "name": "chr.traineddata", - "parameter_usage": 
"without-extension", - "description": "Tesseract LSTM model for Cherokee", - "size": 2258703 - }, - { - "url": "https://github.com/tesseract-ocr/tessdata_best/raw/main/cos.traineddata", - "name": "cos.traineddata", - "parameter_usage": "without-extension", - "description": "Tesseract LSTM model for Corsican", - "size": 8830216 - }, - { - "url": "https://github.com/tesseract-ocr/tessdata_best/raw/main/cym.traineddata", - "name": "cym.traineddata", - "parameter_usage": "without-extension", - "description": "Tesseract LSTM model for Welsh", - "size": 8750784 - }, - { - "url": "https://github.com/tesseract-ocr/tessdata_best/raw/main/dan.traineddata", - "name": "dan.traineddata", - "parameter_usage": "without-extension", - "description": "Tesseract LSTM model for Danish", - "size": 9758142 - }, - { - "url": "https://github.com/tesseract-ocr/tessdata_best/raw/main/div.traineddata", - "name": "div.traineddata", - "parameter_usage": "without-extension", - "description": "Tesseract LSTM model for Dhivehi", - "size": 4574116 - }, - { - "url": "https://github.com/tesseract-ocr/tessdata_best/raw/main/dzo.traineddata", - "name": "dzo.traineddata", - "parameter_usage": "without-extension", - "description": "Tesseract LSTM model for Dzongkha", - "size": 3243805 - }, - { - "url": "https://github.com/tesseract-ocr/tessdata_best/raw/main/ell.traineddata", - "name": "ell.traineddata", - "parameter_usage": "without-extension", - "description": "Tesseract LSTM model for Modern Greek (1453-)", - "size": 8945021 - }, - { - "url": "https://github.com/tesseract-ocr/tessdata_best/raw/main/enm.traineddata", - "name": "enm.traineddata", - "parameter_usage": "without-extension", - "description": "Tesseract LSTM model for Middle English (1100-1500)", - "size": 13281564 - }, - { - "url": "https://github.com/tesseract-ocr/tessdata_best/raw/main/epo.traineddata", - "name": "epo.traineddata", - "parameter_usage": "without-extension", - "description": "Tesseract LSTM model for Esperanto", - "size": 
7402169 - }, - { - "url": "https://github.com/tesseract-ocr/tessdata_best/raw/main/est.traineddata", - "name": "est.traineddata", - "parameter_usage": "without-extension", - "description": "Tesseract LSTM model for Estonian", - "size": 15833749 - }, - { - "url": "https://github.com/tesseract-ocr/tessdata_best/raw/main/eus.traineddata", - "name": "eus.traineddata", - "parameter_usage": "without-extension", - "description": "Tesseract LSTM model for Basque", - "size": 7933869 - }, - { - "url": "https://github.com/tesseract-ocr/tessdata_best/raw/main/fao.traineddata", - "name": "fao.traineddata", - "parameter_usage": "without-extension", - "description": "Tesseract LSTM model for Faroese", - "size": 10030003 - }, - { - "url": "https://github.com/tesseract-ocr/tessdata_best/raw/main/fas.traineddata", - "name": "fas.traineddata", - "parameter_usage": "without-extension", - "description": "Tesseract LSTM model for Persian", - "size": 3325955 - }, - { - "url": "https://github.com/tesseract-ocr/tessdata_best/raw/main/fil.traineddata", - "name": "fil.traineddata", - "parameter_usage": "without-extension", - "description": "Tesseract LSTM model for Filipino", - "size": 8978743 - }, - { - "url": "https://github.com/tesseract-ocr/tessdata_best/raw/main/fin.traineddata", - "name": "fin.traineddata", - "parameter_usage": "without-extension", - "description": "Tesseract LSTM model for Finnish", - "size": 14369979 - }, - { - "url": "https://github.com/tesseract-ocr/tessdata_best/raw/main/fra.traineddata", - "name": "fra.traineddata", - "parameter_usage": "without-extension", - "description": "Tesseract LSTM model for French", - "size": 3972885 - }, - { - "url": "https://github.com/tesseract-ocr/tessdata_best/raw/main/frm.traineddata", - "name": "frm.traineddata", - "parameter_usage": "without-extension", - "description": "Tesseract LSTM model for Middle French (ca. 
1400-1600)", - "size": 4043005 - }, - { - "url": "https://github.com/tesseract-ocr/tessdata_best/raw/main/fry.traineddata", - "name": "fry.traineddata", - "parameter_usage": "without-extension", - "description": "Tesseract LSTM model for Western Frisian", - "size": 8442509 - }, - { - "url": "https://github.com/tesseract-ocr/tessdata_best/raw/main/gla.traineddata", - "name": "gla.traineddata", - "parameter_usage": "without-extension", - "description": "Tesseract LSTM model for Scottish Gaelic", - "size": 9599424 - }, - { - "url": "https://github.com/tesseract-ocr/tessdata_best/raw/main/gle.traineddata", - "name": "gle.traineddata", - "parameter_usage": "without-extension", - "description": "Tesseract LSTM model for Irish", - "size": 3942458 - }, - { - "url": "https://github.com/tesseract-ocr/tessdata_best/raw/main/glg.traineddata", - "name": "glg.traineddata", - "parameter_usage": "without-extension", - "description": "Tesseract LSTM model for Galician", - "size": 12709487 - }, - { - "url": "https://github.com/tesseract-ocr/tessdata_best/raw/main/grc.traineddata", - "name": "grc.traineddata", - "parameter_usage": "without-extension", - "description": "Tesseract LSTM model for Ancient Greek (to 1453)", - "size": 5168122 - }, - { - "url": "https://github.com/tesseract-ocr/tessdata_best/raw/main/guj.traineddata", - "name": "guj.traineddata", - "parameter_usage": "without-extension", - "description": "Tesseract LSTM model for Gujarati", - "size": 8515761 - }, - { - "url": "https://github.com/tesseract-ocr/tessdata_best/raw/main/hat.traineddata", - "name": "hat.traineddata", - "parameter_usage": "without-extension", - "description": "Tesseract LSTM model for Haitian", - "size": 12128251 - }, - { - "url": "https://github.com/tesseract-ocr/tessdata_best/raw/main/heb.traineddata", - "name": "heb.traineddata", - "parameter_usage": "without-extension", - "description": "Tesseract LSTM model for Hebrew", - "size": 3704077 - }, - { - "url": 
"https://github.com/tesseract-ocr/tessdata_best/raw/main/hin.traineddata", - "name": "hin.traineddata", - "parameter_usage": "without-extension", - "description": "Tesseract LSTM model for Hindi", - "size": 11895564 - }, - { - "url": "https://github.com/tesseract-ocr/tessdata_best/raw/main/hrv.traineddata", - "name": "hrv.traineddata", - "parameter_usage": "without-extension", - "description": "Tesseract LSTM model for Croatian", - "size": 11195424 - }, - { - "url": "https://github.com/tesseract-ocr/tessdata_best/raw/main/hun.traineddata", - "name": "hun.traineddata", - "parameter_usage": "without-extension", - "description": "Tesseract LSTM model for Hungarian", - "size": 12350405 - }, - { - "url": "https://github.com/tesseract-ocr/tessdata_best/raw/main/hye.traineddata", - "name": "hye.traineddata", - "parameter_usage": "without-extension", - "description": "Tesseract LSTM model for Armenian", - "size": 6372242 - }, - { - "url": "https://github.com/tesseract-ocr/tessdata_best/raw/main/iku.traineddata", - "name": "iku.traineddata", - "parameter_usage": "without-extension", - "description": "Tesseract LSTM model for Inuktitut", - "size": 6139484 - }, - { - "url": "https://github.com/tesseract-ocr/tessdata_best/raw/main/ind.traineddata", - "name": "ind.traineddata", - "parameter_usage": "without-extension", - "description": "Tesseract LSTM model for Indonesian", - "size": 8253606 - }, - { - "url": "https://github.com/tesseract-ocr/tessdata_best/raw/main/isl.traineddata", - "name": "isl.traineddata", - "parameter_usage": "without-extension", - "description": "Tesseract LSTM model for Icelandic", - "size": 9486436 - }, - { - "url": "https://github.com/tesseract-ocr/tessdata_best/raw/main/ita_old.traineddata", - "name": "ita_old.traineddata", - "parameter_usage": "without-extension", - "description": "Tesseract LSTM model for historical Italian", - "size": 9852171 - }, - { - "url": "https://github.com/tesseract-ocr/tessdata_best/raw/main/ita.traineddata", - "name": 
"ita.traineddata", - "parameter_usage": "without-extension", - "description": "Tesseract LSTM model for modern Italian", - "size": 8863635 - }, - { - "url": "https://github.com/tesseract-ocr/tessdata_best/raw/main/jav.traineddata", - "name": "jav.traineddata", - "parameter_usage": "without-extension", - "description": "Tesseract LSTM model for Javanese", - "size": 8650382 - }, - { - "url": "https://github.com/tesseract-ocr/tessdata_best/raw/main/jpn.traineddata", - "name": "jpn.traineddata", - "parameter_usage": "without-extension", - "description": "Tesseract LSTM model for Japanese (horizontal writing)", - "size": 14330109 - }, - { - "url": "https://github.com/tesseract-ocr/tessdata_best/raw/main/jpn_vert.traineddata", - "name": "jpn_vert.traineddata", - "parameter_usage": "without-extension", - "description": "Tesseract LSTM model for Japanese (vertical writing)", - "size": 14330809 - }, - { - "url": "https://github.com/tesseract-ocr/tessdata_best/raw/main/kan.traineddata", - "name": "kan.traineddata", - "parameter_usage": "without-extension", - "description": "Tesseract LSTM model for Kannada", - "size": 10233763 - }, - { - "url": "https://github.com/tesseract-ocr/tessdata_best/raw/main/kat_old.traineddata", - "name": "kat_old.traineddata", - "parameter_usage": "without-extension", - "description": "Tesseract LSTM model for historical Georgian", - "size": 3174400 - }, - { - "url": "https://github.com/tesseract-ocr/tessdata_best/raw/main/kat.traineddata", - "name": "kat.traineddata", - "parameter_usage": "without-extension", - "description": "Tesseract LSTM model for modern Georgian", - "size": 4487336 - }, - { - "url": "https://github.com/tesseract-ocr/tessdata_best/raw/main/kaz.traineddata", - "name": "kaz.traineddata", - "parameter_usage": "without-extension", - "description": "Tesseract LSTM model for Kazakh", - "size": 7528853 - }, - { - "url": "https://github.com/tesseract-ocr/tessdata_best/raw/main/khm.traineddata", - "name": "khm.traineddata", - 
"parameter_usage": "without-extension", - "description": "Tesseract LSTM model for Central Khmer", - "size": 8104332 - }, - { - "url": "https://github.com/tesseract-ocr/tessdata_best/raw/main/kir.traineddata", - "name": "kir.traineddata", - "parameter_usage": "without-extension", - "description": "Tesseract LSTM model for Kirghiz", - "size": 11948344 - }, - { - "url": "https://github.com/tesseract-ocr/tessdata_best/raw/main/kmr.traineddata", - "name": "kmr.traineddata", - "parameter_usage": "without-extension", - "description": "Tesseract LSTM model for Northern Kurdish", - "size": 10196464 - }, - { - "url": "https://github.com/tesseract-ocr/tessdata_best/raw/main/kor.traineddata", - "name": "kor.traineddata", - "parameter_usage": "without-extension", - "description": "Tesseract LSTM model for Korean (horizontal writing)", - "size": 12528128 - }, - { - "url": "https://github.com/tesseract-ocr/tessdata_best/raw/main/kor_vert.traineddata", - "name": "kor_vert.traineddata", - "parameter_usage": "without-extension", - "description": "Tesseract LSTM model for Korean (vertical writing)", - "size": 3964469 - }, - { - "url": "https://github.com/tesseract-ocr/tessdata_best/raw/main/lao.traineddata", - "name": "lao.traineddata", - "parameter_usage": "without-extension", - "description": "Tesseract LSTM model for Lao", - "size": 13532551 - }, - { - "url": "https://github.com/tesseract-ocr/tessdata_best/raw/main/lat.traineddata", - "name": "lat.traineddata", - "parameter_usage": "without-extension", - "description": "Tesseract LSTM model for Latin", - "size": 9705145 - }, - { - "url": "https://github.com/tesseract-ocr/tessdata_best/raw/main/lav.traineddata", - "name": "lav.traineddata", - "parameter_usage": "without-extension", - "description": "Tesseract LSTM model for Latvian", - "size": 5623473 - }, - { - "url": "https://github.com/tesseract-ocr/tessdata_best/raw/main/lit.traineddata", - "name": "lit.traineddata", - "parameter_usage": "without-extension", - "description": 
"Tesseract LSTM model for Lithuanian", - "size": 10252680 - }, - { - "url": "https://github.com/tesseract-ocr/tessdata_best/raw/main/ltz.traineddata", - "name": "ltz.traineddata", - "parameter_usage": "without-extension", - "description": "Tesseract LSTM model for Luxembourgish", - "size": 12721945 - }, - { - "url": "https://github.com/tesseract-ocr/tessdata_best/raw/main/mal.traineddata", - "name": "mal.traineddata", - "parameter_usage": "without-extension", - "description": "Tesseract LSTM model for Malayalam", - "size": 12524967 - }, - { - "url": "https://github.com/tesseract-ocr/tessdata_best/raw/main/mar.traineddata", - "name": "mar.traineddata", - "parameter_usage": "without-extension", - "description": "Tesseract LSTM model for Marathi", - "size": 13437670 - }, - { - "url": "https://github.com/tesseract-ocr/tessdata_best/raw/main/mkd.traineddata", - "name": "mkd.traineddata", - "parameter_usage": "without-extension", - "description": "Tesseract LSTM model for Macedonian", - "size": 3453054 - }, - { - "url": "https://github.com/tesseract-ocr/tessdata_best/raw/main/mlt.traineddata", - "name": "mlt.traineddata", - "parameter_usage": "without-extension", - "description": "Tesseract LSTM model for Maltese", - "size": 5060029 - }, - { - "url": "https://github.com/tesseract-ocr/tessdata_best/raw/main/mon.traineddata", - "name": "mon.traineddata", - "parameter_usage": "without-extension", - "description": "Tesseract LSTM model for Mongolian", - "size": 8646663 - }, - { - "url": "https://github.com/tesseract-ocr/tessdata_best/raw/main/mri.traineddata", - "name": "mri.traineddata", - "parameter_usage": "without-extension", - "description": "Tesseract LSTM model for Maori", - "size": 3610177 - }, - { - "url": "https://github.com/tesseract-ocr/tessdata_best/raw/main/msa.traineddata", - "name": "msa.traineddata", - "parameter_usage": "without-extension", - "description": "Tesseract LSTM model for Malay (macrolanguage)", - "size": 8230552 - }, - { - "url": 
"https://github.com/tesseract-ocr/tessdata_best/raw/main/mya.traineddata", - "name": "mya.traineddata", - "parameter_usage": "without-extension", - "description": "Tesseract LSTM model for Burmese", - "size": 14971060 - }, - { - "url": "https://github.com/tesseract-ocr/tessdata_best/raw/main/nep.traineddata", - "name": "nep.traineddata", - "parameter_usage": "without-extension", - "description": "Tesseract LSTM model for Nepali (macrolanguage)", - "size": 12387399 - }, - { - "url": "https://github.com/tesseract-ocr/tessdata_best/raw/main/nld.traineddata", - "name": "nld.traineddata", - "parameter_usage": "without-extension", - "description": "Tesseract LSTM model for Dutch", - "size": 8903736 - }, - { - "url": "https://github.com/tesseract-ocr/tessdata_best/raw/main/nor.traineddata", - "name": "nor.traineddata", - "parameter_usage": "without-extension", - "description": "Tesseract LSTM model for Norwegian", - "size": 14312333 - }, - { - "url": "https://github.com/tesseract-ocr/tessdata_best/raw/main/oci.traineddata", - "name": "oci.traineddata", - "parameter_usage": "without-extension", - "description": "Tesseract LSTM model for Occitan (post 1500)", - "size": 12917692 - }, - { - "url": "https://github.com/tesseract-ocr/tessdata_best/raw/main/ori.traineddata", - "name": "ori.traineddata", - "parameter_usage": "without-extension", - "description": "Tesseract LSTM model for Oriya (macrolanguage)", - "size": 8110602 - }, - { - "url": "https://github.com/tesseract-ocr/tessdata_best/raw/main/pan.traineddata", - "name": "pan.traineddata", - "parameter_usage": "without-extension", - "description": "Tesseract LSTM model for Panjabi", - "size": 11893154 - }, - { - "url": "https://github.com/tesseract-ocr/tessdata_best/raw/main/pol.traineddata", - "name": "pol.traineddata", - "parameter_usage": "without-extension", - "description": "Tesseract LSTM model for Polish", - "size": 11978867 - }, - { - "url": 
"https://github.com/tesseract-ocr/tessdata_best/raw/main/por.traineddata", - "name": "por.traineddata", - "parameter_usage": "without-extension", - "description": "Tesseract LSTM model for Portuguese", - "size": 8159939 - }, - { - "url": "https://github.com/tesseract-ocr/tessdata_best/raw/main/pus.traineddata", - "name": "pus.traineddata", - "parameter_usage": "without-extension", - "description": "Tesseract LSTM model for Pushto", - "size": 11987930 - }, - { - "url": "https://github.com/tesseract-ocr/tessdata_best/raw/main/que.traineddata", - "name": "que.traineddata", - "parameter_usage": "without-extension", - "description": "Tesseract LSTM model for Quechua", - "size": 10774587 - }, - { - "url": "https://github.com/tesseract-ocr/tessdata_best/raw/main/ron.traineddata", - "name": "ron.traineddata", - "parameter_usage": "without-extension", - "description": "Tesseract LSTM model for Romanian", - "size": 9595755 - }, - { - "url": "https://github.com/tesseract-ocr/tessdata_best/raw/main/rus.traineddata", - "name": "rus.traineddata", - "parameter_usage": "without-extension", - "description": "Tesseract LSTM model for Russian", - "size": 15301764 - }, - { - "url": "https://github.com/tesseract-ocr/tessdata_best/raw/main/san.traineddata", - "name": "san.traineddata", - "parameter_usage": "without-extension", - "description": "Tesseract LSTM model for Sanskrit", - "size": 15136202 - }, - { - "url": "https://github.com/tesseract-ocr/tessdata_best/raw/main/sin.traineddata", - "name": "sin.traineddata", - "parameter_usage": "without-extension", - "description": "Tesseract LSTM model for Sinhala", - "size": 8282713 - }, - { - "url": "https://github.com/tesseract-ocr/tessdata_best/raw/main/slk.traineddata", - "name": "slk.traineddata", - "parameter_usage": "without-extension", - "description": "Tesseract LSTM model for Slovak", - "size": 11542252 - }, - { - "url": "https://github.com/tesseract-ocr/tessdata_best/raw/main/slv.traineddata", - "name": "slv.traineddata", - 
"parameter_usage": "without-extension", - "description": "Tesseract LSTM model for Slovenian", - "size": 5879151 - }, - { - "url": "https://github.com/tesseract-ocr/tessdata_best/raw/main/snd.traineddata", - "name": "snd.traineddata", - "parameter_usage": "without-extension", - "description": "Tesseract LSTM model for Sindhi", - "size": 11981538 - }, - { - "url": "https://github.com/tesseract-ocr/tessdata_best/raw/main/spa_old.traineddata", - "name": "spa_old.traineddata", - "parameter_usage": "without-extension", - "description": "Tesseract LSTM model for historical Spanish", - "size": 9476925 - }, - { - "url": "https://github.com/tesseract-ocr/tessdata_best/raw/main/spa.traineddata", - "name": "spa.traineddata", - "parameter_usage": "without-extension", - "description": "Tesseract LSTM model for modern Spanish", - "size": 13570187 - }, - { - "url": "https://github.com/tesseract-ocr/tessdata_best/raw/main/sqi.traineddata", - "name": "sqi.traineddata", - "parameter_usage": "without-extension", - "description": "Tesseract LSTM model for Albanian", - "size": 4631498 - }, - { - "url": "https://github.com/tesseract-ocr/tessdata_best/raw/main/srp_latn.traineddata", - "name": "srp_latn.traineddata", - "parameter_usage": "without-extension", - "description": "Tesseract LSTM model for Serbian (in Latin script)", - "size": 9831713 - }, - { - "url": "https://github.com/tesseract-ocr/tessdata_best/raw/main/srp.traineddata", - "name": "srp.traineddata", - "parameter_usage": "without-extension", - "description": "Tesseract LSTM model for Serbian (in Cyrillic script)", - "size": 9345851 - }, - { - "url": "https://github.com/tesseract-ocr/tessdata_best/raw/main/sun.traineddata", - "name": "sun.traineddata", - "parameter_usage": "without-extension", - "description": "Tesseract LSTM model for Sundanese", - "size": 4132820 - }, - { - "url": "https://github.com/tesseract-ocr/tessdata_best/raw/main/swa.traineddata", - "name": "swa.traineddata", - "parameter_usage": 
"without-extension", - "description": "Tesseract LSTM model for Swahili (macrolanguage)", - "size": 4914855 - }, - { - "url": "https://github.com/tesseract-ocr/tessdata_best/raw/main/swe.traineddata", - "name": "swe.traineddata", - "parameter_usage": "without-extension", - "description": "Tesseract LSTM model for Swedish", - "size": 14325549 - }, - { - "url": "https://github.com/tesseract-ocr/tessdata_best/raw/main/syr.traineddata", - "name": "syr.traineddata", - "parameter_usage": "without-extension", - "description": "Tesseract LSTM model for Syriac", - "size": 12498294 - }, - { - "url": "https://github.com/tesseract-ocr/tessdata_best/raw/main/tam.traineddata", - "name": "tam.traineddata", - "parameter_usage": "without-extension", - "description": "Tesseract LSTM model for Tamil", - "size": 6023201 - }, - { - "url": "https://github.com/tesseract-ocr/tessdata_best/raw/main/tat.traineddata", - "name": "tat.traineddata", - "parameter_usage": "without-extension", - "description": "Tesseract LSTM model for Tatar", - "size": 7585204 - }, - { - "url": "https://github.com/tesseract-ocr/tessdata_best/raw/main/tel.traineddata", - "name": "tel.traineddata", - "parameter_usage": "without-extension", - "description": "Tesseract LSTM model for Telugu", - "size": 9098795 - }, - { - "url": "https://github.com/tesseract-ocr/tessdata_best/raw/main/tgk.traineddata", - "name": "tgk.traineddata", - "parameter_usage": "without-extension", - "description": "Tesseract LSTM model for Tajik", - "size": 4602842 - }, - { - "url": "https://github.com/tesseract-ocr/tessdata_best/raw/main/tha.traineddata", - "name": "tha.traineddata", - "parameter_usage": "without-extension", - "description": "Tesseract LSTM model for Thai", - "size": 7614571 - }, - { - "url": "https://github.com/tesseract-ocr/tessdata_best/raw/main/tir.traineddata", - "name": "tir.traineddata", - "parameter_usage": "without-extension", - "description": "Tesseract LSTM model for Tigrinya", - "size": 2410256 - }, - { - "url": 
"https://github.com/tesseract-ocr/tessdata_best/raw/main/ton.traineddata", - "name": "ton.traineddata", - "parameter_usage": "without-extension", - "description": "Tesseract LSTM model for Tonga (Tonga Islands)", - "size": 3729371 - }, - { - "url": "https://github.com/tesseract-ocr/tessdata_best/raw/main/tur.traineddata", - "name": "tur.traineddata", - "parameter_usage": "without-extension", - "description": "Tesseract LSTM model for Turkish", - "size": 7456265 - }, - { - "url": "https://github.com/tesseract-ocr/tessdata_best/raw/main/uig.traineddata", - "name": "uig.traineddata", - "parameter_usage": "without-extension", - "description": "Tesseract LSTM model for Uighur", - "size": 13074609 - }, - { - "url": "https://github.com/tesseract-ocr/tessdata_best/raw/main/ukr.traineddata", - "name": "ukr.traineddata", - "parameter_usage": "without-extension", - "description": "Tesseract LSTM model for Ukrainian", - "size": 10859081 - }, - { - "url": "https://github.com/tesseract-ocr/tessdata_best/raw/main/urd.traineddata", - "name": "urd.traineddata", - "parameter_usage": "without-extension", - "description": "Tesseract LSTM model for Urdu", - "size": 7994323 - }, - { - "url": "https://github.com/tesseract-ocr/tessdata_best/raw/main/uzb_cyrl.traineddata", - "name": "uzb_cyrl.traineddata", - "parameter_usage": "without-extension", - "description": "Tesseract LSTM model for Uzbek (in Cyrillic script)", - "size": 4325478 - }, - { - "url": "https://github.com/tesseract-ocr/tessdata_best/raw/main/uzb.traineddata", - "name": "uzb.traineddata", - "parameter_usage": "without-extension", - "description": "Tesseract LSTM model for Uzbek (in Latin script)", - "size": 12953454 - }, - { - "url": "https://github.com/tesseract-ocr/tessdata_best/raw/main/vie.traineddata", - "name": "vie.traineddata", - "parameter_usage": "without-extension", - "description": "Tesseract LSTM model for Vietnamese", - "size": 12435550 - }, - { - "url": 
"https://github.com/tesseract-ocr/tessdata_best/raw/main/yid.traineddata", - "name": "yid.traineddata", - "parameter_usage": "without-extension", - "description": "Tesseract LSTM model for Yiddish", - "size": 3278995 - }, - { - "url": "https://github.com/tesseract-ocr/tessdata_best/raw/main/yor.traineddata", - "name": "yor.traineddata", - "parameter_usage": "without-extension", - "description": "Tesseract LSTM model for Yoruba", - "size": 3736121 - } - ] - }, - "ocrd-tesserocr-segment": { - "executable": "ocrd-tesserocr-segment", - "categories": [ - "Layout analysis" - ], - "description": "Segment page into regions and lines with Tesseract", - "input_file_grp_cardinality": 1, - "output_file_grp_cardinality": 1, - "steps": [ - "layout/segmentation/region", - "layout/segmentation/line" - ], - "parameters": { - "dpi": { - "type": "number", - "format": "float", - "description": "pixel density in dots per inch (overrides any meta-data in the images)", - "default": 0 - }, - "padding": { - "type": "number", - "format": "integer", - "description": "extend detected region rectangles by this many (true) pixels", - "default": 4 - }, - "shrink_polygons": { - "type": "boolean", - "default": false, - "description": "annotate polygon coordinates instead of bounding box rectangles by projecting the convex hull of all symbols" - }, - "block_polygons": { - "type": "boolean", - "default": false, - "description": "annotate polygon coordinates instead of bounding box rectangles" - }, - "find_tables": { - "type": "boolean", - "default": true, - "description": "recognise tables as table regions (textord_tabfind_find_tables)" - }, - "find_staves": { - "type": "boolean", - "default": false, - "description": "When detecting regions, recognize music staves as non-text, suppressing it in the binary image (Tesseract's ``pageseg_apply_music_mask``). Note that this might wrongly detect tables as staves." 
- }, - "sparse_text": { - "type": "boolean", - "default": false, - "description": "use 'sparse text' page segmentation mode (find as much text as possible in no particular order): only text regions, single lines without vertical or horizontal space" - } - } - }, - "ocrd-tesserocr-segment-region": { - "executable": "ocrd-tesserocr-segment-region", - "categories": [ - "Layout analysis" - ], - "description": "Segment page into regions with Tesseract", - "input_file_grp_cardinality": 1, - "output_file_grp_cardinality": 1, - "steps": [ - "layout/segmentation/region" - ], - "parameters": { - "dpi": { - "type": "number", - "format": "float", - "description": "pixel density in dots per inch (overrides any meta-data in the images)", - "default": 0 - }, - "overwrite_regions": { - "type": "boolean", - "default": true, - "description": "Remove existing layout and text annotation below the Page level (otherwise skip page; no incremental annotation yet)." - }, - "padding": { - "type": "number", - "format": "integer", - "description": "extend detected region rectangles by this many (true) pixels", - "default": 0 - }, - "shrink_polygons": { - "type": "boolean", - "default": false, - "description": "annotate polygon coordinates instead of bounding box rectangles by projecting the convex hull of all symbols" - }, - "crop_polygons": { - "type": "boolean", - "default": false, - "description": "annotate polygon coordinates instead of bounding box rectangles" - }, - "find_tables": { - "type": "boolean", - "default": true, - "description": "recognise tables as table regions (textord_tabfind_find_tables)" - }, - "find_staves": { - "type": "boolean", - "default": false, - "description": "When detecting regions, recognize music staves as non-text, suppressing it in the binary image (Tesseract's ``pageseg_apply_music_mask``). Note that this might wrongly detect tables as staves." 
- }, - "sparse_text": { - "type": "boolean", - "default": false, - "description": "use 'sparse text' page segmentation mode (find as much text as possible in no particular order): only text regions, single lines without vertical or horizontal space" - } - } - }, - "ocrd-tesserocr-segment-table": { - "executable": "ocrd-tesserocr-segment-table", - "categories": [ - "Layout analysis" - ], - "description": "Segment table regions into cell text regions with Tesseract", - "input_file_grp_cardinality": 1, - "output_file_grp_cardinality": 1, - "steps": [ - "layout/segmentation/region" - ], - "parameters": { - "dpi": { - "type": "number", - "format": "float", - "description": "pixel density in dots per inch (overrides any meta-data in the images)", - "default": 0 - }, - "overwrite_cells": { - "type": "boolean", - "default": true, - "description": "Remove existing layout and text annotation below the TableRegion level (otherwise skip table; no incremental annotation yet)." - }, - "padding": { - "type": "number", - "format": "integer", - "description": "extend detected cell rectangles by this many (true) pixels", - "default": 0 - }, - "shrink_polygons": { - "type": "boolean", - "default": false, - "description": "annotate polygon coordinates instead of bounding box rectangles by projecting the convex hull of all symbols" - } - } - }, - "ocrd-tesserocr-segment-line": { - "executable": "ocrd-tesserocr-segment-line", - "categories": [ - "Layout analysis" - ], - "description": "Segment regions into lines with Tesseract", - "input_file_grp_cardinality": 1, - "output_file_grp_cardinality": 1, - "steps": [ - "layout/segmentation/line" - ], - "parameters": { - "dpi": { - "type": "number", - "format": "float", - "description": "pixel density in dots per inch (overrides any meta-data in the images)", - "default": 0 - }, - "overwrite_lines": { - "type": "boolean", - "default": true, - "description": "Remove existing layout and text annotation below the TextRegion level (otherwise skip 
region; no incremental annotation yet)." - }, - "padding": { - "type": "number", - "format": "integer", - "description": "extend detected line rectangles by this many (true) pixels", - "default": 0 - }, - "shrink_polygons": { - "type": "boolean", - "default": false, - "description": "annotate polygon coordinates instead of bounding box rectangles by projecting the convex hull of all symbols" - } - } - }, - "ocrd-tesserocr-segment-word": { - "executable": "ocrd-tesserocr-segment-word", - "categories": [ - "Layout analysis" - ], - "description": "Segment lines into words with Tesseract", - "input_file_grp_cardinality": 1, - "output_file_grp_cardinality": 1, - "steps": [ - "layout/segmentation/word" - ], - "parameters": { - "dpi": { - "type": "number", - "format": "float", - "description": "pixel density in dots per inch (overrides any meta-data in the images)", - "default": 0 - }, - "overwrite_words": { - "type": "boolean", - "default": true, - "description": "Remove existing layout and text annotation below the TextLine level (otherwise skip line; no incremental annotation yet)." 
- }, - "padding": { - "type": "number", - "format": "integer", - "description": "extend detected cell rectangles by this many (true) pixels", - "default": 0 - }, - "shrink_polygons": { - "type": "boolean", - "default": false, - "description": "annotate polygon coordinates instead of bounding box rectangles by projecting the convex hull of all symbols" - } - } - }, - "ocrd-tesserocr-crop": { - "executable": "ocrd-tesserocr-crop", - "categories": [ - "Image preprocessing" - ], - "description": "Poor man's cropping via region segmentation", - "input_file_grp_cardinality": 1, - "output_file_grp_cardinality": 1, - "steps": [ - "preprocessing/optimization/cropping" - ], - "parameters": { - "dpi": { - "type": "number", - "format": "float", - "description": "pixel density in dots per inch (overrides any meta-data in the images)", - "default": 0 - }, - "padding": { - "type": "number", - "format": "integer", - "description": "extend detected border by this many (true) pixels on every side", - "default": 4 - } - } - }, - "ocrd-tesserocr-binarize": { - "executable": "ocrd-tesserocr-binarize", - "categories": [ - "Image preprocessing" - ], - "description": "Binarize regions or lines with Tesseract's global Otsu", - "input_file_grp_cardinality": 1, - "output_file_grp_cardinality": 1, - "steps": [ - "preprocessing/optimization/binarization" - ], - "parameters": { - "dpi": { - "type": "number", - "format": "float", - "description": "pixel density in dots per inch (overrides any meta-data in the images)", - "default": 0 - }, - "operation_level": { - "type": "string", - "enum": [ - "page", - "region", - "line" - ], - "default": "page", - "description": "PAGE XML hierarchy level to operate on" - }, - "tiseg": { - "type": "boolean", - "default": false, - "description": "also separate text vs image by detecting and suppressing photo+sepline mask" - } - } + "ocrd-dummy": { + "executable": "ocrd-dummy", + "description": "Bare-bones processor creates PAGE-XML and optionally copies file 
from input group to output group", + "steps": [ + "preprocessing/optimization" + ], + "categories": [ + "Image preprocessing" + ], + "input_file_grp_cardinality": 1, + "output_file_grp_cardinality": 1, + "parameters": { + "copy_files": { + "type": "boolean", + "default": false, + "description": "Whether to actually copy files (true) or just create PAGE-XML as a side effect (false)" + } } + }, + "ocrd-filter": { + "executable": "ocrd-filter", + "description": "Bare-bones processor can be dynamically configured to remove segments based on XPath queries", + "steps": [ + "recognition/post-correction" + ], + "categories": [ + "Quality assurance" + ], + "input_file_grp_cardinality": 1, + "output_file_grp_cardinality": 1, + "parameters": { + "select": { + "type": "string", + "default": "//*[ends-with(local-name(),'Region')]", + "description": "Which segments to select for removal. An XPath 2.0 query expression (path and optional predicates), with 'pc' as namespace prefix for PAGE-XML and our extension functions (see help text). Only selection of segment hierarchy elements is allowed (so e.g. `*` would be equivalent to `pc:NoiseRegion|pc:LineDrawingRegion|pc:AdvertRegion|pc:ImageRegion|pc:ChartRegion|pc:MusicRegion|pc:GraphicRegion|pc:UnknownRegion|pc:CustomRegion|pc:SeparatorRegion|pc:MathsRegion|pc:TextRegion|pc:MapRegion|pc:ChemRegion|pc:TableRegion|pc:TextLine|pc:Word|pc:Glyph`, but `pc:MetadataItem` or `pc:Border` or `pc:Coords` would not match).\nFor example, to remove words or glyphs with low text confidence, select '(pc:Word|pc:Glyph)[pc:TextEquiv/@conf < 0.7]'. Or low layout confidence, '*[pc:Coords/@conf < 0.7]'.\nTo remove high pixel-to-character rate, select '*[pc:pixelarea(.) div string-length(pc:textequiv(.)) > 10000]'." + }, + "plot": { + "type": "boolean", + "default": false, + "description": "Whether to extract an image for each filtered segment and write to the output fileGrp." + } + } + } }