diff --git a/ocrd_network/ocrd_network/processing_server.py b/ocrd_network/ocrd_network/processing_server.py index 582c1134cc..eeae002f68 100644 --- a/ocrd_network/ocrd_network/processing_server.py +++ b/ocrd_network/ocrd_network/processing_server.py @@ -1,4 +1,4 @@ -from typing import Dict +from typing import Dict, List import uvicorn from fastapi import FastAPI, status, Request, HTTPException @@ -7,7 +7,7 @@ from pika.exceptions import ChannelClosedByBroker -from ocrd_utils import getLogger, get_ocrd_tool_json +from ocrd_utils import getLogger from ocrd_validators import ParameterValidator from .database import ( db_get_processing_job, @@ -23,7 +23,11 @@ PYJobOutput, StateEnum ) -from .utils import generate_created_time, generate_id +from .utils import ( + download_ocrd_all_tool_json, + generate_created_time, + generate_id +) class ProcessingServer(FastAPI): @@ -44,6 +48,7 @@ def __init__(self, config_path: str, host: str, port: int) -> None: self.log = getLogger(__name__) self.hostname = host self.port = port + self.ocrd_all_tool_json = download_ocrd_all_tool_json() self.config = ProcessingServerConfig(config_path) self.deployer = Deployer(self.config) self.mongodb_url = None @@ -240,12 +245,11 @@ async def push_processor_job(self, processor_name: str, data: PYJobInput) -> PYJ detail=f"Process queue with id '{processor_name}' not existing" ) - # validate parameters - ocrd_tool = get_ocrd_tool_json(processor_name) + ocrd_tool = self.ocrd_all_tool_json.get(processor_name, None) if not ocrd_tool: raise HTTPException( status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, - detail=f"Processor '{processor_name}' not available. Empty or missing ocrd_tool" + detail=f"Ocrd tool JSON of '{processor_name}' not found!" 
def download_ocrd_all_tool_json(
    url: str = "https://ocr-d.de/js/ocrd-all-tool.json",
    timeout: float = 30.0
) -> dict:
    """Download and return the parsed ocrd-all-tool JSON document.

    Args:
        url: Location of the JSON document. Defaults to the URL that was
            previously hard-coded in this function.
        timeout: Seconds to wait for the server before giving up. Requests
            applies no timeout by default, so without this an unresponsive
            host would block the caller indefinitely.

    Returns:
        The decoded JSON body of the response.

    Raises:
        ValueError: If the server answers with a status code other than 200.
        requests.RequestException: Propagated unchanged from ``requests`` on
            connection failures or when the timeout expires.
    """
    headers = {'Accept': 'application/json'}
    # An explicit timeout keeps a dead or slow host from hanging the caller.
    response = requests.get(url, headers=headers, timeout=timeout)
    if response.status_code != 200:
        raise ValueError(f'Failed to download ocrd all tool json from: "{url}"')
    return response.json()