From 1a6362a0383abedb9b9e7599f3d363f5cd5cf2d6 Mon Sep 17 00:00:00 2001 From: mehmedGIT Date: Tue, 28 Mar 2023 11:58:55 +0200 Subject: [PATCH 1/4] processing server - download ocrd all tool --- .../ocrd_network/processing_server.py | 22 ++++++++++++------- ocrd_network/ocrd_network/utils.py | 12 ++++++++++ 2 files changed, 26 insertions(+), 8 deletions(-) diff --git a/ocrd_network/ocrd_network/processing_server.py b/ocrd_network/ocrd_network/processing_server.py index 988480ee3f..5e281fc43e 100644 --- a/ocrd_network/ocrd_network/processing_server.py +++ b/ocrd_network/ocrd_network/processing_server.py @@ -7,7 +7,7 @@ from pika.exceptions import ChannelClosedByBroker -from ocrd_utils import getLogger, get_ocrd_tool_json +from ocrd_utils import getLogger from ocrd_validators import ParameterValidator from .database import ( db_get_processing_job, @@ -23,7 +23,11 @@ PYJobOutput, StateEnum ) -from .utils import generate_created_time, generate_id +from .utils import ( + download_ocrd_all_tool_json, + generate_created_time, + generate_id +) class ProcessingServer(FastAPI): @@ -44,6 +48,7 @@ def __init__(self, config_path: str, host: str, port: int) -> None: self.log = getLogger(__name__) self.hostname = host self.port = port + self.ocrd_all_tool_json = download_ocrd_all_tool_json() self.config = ProcessingServerConfig(config_path) self.deployer = Deployer(self.config) self.mongodb_url = None @@ -243,11 +248,11 @@ async def push_processor_job(self, processor_name: str, data: PYJobInput) -> PYJ # validate additional parameters if data.parameters: - ocrd_tool = get_ocrd_tool_json(processor_name) + ocrd_tool = self.ocrd_all_tool_json.get(processor_name, None) if not ocrd_tool: raise HTTPException( status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, - detail=f"Processor '{processor_name}' not available. Empty or missing ocrd_tool" + detail=f"Ocrd tool JSON of '{processor_name}' not found!" ) report = ParameterValidator(ocrd_tool).validate(data.parameters) if not report.is_valid: @@ -291,12 +296,13 @@ async def push_processor_job(self, processor_name: str, data: PYJobInput) -> PYJ async def get_processor_info(self, processor_name) -> Dict: """ Return a processor's ocrd-tool.json """ - if processor_name not in self.processor_list: + ocrd_tool = self.ocrd_all_tool_json.get(processor_name, None) + if not ocrd_tool: raise HTTPException( - status_code=status.HTTP_404_NOT_FOUND, - detail='Processor not available' + status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, + detail=f"Ocrd tool JSON of '{processor_name}' not found!" ) - return get_ocrd_tool_json(processor_name) + return ocrd_tool async def get_job(self, processor_name: str, job_id: str) -> PYJobOutput: """ Return processing job-information from the database diff --git a/ocrd_network/ocrd_network/utils.py b/ocrd_network/ocrd_network/utils.py index 759a31597a..a47c502408 100644 --- a/ocrd_network/ocrd_network/utils.py +++ b/ocrd_network/ocrd_network/utils.py @@ -1,6 +1,7 @@ from datetime import datetime from functools import wraps from re import match as re_match +import requests from pika import URLParameters from pymongo import uri_parser as mongo_uri_parser from uuid import uuid4 @@ -16,6 +17,7 @@ def func_wrapper(*args, **kwargs): if asyncio.iscoroutine(result): return asyncio.get_event_loop().run_until_complete(result) return result + return func_wrapper @@ -69,3 +71,13 @@ def verify_and_parse_mq_uri(rabbitmq_address: str): 'vhost': url_params.virtual_host } return parsed_data + + +def download_ocrd_all_tool_json(): + # TODO: Make this more configurable/flexible. + url = "https://ocr-d.de/js/ocrd-all-tool.json" + headers = {'Accept': 'application/json'} + response = requests.get(url, headers=headers) + if not response.status_code == 200: + raise ValueError(f'Failed to download ocrd all tool json from: "{url}"') + return response.json() From 9bd1ce2413207a6482c13317513c3c8da7e44c10 Mon Sep 17 00:00:00 2001 From: mehmedGIT Date: Tue, 28 Mar 2023 12:28:42 +0200 Subject: [PATCH 2/4] check defaults, pass shallow copy --- .../ocrd_network/processing_server.py | 21 +++++++++---------- 1 file changed, 10 insertions(+), 11 deletions(-) diff --git a/ocrd_network/ocrd_network/processing_server.py b/ocrd_network/ocrd_network/processing_server.py index 9a9c3231e2..e6e8f0b72d 100644 --- a/ocrd_network/ocrd_network/processing_server.py +++ b/ocrd_network/ocrd_network/processing_server.py @@ -245,17 +245,16 @@ async def push_processor_job(self, processor_name: str, data: PYJobInput) -> PYJ detail=f"Process queue with id '{processor_name}' not existing" ) - # validate additional parameters - if data.parameters: - ocrd_tool = self.ocrd_all_tool_json.get(processor_name, None) - if not ocrd_tool: - raise HTTPException( - status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, - detail=f"Ocrd tool JSON of '{processor_name}' not found!" - ) - report = ParameterValidator(ocrd_tool).validate(data.parameters) - if not report.is_valid: - raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail=report.errors) + # validate parameters + ocrd_tool = self.ocrd_all_tool_json.get(processor_name, None) + if not ocrd_tool: + raise HTTPException( + status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, + detail=f"Ocrd tool JSON of '{processor_name}' not found!" + ) + report = ParameterValidator(ocrd_tool).validate(dict(data.parameters)) + if not report.is_valid: + raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail=report.errors) if bool(data.path_to_mets) == bool(data.workspace_id): raise HTTPException( From 7a2f8fc7c38986bad4253f44332c3c496b26ba8f Mon Sep 17 00:00:00 2001 From: Mehmed Mustafa Date: Thu, 30 Mar 2023 08:24:28 +0200 Subject: [PATCH 3/4] fix: return list not str --- ocrd_network/ocrd_network/processing_server.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ocrd_network/ocrd_network/processing_server.py b/ocrd_network/ocrd_network/processing_server.py index 9a9c3231e2..d3f9d42a0b 100644 --- a/ocrd_network/ocrd_network/processing_server.py +++ b/ocrd_network/ocrd_network/processing_server.py @@ -1,4 +1,4 @@ -from typing import Dict +from typing import Dict, List import uvicorn from fastapi import FastAPI, status, Request, HTTPException @@ -315,7 +315,7 @@ async def get_job(self, processor_name: str, job_id: str) -> PYJobOutput: detail=f"Processing job with id '{job_id}' of processor type '{processor_name}' not existing" ) - async def list_processors(self) -> str: + async def list_processors(self) -> List[str]: """ Return a list of all available processors """ return self.processor_list From ccc6c54a2e70a87243faf0f237aa81aa2045034a Mon Sep 17 00:00:00 2001 From: mehmedGIT Date: Mon, 3 Apr 2023 11:22:11 +0200 Subject: [PATCH 4/4] fix the merge --- ocrd_network/ocrd_network/processing_server.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/ocrd_network/ocrd_network/processing_server.py b/ocrd_network/ocrd_network/processing_server.py index 1f1cea328b..eeae002f68 100644 --- a/ocrd_network/ocrd_network/processing_server.py +++ b/ocrd_network/ocrd_network/processing_server.py @@ -245,10 +245,6 @@ async def push_processor_job(self, processor_name: str, data: PYJobInput) -> PYJ detail=f"Process queue with id '{processor_name}' not existing" ) -<<<<<<< HEAD - # validate parameters -======= ->>>>>>> a88495ff5522a096dff466dd2714f02de4695b6a ocrd_tool = self.ocrd_all_tool_json.get(processor_name, None) if not ocrd_tool: raise HTTPException(