diff --git a/.circleci/config.yml b/.circleci/config.yml
index 3fc9dcf157..f4e4436527 100644
--- a/.circleci/config.yml
+++ b/.circleci/config.yml
@@ -16,7 +16,9 @@ jobs:
     steps:
       - checkout
       - run: HOMEBREW_NO_AUTO_UPDATE=1 brew install imagemagick geos bash
-      - run: make install
+      - run: which bash
+      - run: bash --version
+      - run: make install
       - run: make deps-test test benchmark
 
   test-python37:
diff --git a/.github/workflows/docker-image.yml b/.github/workflows/docker-image.yml
index 453cae65b6..25999a199e 100644
--- a/.github/workflows/docker-image.yml
+++ b/.github/workflows/docker-image.yml
@@ -23,9 +23,11 @@ jobs:
       name: Set up Docker Buildx
       uses: docker/setup-buildx-action@v2
     - name: Build the Docker image
+      # default tag uses docker.io, so override on command-line
      run: make docker DOCKER_TAG=${{ env.DOCKER_TAG }}
     - name: Build the Docker image with GPU support
-      run: make docker-cuda DOCKER_TAG=${{ env.DOCKER_TAG }}-cuda
+      # default tag uses docker.io, so override on command-line
+      run: make docker-cuda DOCKER_TAG=${{ env.DOCKER_TAG }}-cuda DOCKER_BASE_IMAGE=${{ env.DOCKER_TAG }}
     - name: Login to GitHub Container Registry
       uses: docker/login-action@v2
       with:
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 82323b0675..2406864752 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -5,6 +5,27 @@ Versioned according to [Semantic Versioning](http://semver.org/).
 
 ## Unreleased
 
+Added:
+
+  * `make deps-cuda`: Makefile target to set up a working CUDA installation, both for native and Dockerfile.cuda, #1055
+
+## [2.51.0] - 2023-06-07
+
+Changed:
+
+  * `core cuda` Docker: CUDA base image working again, based on `ocrd/core` not `nvidia/cuda` in a separate `Dockerfile.cuda`, #1041
+  * `core-cuda` Docker: adopt #1008 (venv under /usr/local, as in ocrd_all, instead of dist-packages), #1041
+  * `core-cuda` Docker: use conda ([micromamba](https://mamba.readthedocs.io/en/latest/user_guide/micromamba.html)) for CUDA toolkit, and [nvidia-pyindex](https://pypi.org/project/nvidia-pyindex/) for CUDA libs – instead of [nvidia/cuda](https://hub.docker.com/r/nvidia/cuda) base image, #1041
+  * more robust workaround for shapely#1598, #1041
+
+Removed:
+
+  * Revert #882 (fastentrypoints) as it enforces deps versions at runtime
+  * Drop `ocrd_utils.package_resources` and use `pkg_resources.*` directly, #1041
+  * `ocrd resmgr`: Drop redundant (processor-provided) entries in the central `resource_list.yml`.
+
+## [2.50.0] - 2023-04-24
+
 Added:
 
   * :fire: `ocrd_network`: Components related to OCR-D Web API, #974
@@ -1729,6 +1750,9 @@ Fixed
 
 Initial Release
 
+[2.51.0]: ../../compare/v2.51.0..v2.50.0
+[2.50.0]: ../../compare/v2.50.0..v2.49.0
+[2.49.0]: ../../compare/v2.49.0..v2.48.1
 [2.48.1]: ../../compare/v2.48.1..v2.48.0
 [2.48.0]: ../../compare/v2.48.0..v2.47.4
 [2.47.4]: ../../compare/v2.47.4..v2.47.3
diff --git a/Dockerfile b/Dockerfile
index 3316f3aeb4..04df1e3564 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -6,7 +6,7 @@ ENV DEBIAN_FRONTEND noninteractive
 ENV PYTHONIOENCODING utf8
 ENV LC_ALL=C.UTF-8
 ENV LANG=C.UTF-8
-ENV PIP=pip3
+ENV PIP=pip
 
 WORKDIR /build-ocrd
 COPY ocrd ./ocrd
@@ -24,7 +24,6 @@ RUN apt-get update && apt-get -y install software-properties-common \
     && apt-get update && apt-get -y install \
         ca-certificates \
         python3-dev \
-        python3-pip \
         python3-venv \
         gcc \
         make \
@@ -34,11 +33,11 @@ RUN apt-get update && apt-get -y install software-properties-common \
         sudo \
         git \
     && make deps-ubuntu \
-    && pip3 install --upgrade pip setuptools \
+    && python3 -m venv /usr/local \
+    && hash -r \
+    && pip install --upgrade pip setuptools \
     && make install \
-    && apt-get remove -y gcc \
-    && apt-get autoremove -y \
-    && $FIXUP \
+    && eval $FIXUP \
     && rm -rf /build-ocrd
 
 WORKDIR /data
diff --git a/Dockerfile.cuda b/Dockerfile.cuda
new file mode 100644
index 0000000000..52d7a27619
--- /dev/null
+++ b/Dockerfile.cuda
@@ -0,0 +1,22 @@
+ARG BASE_IMAGE
+FROM $BASE_IMAGE
+
+ENV MAMBA_EXE=/usr/local/bin/conda
+ENV MAMBA_ROOT_PREFIX=/conda
+ENV PATH=$MAMBA_ROOT_PREFIX/bin:$PATH
+ENV CONDA_EXE=$MAMBA_EXE
+ENV CONDA_PREFIX=$MAMBA_ROOT_PREFIX
+ENV CONDA_SHLVL='1'
+
+WORKDIR /build
+
+COPY Makefile .
+
+RUN make deps-cuda
+
+WORKDIR /data
+
+RUN rm -fr /build
+
+CMD ["/usr/local/bin/ocrd", "--help"]
+
diff --git a/Makefile b/Makefile
index 207b0a3c32..f13549333d 100644
--- a/Makefile
+++ b/Makefile
@@ -19,7 +19,8 @@ help:
 	@echo ""
 	@echo "  Targets"
 	@echo ""
-	@echo "    deps-ubuntu    Dependencies for deployment in an ubuntu/debian linux"
+	@echo "    deps-cuda      Dependencies for deployment with GPU support via Conda"
+	@echo "    deps-ubuntu    Dependencies for deployment in an Ubuntu/Debian Linux"
 	@echo "    deps-test      Install test python deps via pip"
 	@echo "    install        (Re)install the tool"
 	@echo "    install-dev    Install with pip install -e"
@@ -32,31 +33,75 @@ help:
 	@echo "    docs-clean     Clean docs"
 	@echo "    docs-coverage  Calculate docstring coverage"
 	@echo "    docker         Build docker image"
-	@echo "    docker-cuda    Build docker GPU / CUDA image"
-	@echo "    cuda-ubuntu    Install native CUDA toolkit in different versions"
+	@echo "    docker-cuda    Build docker image for GPU / CUDA"
 	@echo "    pypi           Build wheels and source dist and twine upload them"
 	@echo ""
 	@echo "  Variables"
 	@echo ""
-	@echo "    DOCKER_TAG         Docker tag. Default: '$(DOCKER_TAG)'."
-	@echo "    DOCKER_BASE_IMAGE  Docker base image. Default: '$(DOCKER_BASE_IMAGE)'."
+	@echo "    DOCKER_TAG         Docker target image tag. Default: '$(DOCKER_TAG)'."
+	@echo "    DOCKER_BASE_IMAGE  Docker source image tag. Default: '$(DOCKER_BASE_IMAGE)'."
 	@echo "    DOCKER_ARGS        Additional arguments to docker build. Default: '$(DOCKER_ARGS)'"
 	@echo "    PIP_INSTALL        pip install command. Default: $(PIP_INSTALL)"
 
 # END-EVAL
 
-# Docker tag. Default: '$(DOCKER_TAG)'.
-DOCKER_TAG = ocrd/core
-
-# Docker base image. Default: '$(DOCKER_BASE_IMAGE)'.
-DOCKER_BASE_IMAGE = ubuntu:20.04
-
-# Additional arguments to docker build. Default: '$(DOCKER_ARGS)'
-DOCKER_ARGS = 
-
 # pip install command. Default: $(PIP_INSTALL)
 PIP_INSTALL = $(PIP) install
 
+deps-cuda: CONDA_EXE ?= /usr/local/bin/conda
+deps-cuda: export CONDA_PREFIX ?= /conda
+deps-cuda: PYTHON_PREFIX != $(PYTHON) -c 'import sysconfig; print(sysconfig.get_paths()["purelib"])'
+deps-cuda:
+	curl -Ls https://micro.mamba.pm/api/micromamba/linux-64/latest | tar -xvj bin/micromamba
+	mv bin/micromamba $(CONDA_EXE)
+# Install Conda system-wide (for interactive / login shells)
+	echo 'export MAMBA_EXE=$(CONDA_EXE) MAMBA_ROOT_PREFIX=$(CONDA_PREFIX) CONDA_PREFIX=$(CONDA_PREFIX) PATH=$(CONDA_PREFIX)/bin:$$PATH' >> /etc/profile.d/98-conda.sh
+	mkdir -p $(CONDA_PREFIX)/lib $(CONDA_PREFIX)/include
+	echo $(CONDA_PREFIX)/lib >> /etc/ld.so.conf.d/conda.conf
+# Get CUDA toolkit, including compiler and libraries with dev,
+# however, the Nvidia channels do not provide (recent) cudnn (needed for Torch, TF etc):
+#	MAMBA_ROOT_PREFIX=$(CONDA_PREFIX) \
+#	conda install -c nvidia/label/cuda-11.8.0 cuda && conda clean -a
+#
+# The conda-forge channel has cudnn and cudatoolkit but no cudatoolkit-dev anymore (and we need both!),
+# so let's combine nvidia and conda-forge (will be same lib versions, no waste of space),
+# but omitting cuda-cudart-dev and cuda-libraries-dev (as these will be pulled by pip for torch anyway):
+	MAMBA_ROOT_PREFIX=$(CONDA_PREFIX) \
+	conda install -c nvidia/label/cuda-11.8.0 \
+	    cuda-nvcc \
+	    cuda-cccl \
+	&& conda clean -a \
+	&& find $(CONDA_PREFIX) -name "*_static.a" -delete
+#	conda install -c conda-forge \
+#	    cudatoolkit=11.8.0 \
+#	    cudnn=8.8.* && \
+#	conda clean -a && \
+#	find $(CONDA_PREFIX) -name "*_static.a" -delete
+#
+# Since Torch will pull in the CUDA libraries (as Python pkgs) anyway,
+# let's jump the shark and pull these via NGC index directly,
+# but then share them with the rest of the system so native compilation/linking
+# works, too:
+	$(PIP) install nvidia-pyindex \
+	&& $(PIP) install nvidia-cudnn-cu11==8.6.0.163 \
+	    nvidia-cublas-cu11 \
+	    nvidia-cusparse-cu11 \
+	    nvidia-cusolver-cu11 \
+	    nvidia-curand-cu11 \
+	    nvidia-cufft-cu11 \
+	    nvidia-cuda-runtime-cu11 \
+	    nvidia-cuda-nvrtc-cu11 \
+	&& for pkg in cudnn cublas cusparse cusolver curand cufft cuda_runtime cuda_nvrtc; do \
+	    for lib in $(PYTHON_PREFIX)/nvidia/$$pkg/lib/lib*.so.*; do \
+	        base=`basename $$lib`; \
+	        ln -s $$lib $(CONDA_PREFIX)/lib/$$base.so; \
+	        ln -s $$lib $(CONDA_PREFIX)/lib/$${base%.so.*}.so; \
+	    done \
+	    && ln -s $(PYTHON_PREFIX)/nvidia/$$pkg/include/* $(CONDA_PREFIX)/include/; \
+	done \
+	&& ldconfig
+# gputil/nvidia-smi would be nice, too – but that drags in Python as a conda dependency...
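The crux of `deps-cuda` is the loop that shares the CUDA shared objects from the pip wheels with the rest of the system. For illustration only (this sketch is not part of the patch), here is the same logic in Python; the package list and the `/conda` prefix mirror the recipe's defaults:

```python
import sysconfig
from pathlib import Path

CONDA_PREFIX = Path("/conda")  # mirrors MAMBA_ROOT_PREFIX in the recipe
PKGS = ["cudnn", "cublas", "cusparse", "cusolver",
        "curand", "cufft", "cuda_runtime", "cuda_nvrtc"]

# same directory the recipe computes into PYTHON_PREFIX
purelib = Path(sysconfig.get_paths()["purelib"])

for pkg in PKGS:
    libdir = purelib / "nvidia" / pkg / "lib"
    for lib in sorted(libdir.glob("lib*.so.*")):
        # ln -s $lib $CONDA_PREFIX/lib/$base.so  (versioned name plus extra .so)
        # ln -s $lib $CONDA_PREFIX/lib/${base%.so.*}.so  (plain linker name)
        for name in (lib.name + ".so", lib.name.split(".so")[0] + ".so"):
            target = CONDA_PREFIX / "lib" / name
            if not target.exists():
                target.symlink_to(lib)
    # expose the headers as well, so native compilation finds them
    for header in (purelib / "nvidia" / pkg / "include").iterdir():
        dest = CONDA_PREFIX / "include" / header.name
        if not dest.exists():
            dest.symlink_to(header)
```

Once the links exist, the `ldconfig` run at the end of the recipe picks them up via the `/etc/ld.so.conf.d/conda.conf` entry written earlier.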
 
 # Dependencies for deployment in an ubuntu/debian linux
 deps-ubuntu:
 	apt-get install -y python3 imagemagick libgeos-dev
@@ -68,12 +113,13 @@ deps-test:
 
 # (Re)install the tool
 install:
-	$(PIP) install -U pip wheel setuptools fastentrypoints
+	$(PIP) install -U pip wheel setuptools
 	@# speedup for end-of-life builds
+	@# we cannot use pip config here due to pip#11988
 	if $(PYTHON) -V | fgrep -e 3.5 -e 3.6; then $(PIP) install --prefer-binary opencv-python-headless numpy; fi
 	for mod in $(BUILD_ORDER);do (cd $$mod ; $(PIP_INSTALL) .);done
 	@# workaround for shapely#1598
-	$(PIP) install --no-binary shapely --force-reinstall shapely
+	$(PIP) config set global.no-binary shapely
 
 # Install with pip install -e
 install-dev: uninstall
@@ -149,9 +195,16 @@ assets: repo/assets
 
 .PHONY: test
 # Run all unit tests
 test: assets
+	$(PYTHON) -m pytest --continue-on-collection-errors --durations=10\
+		--ignore=$(TESTDIR)/test_logging.py \
+		--ignore=$(TESTDIR)/test_logging_conf.py \
+		--ignore-glob="$(TESTDIR)/**/*bench*.py" \
+		$(TESTDIR)
+	#$(MAKE) test-logging
+
+test-logging:
 	HOME=$(CURDIR)/ocrd_utils $(PYTHON) -m pytest --continue-on-collection-errors -k TestLogging $(TESTDIR)
 	HOME=$(CURDIR) $(PYTHON) -m pytest --continue-on-collection-errors -k TestLogging $(TESTDIR)
-	$(PYTHON) -m pytest --continue-on-collection-errors --durations=10 --ignore=$(TESTDIR)/test_logging.py --ignore-glob="$(TESTDIR)/**/*bench*.py" $(TESTDIR)
 
 benchmark:
 	$(PYTHON) -m pytest $(TESTDIR)/model/test_ocrd_mets_bench.py
@@ -214,40 +267,22 @@ pyclean:
 
 .PHONY: docker docker-cuda
 
+# Additional arguments to docker build. Default: '$(DOCKER_ARGS)'
+DOCKER_ARGS = 
+
 # Build docker image
-docker docker-cuda:
-	docker build -t $(DOCKER_TAG) --build-arg BASE_IMAGE=$(DOCKER_BASE_IMAGE) $(DOCKER_ARGS) .
+docker: DOCKER_BASE_IMAGE = ubuntu:20.04
+docker: DOCKER_TAG = ocrd/core
+docker: DOCKER_FILE = Dockerfile
 
-# Build docker GPU / CUDA image
-docker-cuda: DOCKER_BASE_IMAGE = nvidia/cuda:11.3.1-cudnn8-runtime-ubuntu20.04
+docker-cuda: DOCKER_BASE_IMAGE = ocrd/core
 docker-cuda: DOCKER_TAG = ocrd/core-cuda
-docker-cuda: DOCKER_ARGS += --build-arg FIXUP="make cuda-ubuntu cuda-ldconfig"
-
-#
-# CUDA
-#
-
-.PHONY: cuda-ubuntu cuda-ldconfig
-
-# Install native CUDA toolkit in different versions
-cuda-ubuntu: cuda-ldconfig
-	apt-get -y install --no-install-recommends cuda-runtime-11-0 cuda-runtime-11-1 cuda-runtime-11-3 cuda-runtime-11-7 cuda-runtime-12-1
+docker-cuda: DOCKER_FILE = Dockerfile.cuda
 
-cuda-ldconfig: /etc/ld.so.conf.d/cuda.conf
-	ldconfig
+docker-cuda: docker
 
-/etc/ld.so.conf.d/cuda.conf:
-	@echo > $@
-	@echo /usr/local/cuda-11.0/lib64 >> $@
-	@echo /usr/local/cuda-11.0/targets/x86_64-linux/lib >> $@
-	@echo /usr/local/cuda-11.1/lib64 >> $@
-	@echo /usr/local/cuda-11.1/targets/x86_64-linux/lib >> $@
-	@echo /usr/local/cuda-11.3/lib64 >> $@
-	@echo /usr/local/cuda-11.3/targets/x86_64-linux/lib >> $@
-	@echo /usr/local/cuda-11.7/lib64 >> $@
-	@echo /usr/local/cuda-11.7/targets/x86_64-linux/lib >> $@
-	@echo /usr/local/cuda-12.1/lib64 >> $@
-	@echo /usr/local/cuda-12.1/targets/x86_64-linux/lib >> $@
+docker docker-cuda:
+	docker build --progress=plain -f $(DOCKER_FILE) -t $(DOCKER_TAG) --build-arg BASE_IMAGE=$(DOCKER_BASE_IMAGE) $(DOCKER_ARGS) .
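With the single build rule above, the two images differ only in target-specific variables, and the `docker-cuda: docker` prerequisite makes the CUDA image chain onto the plain one. A rough Python equivalent of what `make docker-cuda` ends up running, for illustration only (the values mirror the Makefile defaults):

```python
import subprocess

# target-specific defaults from the Makefile above
BUILDS = {
    "docker":      {"file": "Dockerfile",      "tag": "ocrd/core",      "base": "ubuntu:20.04"},
    "docker-cuda": {"file": "Dockerfile.cuda", "tag": "ocrd/core-cuda", "base": "ocrd/core"},
}

# `docker-cuda: docker` means the plain image is always built first,
# so Dockerfile.cuda can consume it as BASE_IMAGE
for target in ("docker", "docker-cuda"):
    build = BUILDS[target]
    subprocess.run(
        ["docker", "build", "--progress=plain",
         "-f", build["file"], "-t", build["tag"],
         "--build-arg", f"BASE_IMAGE={build['base']}", "."],
        check=True)
```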
 
 # Build wheels and source dist and twine upload them
 pypi: uninstall install
diff --git a/README.md b/README.md
index d2b34344b8..4572d7e32e 100644
--- a/README.md
+++ b/README.md
@@ -3,10 +3,9 @@
 > Python modules implementing [OCR-D specs](https://github.com/OCR-D/spec) and related tools
 
 [![image](https://img.shields.io/pypi/v/ocrd.svg)](https://pypi.org/project/ocrd/)
-[![image](https://travis-ci.org/OCR-D/core.svg?branch=master)](https://travis-ci.org/OCR-D/core)
 [![image](https://circleci.com/gh/OCR-D/core.svg?style=svg)](https://circleci.com/gh/OCR-D/core)
+[![Docker Image CI](https://github.com/OCR-D/core/actions/workflows/docker-image.yml/badge.svg)](https://github.com/OCR-D/core/actions/workflows/docker-image.yml)
 [![image](https://scrutinizer-ci.com/g/OCR-D/core/badges/build.png?b=master)](https://scrutinizer-ci.com/g/OCR-D/core)
-[![Docker Automated build](https://img.shields.io/docker/automated/ocrd/core.svg)](https://hub.docker.com/r/ocrd/core/tags/)
 [![image](https://codecov.io/gh/OCR-D/core/branch/master/graph/badge.svg)](https://codecov.io/gh/OCR-D/core)
 [![image](https://scrutinizer-ci.com/g/OCR-D/core/badges/quality-score.png?b=master)](https://scrutinizer-ci.com/g/OCR-D/core)
 
diff --git a/ocrd/ocrd/constants.py b/ocrd/ocrd/constants.py
index 2e9c17c649..1d436a7fa9 100644
--- a/ocrd/ocrd/constants.py
+++ b/ocrd/ocrd/constants.py
@@ -1,7 +1,7 @@
 """
 Constants for ocrd.
 """
-from ocrd_utils.package_resources import resource_filename
+from pkg_resources import resource_filename
 
 __all__ = [
     'TMP_PREFIX',
diff --git a/ocrd/ocrd/processor/builtin/ocrd-tool.json b/ocrd/ocrd/processor/builtin/dummy/ocrd-tool.json
similarity index 100%
rename from ocrd/ocrd/processor/builtin/ocrd-tool.json
rename to ocrd/ocrd/processor/builtin/dummy/ocrd-tool.json
diff --git a/ocrd/ocrd/processor/builtin/dummy_processor.py b/ocrd/ocrd/processor/builtin/dummy_processor.py
index 0fb07fc6d7..c0371e2d0e 100644
--- a/ocrd/ocrd/processor/builtin/dummy_processor.py
+++ b/ocrd/ocrd/processor/builtin/dummy_processor.py
@@ -1,6 +1,6 @@
 # pylint: disable=missing-module-docstring,invalid-name
 from os.path import join, basename
-from ocrd_utils.package_resources import resource_string
+from pkg_resources import resource_string
 
 import click
 
@@ -17,7 +17,7 @@
 )
 from ocrd_modelfactory import page_from_file
 
-OCRD_TOOL = parse_json_string_with_comments(resource_string(__name__, 'ocrd-tool.json').decode('utf8'))
+OCRD_TOOL = parse_json_string_with_comments(resource_string(__name__, 'dummy/ocrd-tool.json').decode('utf8'))
 
 class DummyProcessor(Processor):
     """
diff --git a/ocrd/ocrd/resource_list.yml b/ocrd/ocrd/resource_list.yml
index fcedf85913..5a047daad8 100644
--- a/ocrd/ocrd/resource_list.yml
+++ b/ocrd/ocrd/resource_list.yml
@@ -59,19 +59,3 @@ ocrd-sbb-binarize:
       type: archive
       path_in_archive: models
       size: 1654623597
-ocrd-sbb-textline-detector:
-  - url: https://qurator-data.de/sbb_textline_detector/models.tar.gz
-    description: default models provided by github.com/qurator-spk
-    name: default
-    type: archive
-    size: 1194551551
-ocrd-kraken-segment:
-  - url: https://github.com/mittagessen/kraken/raw/master/kraken/blla.mlmodel
-    description: Pretrained baseline segmentation model
-    name: blla.mlmodel
-    size: 5046835
-ocrd-kraken-recognize:
-  - url: https://zenodo.org/record/2577813/files/en_best.mlmodel?download=1
-    name: en_best.mlmodel
-    description: This model has been trained on a large corpus of modern printed English text\naugmented with ~10000 lines of historical pages
-    size: 2930723
diff --git a/ocrd/ocrd/workspace_bagger.py b/ocrd/ocrd/workspace_bagger.py
index 97d4527180..447f33d139 100644
--- a/ocrd/ocrd/workspace_bagger.py
+++ b/ocrd/ocrd/workspace_bagger.py
@@ -9,6 +9,7 @@ import sys
 from bagit import Bag, make_manifests, _load_tag_file, _make_tag_file, _make_tagmanifest_file # pylint: disable=no-name-in-module
 from distutils.dir_util import copy_tree
+from pkg_resources import get_distribution
 
 from ocrd_utils import (
     pushd_popd,
@@ -22,7 +23,6 @@ from ocrd_validators.constants import BAGIT_TXT, TMP_BAGIT_PREFIX, OCRD_BAGIT_PROFILE_URL
 from ocrd_modelfactory import page_from_file
 from ocrd_models.ocrd_page import to_xml
-from ocrd_utils.package_resources import get_distribution
 
 from .workspace import Workspace
 
diff --git a/ocrd/setup.py b/ocrd/setup.py
index be28ba0d6b..654b362378 100644
--- a/ocrd/setup.py
+++ b/ocrd/setup.py
@@ -1,5 +1,4 @@
 # -*- coding: utf-8 -*-
-import fastentrypoints
 from setuptools import setup, find_packages
 
 from ocrd_utils import VERSION
diff --git a/ocrd_models/ocrd_models/constants.py b/ocrd_models/ocrd_models/constants.py
index b3fe89a4c9..6c8b0e1017 100644
--- a/ocrd_models/ocrd_models/constants.py
+++ b/ocrd_models/ocrd_models/constants.py
@@ -1,7 +1,7 @@
 """
 Constants for ocrd_models.
 """
-from ocrd_utils.package_resources import resource_string
+from pkg_resources import resource_string
 import re
 
 __all__ = [
diff --git a/ocrd_network/ocrd_network/deployer.py b/ocrd_network/ocrd_network/deployer.py
index 491e6a632d..f5b7f045d5 100644
--- a/ocrd_network/ocrd_network/deployer.py
+++ b/ocrd_network/ocrd_network/deployer.py
@@ -17,7 +17,8 @@
 from .deployment_utils import (
     create_docker_client,
     DeployType,
-    wait_for_rabbitmq_availability
+    verify_mongodb_available,
+    verify_rabbitmq_available,
 )
 
 from .runtime_data import (
@@ -229,6 +230,16 @@ def deploy_rabbitmq(
             remove: bool,
             ports_mapping: Union[Dict, None] = None
     ) -> str:
+        if self.data_queue.skip_deployment:
+            self.log.debug("RabbitMQ is externally managed. Skipping deployment")
+            verify_rabbitmq_available(
+                self.data_queue.address,
+                self.data_queue.port,
+                self.data_queue.vhost,
+                self.data_queue.username,
+                self.data_queue.password
+            )
+            return self.data_queue.url
         self.log.debug(f"Trying to deploy '{image}', with modes: "
                        f"detach='{detach}', remove='{remove}'")
 
@@ -271,7 +282,7 @@ def deploy_rabbitmq(
         rmq_port = int(self.data_queue.port)
         rmq_vhost = '/'
 
-        wait_for_rabbitmq_availability(
+        verify_rabbitmq_available(
             host=rmq_host,
             port=rmq_port,
             vhost=rmq_vhost,
@@ -289,6 +300,11 @@ def deploy_mongodb(
             remove: bool,
             ports_mapping: Union[Dict, None] = None
     ) -> str:
+        if self.data_mongo.skip_deployment:
+            self.log.debug('MongoDB is externally managed. Skipping deployment')
+            verify_mongodb_available(self.data_mongo.url)
+            return self.data_mongo.url
+
         self.log.debug(f"Trying to deploy '{image}', with modes: "
                        f"detach='{detach}', remove='{remove}'")
 
@@ -305,11 +321,20 @@ def deploy_mongodb(
         ports_mapping = {
             27017: self.data_mongo.port
         }
+        if self.data_mongo.username:
+            environment = [
+                f'MONGO_INITDB_ROOT_USERNAME={self.data_mongo.username}',
+                f'MONGO_INITDB_ROOT_PASSWORD={self.data_mongo.password}'
+            ]
+        else:
+            environment = []
+
         res = client.containers.run(
             image=image,
             detach=detach,
             remove=remove,
-            ports=ports_mapping
+            ports=ports_mapping,
+            environment=environment
         )
         if not res or not res.id:
             raise RuntimeError('Failed to start MongoDB docker container on host: '
@@ -322,7 +347,9 @@ def deploy_mongodb(
         return self.data_mongo.url
 
     def kill_rabbitmq(self) -> None:
-        if not self.data_queue.pid:
+        if self.data_queue.skip_deployment:
+            return
+        elif not self.data_queue.pid:
             self.log.warning('No running RabbitMQ instance found')
             return
         client = create_docker_client(
@@ -337,7 +364,9 @@ def kill_rabbitmq(self) -> None:
         self.log.info('The RabbitMQ is stopped')
 
     def kill_mongodb(self) -> None:
-        if not self.data_mongo.pid:
+        if self.data_mongo.skip_deployment:
+            return
+        elif not self.data_mongo.pid:
             self.log.warning('No running MongoDB instance found')
             return
         client = create_docker_client(
diff --git a/ocrd_network/ocrd_network/deployment_utils.py b/ocrd_network/ocrd_network/deployment_utils.py
index 9be063cb2c..a5c01de6ec 100644
--- a/ocrd_network/ocrd_network/deployment_utils.py
+++ b/ocrd_network/ocrd_network/deployment_utils.py
@@ -4,8 +4,10 @@
 from docker.transport import SSHHTTPAdapter
 from paramiko import AutoAddPolicy, SSHClient
 from time import sleep
+import re
 
 from .rabbitmq_utils import RMQPublisher
+from pymongo import MongoClient
 
 __all__ = [
     'create_docker_client',
@@ -83,7 +85,7 @@ def _create_paramiko_client(self, base_url: str) -> None:
         self.ssh_client.set_missing_host_key_policy(AutoAddPolicy)
 
 
-def wait_for_rabbitmq_availability(
+def verify_rabbitmq_available(
         host: str,
         port: int,
         vhost: str,
@@ -101,7 +103,16 @@
         else:
             # TODO: Disconnect the dummy_publisher here before returning...
             return
-    raise RuntimeError('Error waiting for queue startup: timeout exceeded')
+    raise RuntimeError(f'Cannot connect to RabbitMQ host: {host}, port: {port}, '
+                       f'vhost: {vhost}, username: {username}')
+
+
+def verify_mongodb_available(mongo_url: str) -> None:
+    try:
+        client = MongoClient(mongo_url, serverSelectionTimeoutMS=1000.0)
+        client.admin.command("ismaster")
+    except Exception:
+        raise RuntimeError(f'Cannot connect to MongoDB: {re.sub(r":[^@]+@", ":****@", mongo_url)}')
 
 
 class DeployType(Enum):
diff --git a/ocrd_network/ocrd_network/runtime_data.py b/ocrd_network/ocrd_network/runtime_data.py
index 8ab9a2896a..59c658ada1 100644
--- a/ocrd_network/ocrd_network/runtime_data.py
+++ b/ocrd_network/ocrd_network/runtime_data.py
@@ -98,12 +98,24 @@ class DataMongoDB:
     def __init__(self, config: Dict) -> None:
         self.address = config['address']
         self.port = int(config['port'])
-        self.ssh_username = config['ssh']['username']
-        self.ssh_keypath = config['ssh'].get('path_to_privkey', None)
-        self.ssh_password = config['ssh'].get('password', None)
-        self.username = config['credentials']['username']
-        self.password = config['credentials']['password']
-        self.url = f'mongodb://{self.address}:{self.port}'
+        if 'ssh' in config:
+            self.ssh_username = config['ssh']['username']
+            self.ssh_keypath = config['ssh'].get('path_to_privkey', None)
+            self.ssh_password = config['ssh'].get('password', None)
+        else:
+            self.ssh_username = None
+            self.ssh_keypath = None
+            self.ssh_password = None
+
+        if 'credentials' in config:
+            self.username = config['credentials']['username']
+            self.password = config['credentials']['password']
+            self.url = f'mongodb://{self.username}:{self.password}@{self.address}:{self.port}'
+        else:
+            self.username = None
+            self.password = None
+            self.url = f'mongodb://{self.address}:{self.port}'
+        self.skip_deployment = config.get('skip_deployment', False)
 
         # Assigned when deployed
         self.pid = None
@@ -112,12 +124,19 @@ class DataRabbitMQ:
     def __init__(self, config: Dict) -> None:
         self.address = config['address']
         self.port = int(config['port'])
-        self.ssh_username = config['ssh']['username']
-        self.ssh_keypath = config['ssh'].get('path_to_privkey', None)
-        self.ssh_password = config['ssh'].get('password', None)
+        if 'ssh' in config:
+            self.ssh_username = config['ssh']['username']
+            self.ssh_keypath = config['ssh'].get('path_to_privkey', None)
+            self.ssh_password = config['ssh'].get('password', None)
+        else:
+            self.ssh_username = None
+            self.ssh_keypath = None
+            self.ssh_password = None
+
         self.vhost = '/'
         self.username = config['credentials']['username']
         self.password = config['credentials']['password']
         self.url = f'amqp://{self.username}:{self.password}@{self.address}:{self.port}{self.vhost}'
+        self.skip_deployment = config.get('skip_deployment', False)
 
         # Assigned when deployed
         self.pid = None
diff --git a/ocrd_utils/ocrd_utils/constants.py b/ocrd_utils/ocrd_utils/constants.py
index cfe5b20e09..3b1449e30c 100644
--- a/ocrd_utils/ocrd_utils/constants.py
+++ b/ocrd_utils/ocrd_utils/constants.py
@@ -1,12 +1,11 @@
 """
 Constants for ocrd_utils.
 """
+from pkg_resources import get_distribution
 from re import compile as regex_compile
 from os import environ
 from os.path import join, expanduser
 
-from ocrd_utils.package_resources import get_distribution
-
 __all__ = [
     'EXT_TO_MIME',
     'LOG_FORMAT',
diff --git a/ocrd_utils/ocrd_utils/package_resources.py b/ocrd_utils/ocrd_utils/package_resources.py
deleted file mode 100644
index c142f0e119..0000000000
--- a/ocrd_utils/ocrd_utils/package_resources.py
+++ /dev/null
@@ -1,49 +0,0 @@
-import atexit
-from contextlib import ExitStack
-from pathlib import Path
-
-try:
-    from importlib.resources import as_file, files
-except ImportError:
-    from importlib_resources import as_file, files  # type: ignore
-
-try:
-    from importlib.metadata import distribution as get_distribution
-except ImportError:
-    from importlib_metadata import distribution as get_distribution
-
-# See https://importlib-resources.readthedocs.io/en/latest/migration.html#pkg-resources-resource-filename
-_file_manager = ExitStack()
-atexit.register(_file_manager.close)
-
-
-def resource_filename(package: str, resource: str) -> Path:
-    """
-    Reimplementation of the function with the same name from pkg_resources
-
-    Using importlib for better performance
-
-    package : str
-        The package from where to start looking for resource (often __name__)
-    resource : str
-        The resource to look up
-    """
-    parent_package = package.rsplit('.',1)[0]
-    return _file_manager.enter_context(as_file(files(parent_package).joinpath(resource)))
-
-
-def resource_string(package: str, resource: str) -> bytes:
-    """
-    Reimplementation of the function with the same name from pkg_resources
-
-    Using importlib for better performance
-
-    package : str
-        The package from where to start looking for resource (often __name__)
-    resource : str
-        The resource to look up
-    """
-    parent_package = package.rsplit('.',1)[0]
-    return files(parent_package).joinpath(resource).read_bytes()
-
-__all__ = ['resource_filename', 'resource_string', 'get_distribution']
diff --git a/ocrd_utils/requirements.txt b/ocrd_utils/requirements.txt
index 12f36abec3..b4dccec0ad 100644
--- a/ocrd_utils/requirements.txt
+++ b/ocrd_utils/requirements.txt
@@ -3,6 +3,4 @@
 Pillow >= 7.2.0
 # tensorflow versions might require different versions
 numpy
 atomicwrites >= 1.3.0
-importlib_metadata;python_version<'3.8'
-importlib_resources;python_version<'3.9'
 frozendict>=2.3.4
diff --git a/ocrd_utils/setup.py b/ocrd_utils/setup.py
index 870eede6ee..1ab0b56d17 100644
--- a/ocrd_utils/setup.py
+++ b/ocrd_utils/setup.py
@@ -5,7 +5,7 @@
 
 setup(
     name='ocrd_utils',
-    version='2.49.0',
+    version='2.51.0',
     description='OCR-D framework - shared code, helpers, constants',
     long_description=open('README.md').read(),
     long_description_content_type='text/markdown',
diff --git a/ocrd_validators/ocrd_validators/constants.py b/ocrd_validators/ocrd_validators/constants.py
index b3834f7eb0..fc1ff445ae 100644
--- a/ocrd_validators/ocrd_validators/constants.py
+++ b/ocrd_validators/ocrd_validators/constants.py
@@ -2,7 +2,7 @@
 Constants for ocrd_validators.
""" import yaml -from ocrd_utils.package_resources import resource_string, resource_filename +from pkg_resources import resource_string, resource_filename __all__ = [ 'PROCESSING_SERVER_CONFIG_SCHEMA', diff --git a/ocrd_validators/ocrd_validators/processing_server_config.schema.yml b/ocrd_validators/ocrd_validators/processing_server_config.schema.yml index 4039e4917f..6cbe8d41c3 100644 --- a/ocrd_validators/ocrd_validators/processing_server_config.schema.yml +++ b/ocrd_validators/ocrd_validators/processing_server_config.schema.yml @@ -26,6 +26,9 @@ properties: ssh: description: Information required for an SSH connection $ref: "#/$defs/ssh" + skip_deployment: + description: set to true to deploy queue yourself + type: boolean database: description: Information about the MongoDB type: object @@ -46,6 +49,9 @@ properties: ssh: description: Information required for an SSH connection $ref: "#/$defs/ssh" + skip_deployment: + description: set to true to deploy database yourself + type: boolean hosts: description: A list of hosts where Processing Servers will be deployed type: array diff --git a/tests/cli/test_bashlib.py b/tests/cli/test_bashlib.py index e6db8c6f7a..3d178dcdd6 100644 --- a/tests/cli/test_bashlib.py +++ b/tests/cli/test_bashlib.py @@ -128,7 +128,7 @@ def test_bashlib_cp_processor(self): } } } - script = """#!/bin/bash + script = """#!/usr/bin/env bash set -eu set -o pipefail MIMETYPE_PAGE=$(ocrd bashlib constants MIMETYPE_PAGE) diff --git a/tests/validator/test_workspace_bagger.py b/tests/validator/test_workspace_bagger.py index fac3cd1084..fc3b035039 100644 --- a/tests/validator/test_workspace_bagger.py +++ b/tests/validator/test_workspace_bagger.py @@ -33,7 +33,7 @@ def tearDown(self): def test_bag_zip_and_spill(self): self.workspace.mets.find_all_files(ID='INPUT_0017')[0].url = 'bad-scheme://foo' - self.workspace.mets.find_all_files(ID='INPUT_0020')[0].url = 'http://google.com' + self.workspace.mets.find_all_files(ID='INPUT_0020')[0].url = 'http://ocr-d.de' self.bagger.bag(self.workspace, 'kant_aufklaerung_1784', skip_zip=False, dest=join(self.tempdir, 'out.ocrd.zip')) self.bagger.spill(join(self.tempdir, 'out.ocrd.zip'), join(self.tempdir, 'out')) @@ -52,7 +52,7 @@ def test_bag_wo_dest_zip(self): def test_bag_partial_http_nostrict(self): self.bagger.strict = False makedirs(BACKUPDIR) - self.workspace.mets.find_all_files(ID='INPUT_0020')[0].url = 'http://google.com' + self.workspace.mets.find_all_files(ID='INPUT_0020')[0].url = 'http://ocr-d.de' self.bagger.bag(self.workspace, 'kant_aufklaerung_1784') def test_bag_full(self):