diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml
new file mode 100644
index 0000000..e07c16f
--- /dev/null
+++ b/.github/workflows/build.yml
@@ -0,0 +1,146 @@
+name: build
+on: push
+jobs:
+  build:
+    name: Build, Test, Verify, Publish
+    runs-on: ubuntu-latest
+    steps:
+      - name: Checkout sdap-nexus
+        uses: actions/checkout@v2
+        with:
+          fetch-depth: 0
+      - name: Install miniconda
+        run: |
+          curl -o /tmp/miniconda.sh "https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh"
+          bash /tmp/miniconda.sh -b -p /opt/miniconda
+          echo "/opt/miniconda/condabin" >> $GITHUB_PATH # Put conda on PATH for the following steps of this job
+          /opt/miniconda/condabin/conda init bash
+      - name: Setup environments and install dependencies
+        run: |
+          conda config -q --add channels conda-forge
+          conda config -q --set channel_priority strict
+          conda create -qyn sdap-granule-ingester --file granule_ingester/conda-requirements.txt pip
+          # setuptools >= 58.0.0 dropped 2to3 support; required for pystache
+          # https://setuptools.pypa.io/en/latest/history.html#v58-0-0
+          conda create -qyn sdap-collection-manager pip setuptools\<=57.5.0 # cm manages its own dependencies with PyPI
+      - name: Install modules
+        shell: bash -ieo pipefail {0}
+        run: |
+          conda activate sdap-granule-ingester
+          pip install common/ granule_ingester/
+
+          conda activate sdap-collection-manager
+          pip install common/ collection_manager/
+      - name: Install pipeline tools
+        run: |
+          conda install -qyn sdap-granule-ingester pylint flake8 pytest
+          conda install -qyn sdap-collection-manager pylint flake8 pytest
+      - name: Lint
+        shell: bash -ieo pipefail {0}
+        continue-on-error: true
+        run: |
+          conda activate sdap-granule-ingester
+          pylint granule_ingester
+          flake8 granule_ingester
+
+          conda activate sdap-collection-manager
+          pylint collection_manager
+          flake8 collection_manager
+      - name: Test and coverage
+        continue-on-error: true # NOTE(review): failing tests do not stop the later publish steps - confirm intended
+        shell: bash -ieo pipefail {0}
+        run: |
+          conda activate sdap-granule-ingester
+          pytest
granule_ingester/tests/
+
+          conda activate sdap-collection-manager
+          pytest collection_manager/tests/
+      - name: Get module commits relative to develop
+        if: ${{ github.ref != 'refs/heads/develop' && github.ref != 'refs/heads/master' }}
+        run: |
+          echo "BRANCH=origin/develop" >> $GITHUB_ENV
+      - name: Get module commits relative to master
+        if: ${{ github.ref == 'refs/heads/develop' }}
+        run: |
+          echo "BRANCH=origin/master" >> $GITHUB_ENV
+      - name: Get module commits relative to latest tag
+        if: ${{ github.ref == 'refs/heads/master' }}
+        run: |
+          echo "BRANCH=$(git describe --tags --abbrev=0)" >> $GITHUB_ENV
+      - name: Get module commits
+        id: get_module_commits
+        run: |
+          COLLECTION_MANAGER_COMMITS=$(git rev-list --count "$BRANCH..HEAD" -- collection_manager) # commits on this ref that $BRANCH lacks
+          GRANULE_INGESTER_COMMITS=$(git rev-list --count "$BRANCH..HEAD" -- granule_ingester) # a bare $BRANCH would count its whole history
+
+          echo "collection_manager commits: $COLLECTION_MANAGER_COMMITS"
+          echo "granule_ingester commits: $GRANULE_INGESTER_COMMITS"
+
+          echo "::set-output name=collection_manager_commits::$COLLECTION_MANAGER_COMMITS"
+          echo "::set-output name=granule_ingester_commits::$GRANULE_INGESTER_COMMITS"
+      - name: Bump pre-alpha version
+        if: ${{ github.ref != 'refs/heads/develop' && github.ref != 'refs/heads/master' }}
+        run: |
+          echo "PHASE=pre-alpha" >> $GITHUB_ENV
+          echo "VALUE=$(git rev-parse --short ${GITHUB_SHA})" >> $GITHUB_ENV
+      - name: Bump alpha version
+        if: ${{ github.ref == 'refs/heads/develop' }}
+        run: |
+          echo "PHASE=alpha" >> $GITHUB_ENV
+          echo "VALUE=auto" >> $GITHUB_ENV
+      - name: Bump release version
+        if: ${{ github.ref == 'refs/heads/master' }}
+        run: |
+          echo "PHASE=patch" >> $GITHUB_ENV
+          echo "VALUE=auto" >> $GITHUB_ENV
+      - name: Bump versions
+        id: bump_versions
+        run: |
+          if [ ${{steps.get_module_commits.outputs.collection_manager_commits}} -gt 0 ]; then
+            COLLECTION_MANAGER_VERSION=$(python3 .github/workflows/version.py collection_manager/VERSION.txt --phase $PHASE --value $VALUE)
+          else
+
COLLECTION_MANAGER_VERSION=$(python3 .github/workflows/version.py collection_manager/VERSION.txt)
+          fi
+          if [ ${{steps.get_module_commits.outputs.granule_ingester_commits}} -gt 0 ]; then
+            GRANULE_INGESTER_VERSION=$(python3 .github/workflows/version.py granule_ingester/VERSION.txt --phase $PHASE --value $VALUE)
+          else
+            GRANULE_INGESTER_VERSION=$(python3 .github/workflows/version.py granule_ingester/VERSION.txt)
+          fi
+          echo "collection_manager: $COLLECTION_MANAGER_VERSION"
+          echo "granule_ingester: $GRANULE_INGESTER_VERSION"
+
+          echo "::set-output name=collection_manager_version::$COLLECTION_MANAGER_VERSION"
+          echo "::set-output name=granule_ingester_version::$GRANULE_INGESTER_VERSION"
+      - name: Commit version bump + tag
+        if: |
+          github.ref == 'refs/heads/develop' ||
+          github.ref == 'refs/heads/master' ||
+          startsWith(github.ref, 'refs/heads/release')
+        run: |
+          git config --global user.name 'sdap-nexus bot'
+          git config --global user.email 'sdap-nexus@noreply.github.com'
+
+          CONCATED_VERSION="c.${{steps.bump_versions.outputs.collection_manager_version}}-g.${{steps.bump_versions.outputs.granule_ingester_version}}"
+          git commit -am "/version $CONCATED_VERSION"
+          git tag -a "$CONCATED_VERSION" -m "/version $CONCATED_VERSION" # -a without -m would open an editor for the tag message
+          git push origin
+          git push origin "$CONCATED_VERSION"
+      - name: Log in to the Container registry
+        uses: docker/login-action@v1
+        with:
+          username: ${{ secrets.DOCKERHUB_USER }}
+          password: ${{ secrets.DOCKERHUB_TOKEN }}
+      - name: Build and push collection_manager
+        id: build_collection_manager
+        uses: docker/build-push-action@v2
+        with:
+          push: true
+          file: 'collection_manager/docker/Dockerfile'
+          tags: nexusjpl/collection-manager:${{ steps.bump_versions.outputs.collection_manager_version }}
+      - name: Build and push granule_ingester
+        id: build_granule_ingester
+        uses: docker/build-push-action@v2
+        with:
+          push: true
+          file: 'granule_ingester/docker/Dockerfile'
+          tags: nexusjpl/granule-ingester:${{ steps.bump_versions.outputs.granule_ingester_version }}
diff --git
a/.github/workflows/github-dev-release.yml b/.github/workflows/github-dev-release.yml deleted file mode 100644 index 9f9b555..0000000 --- a/.github/workflows/github-dev-release.yml +++ /dev/null @@ -1,72 +0,0 @@ -name: dev build release - -on: - push: - branches: - - dev - -jobs: - build: - runs-on: ubuntu-latest - - strategy: - matrix: - python-version: [3.7] - - steps: - - uses: actions/checkout@v2 - with: - lfs: true - - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v1 - with: - python-version: ${{ matrix.python-version }} - - name: Install dependencies - run: | - python -m pip install --upgrade pip - pip install -r requirements.txt - - name: Lint with flake8 - run: | - pip install flake8 - # stop the build if there are Python syntax errors or undefined names - flake8 sdap_ingest_manager --count --select=E9,F63,F7,F82 --show-source --statistics - # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide - flake8 sdap_ingest_manager --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics - - name: Deploy locally - run: | - pip install . 
- - name: Update default configuration - run: | - # google spreadsheet client does not work in the test environment - # it requires a manual copy/paste of the security code - import sys - import os - import pystache - renderer = pystache.Renderer() - credentials_template_path = os.path.join(sys.prefix, ".sdap_ingest_manager/credentials.json.template") - credentials_content = renderer.render_path(credentials_template_path, {'client_id': '${{secrets.google_api_client_id}}', 'client_secret': '${{secrets.google_api_client_secret}}'}) - credentials_target_path = os.path.join(sys.prefix, ".sdap_ingest_manager/credentials.json") - with open(credentials_target_path, "w") as f: - f.write(credentials_content) - shell: python - - name: Test with pytest - run: | - pip install pytest - pytest -s - - name: Create the package - run: | - pip install setuptools wheel - rm -f dist/* - python setup.py sdist bdist_wheel - - name: Publish new snapshot release - run: | - pip install pds-github-util - python-snapshot-release --token ${{ secrets.GITHUB_TOKEN }} - - name: Publish the Python distribution to PyPI - uses: pypa/gh-action-pypi-publish@master - with: - user: ${{ secrets.pypi_username }} - password: ${{ secrets.pypi_password }} - repository_url: https://test.pypi.org/legacy/ - - diff --git a/.github/workflows/pypi-tag-release.yml b/.github/workflows/pypi-tag-release.yml deleted file mode 100644 index 7e7321e..0000000 --- a/.github/workflows/pypi-tag-release.yml +++ /dev/null @@ -1,64 +0,0 @@ -name: PyPi publication - -on: - push: - tags: - - '[0-9].[0-9].*' - -jobs: - build: - runs-on: ubuntu-latest - - strategy: - matrix: - python-version: [3.7] - - steps: - - uses: actions/checkout@v2 - with: - lfs: true - - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v1 - with: - python-version: ${{ matrix.python-version }} - - name: Install dependencies - run: | - python -m pip install --upgrade pip - pip install -r requirements.txt - - name: Lint with flake8 
-      run: |
-        pip install flake8
-        # stop the build if there are Python syntax errors or undefined names
-        flake8 sdap_ingest_manager --count --select=E9,F63,F7,F82 --show-source --statistics
-        # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide
-        #flake8 sdap_ingest_manager --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics
-    - name: Deploy locally
-      run: |
-        pip install .
-    - name: Update default configuration
-      run: |
-        import sys
-        import os
-        import pystache
-        renderer = pystache.Renderer()
-        credentials_template_path = os.path.join(sys.prefix, ".sdap_ingest_manager/credentials.json.template")
-        credentials_content = renderer.render_path(credentials_template_path, {'client_id': '${{secrets.google_api_client_id}}', 'client_secret': '${{secrets.google_api_client_secret}}'})
-        credentials_target_path = os.path.join(sys.prefix, ".sdap_ingest_manager/credentials.json")
-        with open(credentials_target_path, "w") as f:
-            f.write(credentials_content)
-      shell: python
-    - name: Test with pytest
-      run: |
-        pip install pytest
-        pytest -s
-    - name: Create the package
-      run: |
-        pip install setuptools wheel
-        rm -f dist/*
-        python setup.py sdist bdist_wheel
-    - name: Publish the Python distribution to PyPI
-      uses: pypa/gh-action-pypi-publish@master
-      with:
-        user: ${{ secrets.pypi_username }}
-        password: ${{ secrets.pypi_password }}
-
diff --git a/.github/workflows/version.py b/.github/workflows/version.py
new file mode 100755
index 0000000..81c65f5
--- /dev/null
+++ b/.github/workflows/version.py
@@ -0,0 +1,92 @@
+#!/usr/bin/env python3
+from argparse import ArgumentParser
+import re
+
+version_regex = re.compile(
+    r'(?P<major>\d+)\.(?P<minor>\d+)\.(?P<patch>\d+)((a(?P<alpha_num>\d+))|(-(?P<pre_alpha>.*)))?'
# noqa: E501
+)
+
+
+def main():
+    """Print the current version, or bump it in place and print the result."""
+    parser = ArgumentParser()
+    parser.add_argument('file')
+    parser.add_argument('--phase', default='current')
+    parser.add_argument('--value', default='auto')
+    parser.add_argument('--track', nargs='+', default=[])
+
+    args = parser.parse_args()
+
+    with open(args.file, 'r+') as version_file:
+        version_contents = version_file.read()
+
+        current_version = version_regex.search(version_contents)
+        if current_version is None:
+            # stdout is captured as the version string; report on stderr
+            raise SystemExit('Version not found in file')
+
+        if args.phase == 'current':
+            print(current_version.group(0))
+            return
+
+        new_version = bump_version(current_version, args.phase, args.value)
+        # A callable replacement keeps backslashes in the new version literal.
+        version_contents = version_regex.sub(
+            lambda _match: new_version, version_contents, count=1)
+
+        print(new_version)
+
+        version_file.seek(0)
+        version_file.write(version_contents)
+        version_file.truncate()
+
+    for tracked_path in args.track:
+        with open(tracked_path, 'r+') as tracked_file:
+            contents = tracked_file.read()
+            new_contents = contents.replace(
+                current_version.group(0), new_version, 1)
+
+            tracked_file.seek(0)
+            tracked_file.write(new_contents)
+            tracked_file.truncate()
+
+
+def bump_version(version, phase, value):
+    """Return the version string that follows regex match *version*."""
+    major = int(version.group('major'))
+    minor = int(version.group('minor'))
+    patch = int(version.group('patch'))
+
+    if phase == 'manual':
+        if value == 'auto':
+            raise Exception('value cannot be auto on manual')
+        return value
+    elif phase == 'pre-alpha':
+        if value == 'auto':
+            raise Exception('value cannot be auto on pre-alpha')
+
+        return f'{major}.{minor}.{patch}-{value}'
+    elif phase == 'alpha':
+        alpha_num = version.group('alpha_num')
+        alpha_num = 0 if alpha_num is None else int(alpha_num) + 1
+
+        return f'{major}.{minor}.{patch}a{alpha_num}'
+    elif phase == 'patch':
+        patch = patch + 1 if value == 'auto' else value
+
+        return f'{major}.{minor}.{patch}'
+    elif phase == 'minor':
+        minor = minor + 1 if value == 'auto' else value
+        patch = 0
+
+        return
f'{major}.{minor}.{patch}'
+    elif phase == 'major':
+        major = major + 1 if value == 'auto' else value
+
+        return f'{major}.0.0'
+
+    raise ValueError(f'unsupported phase: {phase}')
+
+
+if __name__ == '__main__':
+    main()
diff --git a/collection_manager/VERSION.txt b/collection_manager/VERSION.txt
new file mode 100644
index 0000000..b81029a
--- /dev/null
+++ b/collection_manager/VERSION.txt
@@ -0,0 +1 @@
+0.1.6a14
\ No newline at end of file
diff --git a/collection_manager/collection_manager/__init__.py b/collection_manager/collection_manager/__init__.py
index 899e700..e69de29 100644
--- a/collection_manager/collection_manager/__init__.py
+++ b/collection_manager/collection_manager/__init__.py
@@ -1 +0,0 @@
-__version__ = '1.0.0.dev0'
diff --git a/collection_manager/collection_manager/entities/Collection.py b/collection_manager/collection_manager/entities/Collection.py
index 0875e7a..20738f4 100644
--- a/collection_manager/collection_manager/entities/Collection.py
+++ b/collection_manager/collection_manager/entities/Collection.py
@@ -28,6 +28,7 @@ class Collection:
     path: str
     historical_priority: int
     forward_processing_priority: Optional[int] = None
+    preprocess: Optional[str] = None
     date_from: Optional[datetime] = None
     date_to: Optional[datetime] = None
 
@@ -69,6 +70,7 @@ def from_dict(properties: dict):
                                     path=properties['path'],
                                     historical_priority=properties['priority'],
                                     forward_processing_priority=properties.get('forward-processing-priority', None),
+                                    preprocess=properties.get('preprocess', None),
                                     date_to=date_to,
                                     date_from=date_from)
             return collection
diff --git a/collection_manager/collection_manager/services/CollectionProcessor.py b/collection_manager/collection_manager/services/CollectionProcessor.py
index a328243..ed2e151 100644
--- a/collection_manager/collection_manager/services/CollectionProcessor.py
+++ b/collection_manager/collection_manager/services/CollectionProcessor.py
@@ -14,7 +14,7 @@
 
 logger = logging.getLogger(__name__)
 
-SUPPORTED_FILE_EXTENSIONS = ['.nc', '.nc4',
'.h5']
+SUPPORTED_FILE_EXTENSIONS = ['.nc', '.nc4', '.h5', '.bz2']
 
 
 class CollectionProcessor:
diff --git a/collection_manager/setup.py b/collection_manager/setup.py
index e1178f8..1af70d8 100644
--- a/collection_manager/setup.py
+++ b/collection_manager/setup.py
@@ -4,9 +4,8 @@
 
 PACKAGE_NAME = "sdap_collection_manager"
 
-with open("./collection_manager/__init__.py") as fi:
-    result = re.search(r'__version__\s*=\s*[\'"]([^\'"]*)[\'"]', fi.read())
-version = result.group(1)
+with open('VERSION.txt', 'r') as f:
+    __version__ = f.readline().strip()  # drop any trailing newline
 
 with open("README.md", "r") as fh:
     long_description = fh.read()
@@ -16,7 +15,7 @@
 
 setuptools.setup(
     name=PACKAGE_NAME,
-    version=version,
+    version=__version__,
     author="Apache - SDAP",
     author_email="dev@sdap.apache.org",
     description="a helper to ingest data in sdap",
diff --git a/granule_ingester/VERSION.txt b/granule_ingester/VERSION.txt
new file mode 100644
index 0000000..fc27d1a
--- /dev/null
+++ b/granule_ingester/VERSION.txt
@@ -0,0 +1 @@
+0.1.6a30
\ No newline at end of file
diff --git a/granule_ingester/granule_ingester/pipeline/Pipeline.py b/granule_ingester/granule_ingester/pipeline/Pipeline.py
index abc07a0..b12f548 100644
--- a/granule_ingester/granule_ingester/pipeline/Pipeline.py
+++ b/granule_ingester/granule_ingester/pipeline/Pipeline.py
@@ -28,6 +28,7 @@
 from granule_ingester.granule_loaders import GranuleLoader
 from granule_ingester.pipeline.Modules import \
     modules as processor_module_mappings
+from granule_ingester.preprocessors import get_preprocessor
 from granule_ingester.processors.TileProcessor import TileProcessor
 from granule_ingester.slicers import TileSlicer
 from granule_ingester.writers import DataStore, MetadataStore
@@ -95,13 +96,16 @@ def __init__(self,
                  data_store_factory,
                  metadata_store_factory,
                  tile_processors: List[TileProcessor],
-                 max_concurrency: int):
+                 max_concurrency: int,
+                 preprocessor=None
+                 ):
         self._granule_loader = granule_loader
         self._tile_processors = tile_processors
         self._slicer = slicer
self._data_store_factory = data_store_factory
         self._metadata_store_factory = metadata_store_factory
         self._max_concurrency = max_concurrency
+        self._preprocessor = preprocessor
 
         # Create a SyncManager so that we can to communicate exceptions from the
         # worker processes back to the main process.
@@ -142,6 +146,9 @@ def _build_pipeline(cls,
         try:
             granule_loader = GranuleLoader(**config['granule'])
 
+            preprocessor = (get_preprocessor(config['preprocess'])
+                            if 'preprocess' in config else None)
+
             slicer_config = config['slicer']
             slicer = cls._parse_module(slicer_config, module_mappings)
 
@@ -155,7 +162,9 @@ def _build_pipeline(cls,
                                   data_store_factory,
                                   metadata_store_factory,
                                   tile_processors,
-                                  max_concurrency)
+                                  max_concurrency,
+                                  preprocessor
+                                  )
         except PipelineBuildingError:
             raise
         except KeyError as e:
@@ -182,6 +191,9 @@ async def run(self):
         async with self._granule_loader as (dataset, granule_name):
             start = time.perf_counter()
 
+            if self._preprocessor is not None:
+                dataset = self._preprocessor(dataset)
+
             shared_memory = self._manager.Namespace()
             async with Pool(initializer=_init_worker,
                             initargs=(self._tile_processors,
diff --git a/granule_ingester/granule_ingester/preprocessors/__init__.py b/granule_ingester/granule_ingester/preprocessors/__init__.py
new file mode 100644
index 0000000..51e7dfc
--- /dev/null
+++ b/granule_ingester/granule_ingester/preprocessors/__init__.py
@@ -0,0 +1,8 @@
+from . import aquarius  # Python 3 has no implicit relative imports
+
+PREPROCESSORS = {
+    'AQUARIUS': aquarius
+}
+
+def get_preprocessor(name):
+    return PREPROCESSORS[name].run_preprocess
\ No newline at end of file
diff --git a/granule_ingester/granule_ingester/preprocessors/aquarius.py b/granule_ingester/granule_ingester/preprocessors/aquarius.py
new file mode 100644
index 0000000..fe58db1
--- /dev/null
+++ b/granule_ingester/granule_ingester/preprocessors/aquarius.py
@@ -0,0 +1,38 @@
+import numpy as np
+import xarray as xr
+
+def run_preprocess(dataset):
+    # Convert dimension names to lat/lon
+    l3m_data = dataset.data_vars['l3m_data']
+
dims = l3m_data.sizes
+    dim_translations = {}
+
+    for name, size in dims.items():
+        if size == 180:
+            dim_translations[name] = 'lat'
+        elif size == 360:
+            dim_translations[name] = 'lon'
+
+    dataset = dataset.rename_dims(dim_translations)
+
+    # Generate lat/lon variables
+    lat_data = np.array([i for i in range(90, -90, -1)], np.int_)
+    lat = xr.Variable('lat', lat_data, {
+        'standard_name': 'latitude',
+        'long_name': 'latitude',
+        'valid_min': -90,
+        'valid_max': 90
+    })
+
+    lon_data = np.array([i for i in range(-180, 180)], np.int_)
+    lon = xr.Variable('lon', lon_data, {
+        'standard_name': 'longitude',
+        'long_name': 'longitude',
+        'valid_min': -180,
+        'valid_max': 180
+    })
+
+    dataset['lat'] = lat
+    dataset['lon'] = lon
+
+    return dataset
\ No newline at end of file
diff --git a/granule_ingester/setup.py b/granule_ingester/setup.py
index 2a5920e..c940bd9 100644
--- a/granule_ingester/setup.py
+++ b/granule_ingester/setup.py
@@ -5,13 +5,14 @@
 with open('requirements.txt') as f:
     pip_requirements = f.readlines()
 
+with open('VERSION.txt', 'r') as f:
+    __version__ = f.readline().strip()  # drop any trailing newline
+
 try:
     check_call(['conda', 'install', '-y', '-c', 'conda-forge', '--file', 'conda-requirements.txt'])
 except (CalledProcessError, IOError) as e:
     raise EnvironmentError("Error installing conda packages", e)
 
-__version__ = '1.0.0-SNAPSHOT'
-
 setup(
     name='sdap_granule_ingester',
     version=__version__,