diff --git a/.github/workflows/validate.yml b/.github/workflows/validate.yml new file mode 100644 index 0000000..f701e41 --- /dev/null +++ b/.github/workflows/validate.yml @@ -0,0 +1,29 @@ +name: Validate DAP4 XML + +on: + push: + branches: [ master ] + pull_request: + branches: [ master ] + +jobs: + validate-dmr: + runs-on: ubuntu-latest + + steps: + - name: Checkout repository + uses: actions/checkout@v3 + + - name: Set up Python + uses: actions/setup-python@v4 + with: + python-version: '3.12' + + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install lxml pytest + + - name: Run DMR XML validation + run: | + pytest -v diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..476442c --- /dev/null +++ b/.gitignore @@ -0,0 +1,51 @@ +*.py[cod] + +# C extensions +*.so + +# Packages +*.egg +*.egg-info +dist +build +eggs +parts +bin +var +sdist +develop-eggs +.installed.cfg +lib +lib64 + +# Installer logs +pip-log.txt + +# ignore shell script at base level +*.sh + +# Unit test / coverage reports +.coverage +.tox +nosetests.xml + +# Translations +*.mo + +# Mr Developer +.mr.developer.cfg +.project +.pydevproject + +# Vim +*.swp + +.cache +__pycache__ +tests/__pycache__ + +# OS-X Finder +*.DS_Store + +# IDEA Projects +.idea diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 0000000..55aa715 --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,29 @@ +repos: + - repo: https://github.com/psf/black + rev: 23.12.1 + hooks: + - id: black + files: ^tests/.*\.py$ + + - repo: https://github.com/astral-sh/ruff-pre-commit + rev: v0.3.2 + hooks: + - id: ruff + args: ["--fix"] + files: ^tests/.*\.py$ + + - repo: https://github.com/pre-commit/pre-commit-hooks + rev: v4.5.0 + hooks: + - id: check-xml + files: | + ^dap4/dap4\.xsd$ + ^tests/data/.*\.dmr$ + - id: trailing-whitespace + files: | + ^dap4/.*$ + ^tests/.*$ + - id: end-of-file-fixer + files: | + ^dap4/.*$ + ^tests/.*$ diff --git 
a/dap4/dap4.xsd b/dap4/dap4.xsd index 71e95a4..b79155d 100644 --- a/dap4/dap4.xsd +++ b/dap4/dap4.xsd @@ -1,196 +1,278 @@ - - - - - - - Semantic restriction: xml attributes are allowed - only on the root group, where both dapVersion and base are - required and ns is optional. - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + DAP Variable Types + + + + + + + + + + + + + + + + + + + + + + + + + + + + This is the XML representation of a DAP DMR object. + + + + + + + + + + + + + + - - - - - - - - - - - - - - - - - - - - - + + + + + A Group is a lexical scoping tool used to replicate HDF5 and netCDF4 + Groups. Each Group defines a lexical scope. Each dataset has at least one Group; if + only one is present, it may be anonymous. In this case, by convention, it\'s name + attribute should be \'anonymous\'. + + + + + + + + + + + + + + + This holds a dimension, a name and size, that may be shared between + and Array. SharedDimensions are lexically scoped. + + + + + + + + + + + + + + + + + + + DAP Attribute Type + + + + + + + + + + + + + + + + + + + + + - + + + - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + When we want to embed arbitrary XML in a DMR use this node. This + functions like an attribute and appear in the same general place as an attribute, + but its contents are ignored by DAP software. Other software might find the + information useful. 
+ + + + + + - - - - - - - - - + + + + DAP Base Type + + + + + + + + - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + + + - - - - - - - - - - - - + + + + + + + - - - - - - - - - + + + + + + + + - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + + @@ -200,22 +282,36 @@ - - - - - - - - - + + + + + + + + + + + + + + + + + - - - - - - - - + + + + + + + + + + + + + + diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..897aa94 --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,33 @@ +[tool.setuptools] +packages = [] + +[project] +name = "dap4-validator" +version = "0.1.0" +description = "Test suite for validating DAP4 XML (DMR) documents against a schema." +authors = [ + { name = "Miguel Angel Jimenez-Urias", email = "mjimenez@opendap.org" } +] +license = { text = "MIT" } +requires-python = ">=3.11" + +# Core dependencies only +dependencies = [ + "lxml>=4.9.3", + "pytest>=7.0", +] + +[project.optional-dependencies] +dev = [ + "ruff>=0.1.0", + "black>=23.0", +] + +[tool.pytest.ini_options] +testpaths = ["tests"] +addopts = "-v" + +[build-system] +requires = ["setuptools>=61"] +build-backend = "setuptools.build_meta" + diff --git a/tests/data/Attributes_BaseTypes.dmr b/tests/data/Attributes_BaseTypes.dmr new file mode 100644 index 0000000..31a87a8 --- /dev/null +++ b/tests/data/Attributes_BaseTypes.dmr @@ -0,0 +1,45 @@ + + + + 1 + + + 1 + + + 1 + + + 1 + + + 1 + + + 1 + + + 1 + + + 1 + + + 1 + + + 1 + + + 1 + + + 1 + + + Data + + + URL here + + diff --git a/tests/data/Attributes_test1.dmr b/tests/data/Attributes_test1.dmr new file mode 100644 index 0000000..6f1d568 --- /dev/null +++ b/tests/data/Attributes_test1.dmr @@ -0,0 +1,18 @@ + + + + DMR for testing Maps, Dims at root level (no Groups, Sequences or Structures). 
+ + + 1 + + + 1 + 2 + 3 + + + + + + \ No newline at end of file diff --git a/tests/data/Attributes_test2.dmr b/tests/data/Attributes_test2.dmr new file mode 100644 index 0000000..59a583d --- /dev/null +++ b/tests/data/Attributes_test2.dmr @@ -0,0 +1,8 @@ + + + + + DODS FreeFrom based on FFND release 4.2.3 + + + diff --git a/tests/data/Attributes_test3.dmr b/tests/data/Attributes_test3.dmr new file mode 100644 index 0000000..ee90bdd --- /dev/null +++ b/tests/data/Attributes_test3.dmr @@ -0,0 +1,6 @@ + + + + + + \ No newline at end of file diff --git a/tests/data/Attributes_test4.dmr b/tests/data/Attributes_test4.dmr new file mode 100644 index 0000000..971834e --- /dev/null +++ b/tests/data/Attributes_test4.dmr @@ -0,0 +1,28 @@ + + + + + Passive soil moisture estimates onto a 36-km global Earth-fixed grid, based on radiometer measurements acquired when the SMAP spacecraft is travelling from North to South at approximately 6:00 AM local time. + + + File_001.h5 + File_002.h5 + File_003.h5 + File_004.h5 + File_005.h5 + + + L2Data + + + 2017-01-04 + 2017-01-04 + 2017-01-04 + 2017-01-04 + 2017-01-05 + + + 36. 
+ + + \ No newline at end of file diff --git a/tests/data/Dataset_Declaration.dmr b/tests/data/Dataset_Declaration.dmr new file mode 100644 index 0000000..48be688 --- /dev/null +++ b/tests/data/Dataset_Declaration.dmr @@ -0,0 +1,3 @@ + + + \ No newline at end of file diff --git a/tests/data/Enum_test1.dmr b/tests/data/Enum_test1.dmr new file mode 100644 index 0000000..0deef8f --- /dev/null +++ b/tests/data/Enum_test1.dmr @@ -0,0 +1,18 @@ + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/tests/data/Enum_test2.dmr b/tests/data/Enum_test2.dmr new file mode 100644 index 0000000..8fbf934 --- /dev/null +++ b/tests/data/Enum_test2.dmr @@ -0,0 +1,20 @@ + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/tests/data/Enum_test3.dmr b/tests/data/Enum_test3.dmr new file mode 100644 index 0000000..770c65e --- /dev/null +++ b/tests/data/Enum_test3.dmr @@ -0,0 +1,21 @@ + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/tests/data/GroupStructureSequence.dmr b/tests/data/GroupStructureSequence.dmr new file mode 100644 index 0000000..b557988 --- /dev/null +++ b/tests/data/GroupStructureSequence.dmr @@ -0,0 +1,22 @@ + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/tests/data/GroupTest1.dmr b/tests/data/GroupTest1.dmr new file mode 100644 index 0000000..67c2223 --- /dev/null +++ b/tests/data/GroupTest1.dmr @@ -0,0 +1,36 @@ + + + + + + + + + + + + DMR for testing Maps, Dims at root level (no Groups, Sequences or Structures). + + + + + + + + + + + + + + + + + + + + + DMR for testing Maps, Dims at root level (no Groups, Sequences or Structures). 
+ + + \ No newline at end of file diff --git a/tests/data/Invalid_BaseType_Dim.dmr b/tests/data/Invalid_BaseType_Dim.dmr new file mode 100644 index 0000000..5ad12a8 --- /dev/null +++ b/tests/data/Invalid_BaseType_Dim.dmr @@ -0,0 +1,7 @@ + + + + + + + \ No newline at end of file diff --git a/tests/data/MapsArraysOnly.dmr b/tests/data/MapsArraysOnly.dmr new file mode 100644 index 0000000..b36a9aa --- /dev/null +++ b/tests/data/MapsArraysOnly.dmr @@ -0,0 +1,30 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + DMR for testing Maps, Dims at root level (no Groups, Sequences or Structures). + + \ No newline at end of file diff --git a/tests/data/NestedGroup.dmr b/tests/data/NestedGroup.dmr new file mode 100644 index 0000000..94f296e --- /dev/null +++ b/tests/data/NestedGroup.dmr @@ -0,0 +1,7 @@ + + + + + + + \ No newline at end of file diff --git a/tests/data/NestedStructure.dmr b/tests/data/NestedStructure.dmr new file mode 100644 index 0000000..f11f0a6 --- /dev/null +++ b/tests/data/NestedStructure.dmr @@ -0,0 +1,11 @@ + + + + + + + + + + + diff --git a/tests/data/OpaqueTest.dmr b/tests/data/OpaqueTest.dmr new file mode 100644 index 0000000..26fe96a --- /dev/null +++ b/tests/data/OpaqueTest.dmr @@ -0,0 +1,9 @@ + + + + + + + \ No newline at end of file diff --git a/tests/data/Structure_test.dmr b/tests/data/Structure_test.dmr new file mode 100644 index 0000000..9614ef5 --- /dev/null +++ b/tests/data/Structure_test.dmr @@ -0,0 +1,15 @@ + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/tests/data/Structure_test2.dmr b/tests/data/Structure_test2.dmr new file mode 100644 index 0000000..cdbde25 --- /dev/null +++ b/tests/data/Structure_test2.dmr @@ -0,0 +1,12 @@ + + + + + + + + + + + + \ No newline at end of file diff --git a/tests/data/ValidBaseTypes.dmr b/tests/data/ValidBaseTypes.dmr new file mode 100644 index 0000000..13081ad --- /dev/null +++ b/tests/data/ValidBaseTypes.dmr @@ -0,0 +1,41 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + 
"""Validate the sample DMR documents against the DAP4 XSD and semantic rules."""

from pathlib import Path

import pytest
from lxml import etree

from validate_dmr_semantics import validate_dim_semantics

# Directory containing this test file.
TEST_DIR = Path(__file__).resolve().parent

# Project root (parent of the tests directory).
ROOT_DIR = TEST_DIR.parent
# Location of the DAP4 schema.
SCHEMA_PATH = ROOT_DIR / "dap4" / "dap4.xsd"
# Directory holding the sample DMR documents.
DATA_DIR = TEST_DIR / "data"
# Sorted so that test collection order is stable across platforms.
DMR_PATHS = sorted(DATA_DIR.glob("*.dmr"))

# Files whose name starts with this prefix are expected to FAIL semantic
# validation; they are exercised by dedicated negative tests below.
INVALID_PREFIX = "Invalid"
VALID_DMR_PATHS = [p for p in DMR_PATHS if not p.name.startswith(INVALID_PREFIX)]


@pytest.fixture(scope="session")
def dap4_schema():
    """Load and compile the DAP4 XML schema once per test session."""
    return etree.XMLSchema(etree.parse(str(SCHEMA_PATH)))


# Filtering happens at collection time: an "Invalid*" file must not show up as
# a passing case of this test without any assertion having been run on it.
@pytest.mark.parametrize("dmr_file", VALID_DMR_PATHS, ids=lambda p: p.name)
def test_valid_dmrs(dap4_schema, dmr_file):
    """Each valid sample DMR passes both XSD and semantic validation."""
    doc = etree.parse(str(dmr_file))
    # XSD validation
    dap4_schema.assertValid(doc)
    # Semantic validation
    validate_dim_semantics(doc)


def test_fail_validate_dim_BaseType(dap4_schema):
    """A Dim lacking both name and size passes the XSD but fails semantics."""
    dmr_file = DATA_DIR / "Invalid_BaseType_Dim.dmr"

    doc = etree.parse(str(dmr_file))
    # XSD validation: the document is schema-valid ...
    dap4_schema.assertValid(doc)
    # ... but the semantic check must reject it.
    with pytest.raises(ValueError):
        validate_dim_semantics(doc)
"""Semantic checks for DAP4 DMR documents that are applied after XSD validation."""

from lxml import etree

DAP4_NS = "http://xml.opendap.org/ns/DAP/4.0#"
NS = {"d": DAP4_NS}

# Element names whose nested <Dim> elements are checked by
# validate_dim_semantics().
BASE_TYPES = {
    "Byte",
    "SignedByte",
    "Int16",
    "UInt16",
    "Int32",
    "UInt32",
    "Int64",
    "UInt64",
    "Float32",
    "Float64",
    "String",
    "Url",
    "Opaque",
    "Structure",
}


def validate_dim_semantics(doc):
    """
    Enforce: every <Dim> inside BaseTypes must have (name or size),
    and cannot omit both.

    Args:
        doc: A parsed document exposing ``getroot()`` (e.g. the result of
            ``lxml.etree.parse``).

    Raises:
        ValueError: If a ``Dim`` element nested at any depth inside one of
            the ``BASE_TYPES`` elements carries neither a ``name`` nor a
            ``size`` attribute. The message includes the offending element.
    """
    root = doc.getroot()

    # Sorted so that, when several elements are invalid, the one reported
    # first does not depend on set iteration order.
    for tag in sorted(BASE_TYPES):
        for base in root.xpath(f"//d:{tag}", namespaces=NS):
            for dim in base.xpath(".//d:Dim", namespaces=NS):
                # must have one or the other
                if dim.get("name") is None and dim.get("size") is None:
                    # encoding="unicode" yields str; a byte encoding would be
                    # rendered as b'...' inside the message.
                    xml = etree.tostring(dim, encoding="unicode", with_tail=False)
                    raise ValueError(
                        f"<Dim> in base type <{tag}> must have either @name or @size: "
                        f"{xml}"
                    )