diff --git a/.github/workflows/validate.yml b/.github/workflows/validate.yml
new file mode 100644
index 0000000..f701e41
--- /dev/null
+++ b/.github/workflows/validate.yml
@@ -0,0 +1,29 @@
+name: Validate DAP4 XML
+
+# Run the validation suite on pushes to master and on pull requests
+# that target master.
+on:
+  push:
+    branches: [ master ]
+  pull_request:
+    branches: [ master ]
+
+jobs:
+  validate-dmr:
+    runs-on: ubuntu-latest
+
+    steps:
+      # checkout@v4 / setup-python@v5 are the majors built on the Node 20
+      # runtime; the previous majors (v3 / v4) ran on the deprecated Node 16.
+      - name: Checkout repository
+        uses: actions/checkout@v4
+
+      - name: Set up Python
+        uses: actions/setup-python@v5
+        with:
+          # Quoted so YAML keeps the version as a string.
+          python-version: '3.12'
+
+      - name: Install dependencies
+        run: |
+          python -m pip install --upgrade pip
+          pip install lxml pytest
+
+      - name: Run DMR XML validation
+        run: |
+          pytest -v
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..476442c
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,51 @@
+*.py[cod]
+
+# C extensions
+*.so
+
+# Packages
+*.egg
+*.egg-info
+dist
+build
+eggs
+parts
+bin
+var
+sdist
+develop-eggs
+.installed.cfg
+lib
+lib64
+
+# Installer logs
+pip-log.txt
+
+# Ignore shell scripts (note: *.sh matches at every directory level, not only the repo root)
+*.sh
+
+# Unit test / coverage reports
+.coverage
+.tox
+nosetests.xml
+
+# Translations
+*.mo
+
+# Mr Developer
+.mr.developer.cfg
+.project
+.pydevproject
+
+# Vim
+*.swp
+
+.cache
+__pycache__
+tests/__pycache__
+
+# OS-X Finder
+*.DS_Store
+
+# IDEA Projects
+.idea
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
new file mode 100644
index 0000000..55aa715
--- /dev/null
+++ b/.pre-commit-config.yaml
@@ -0,0 +1,29 @@
+repos:
+  - repo: https://github.com/psf/black
+    rev: 23.12.1
+    hooks:
+      - id: black
+        files: ^tests/.*\.py$
+
+  - repo: https://github.com/astral-sh/ruff-pre-commit
+    rev: v0.3.2
+    hooks:
+      - id: ruff
+        args: ["--fix"]
+        files: ^tests/.*\.py$
+
+  - repo: https://github.com/pre-commit/pre-commit-hooks
+    rev: v4.5.0
+    hooks:
+      # `files` is ONE regular expression searched against each path.
+      # Listing one pattern per line in a `|` block scalar produces a regex
+      # with literal newlines in it, which no file path can match, so the
+      # alternatives are joined with `|` on a single line instead.
+      - id: check-xml
+        files: ^(dap4/dap4\.xsd|tests/data/.*\.dmr)$
+      - id: trailing-whitespace
+        files: ^(dap4|tests)/.*$
+      - id: end-of-file-fixer
+        files: ^(dap4|tests)/.*$
diff --git a/dap4/dap4.xsd b/dap4/dap4.xsd
index 71e95a4..b79155d 100644
--- a/dap4/dap4.xsd
+++ b/dap4/dap4.xsd
@@ -1,196 +1,278 @@
-
-
-
-
-
-
- Semantic restriction: xml attributes are allowed
- only on the root group, where both dapVersion and base are
- required and ns is optional.
-
-
-
-
-
-
-
-
-
-
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ DAP Variable Types
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ This is the XML representation of a DAP DMR object.
+
+
+
+
+
+
+
+
+
+
+
+
+
+
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+
+
+
+
+ A Group is a lexical scoping tool used to replicate HDF5 and netCDF4
+ Groups. Each Group defines a lexical scope. Each dataset has at least one Group; if
+ only one is present, it may be anonymous. In this case, by convention, its name
+ attribute should be 'anonymous'.
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ This holds a dimension, a name and size, that may be shared between
+ Arrays. SharedDimensions are lexically scoped.
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ DAP Attribute Type
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
-
+
+
+
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+
+
+
+
+
+
+
+
+
+
+
+
+
+ When we want to embed arbitrary XML in a DMR, use this node. This
+ functions like an attribute and appears in the same general place as an attribute,
+ but its contents are ignored by DAP software. Other software might find the
+ information useful.
+
+
+
+
+
+
-
-
-
-
-
-
-
-
-
+
+
+
+ DAP Base Type
+
+
+
+
+
+
+
+
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+
+
+
+
-
-
-
-
-
-
-
-
-
-
-
-
+
+
+
+
+
+
+
-
-
-
-
-
-
-
-
-
+
+
+
+
+
+
+
+
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+
+
+
@@ -200,22 +282,36 @@
-
-
-
-
-
-
-
-
-
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
-
-
-
-
-
-
-
-
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/pyproject.toml b/pyproject.toml
new file mode 100644
index 0000000..897aa94
--- /dev/null
+++ b/pyproject.toml
@@ -0,0 +1,33 @@
+# Build backend declaration.
+[build-system]
+requires = ["setuptools>=61"]
+build-backend = "setuptools.build_meta"
+
+[project]
+name = "dap4-validator"
+version = "0.1.0"
+description = "Test suite for validating DAP4 XML (DMR) documents against a schema."
+authors = [
+    { name = "Miguel Angel Jimenez-Urias", email = "mjimenez@opendap.org" }
+]
+license = { text = "MIT" }
+requires-python = ">=3.11"
+# Core dependencies only
+dependencies = [
+    "lxml>=4.9.3",
+    "pytest>=7.0",
+]
+
+# Optional formatting / linting extras.
+[project.optional-dependencies]
+dev = [
+    "ruff>=0.1.0",
+    "black>=23.0",
+]
+
+# Empty list: no importable packages are declared for this project.
+[tool.setuptools]
+packages = []
+
+# pytest collects from tests/ and always runs verbosely.
+[tool.pytest.ini_options]
+testpaths = ["tests"]
+addopts = "-v"
+
diff --git a/tests/data/Attributes_BaseTypes.dmr b/tests/data/Attributes_BaseTypes.dmr
new file mode 100644
index 0000000..31a87a8
--- /dev/null
+++ b/tests/data/Attributes_BaseTypes.dmr
@@ -0,0 +1,45 @@
+
+
+
+ 1
+
+
+ 1
+
+
+ 1
+
+
+ 1
+
+
+ 1
+
+
+ 1
+
+
+ 1
+
+
+ 1
+
+
+ 1
+
+
+ 1
+
+
+ 1
+
+
+ 1
+
+
+ Data
+
+
+ URL here
+
+
diff --git a/tests/data/Attributes_test1.dmr b/tests/data/Attributes_test1.dmr
new file mode 100644
index 0000000..6f1d568
--- /dev/null
+++ b/tests/data/Attributes_test1.dmr
@@ -0,0 +1,18 @@
+
+
+
+ DMR for testing Maps, Dims at root level (no Groups, Sequences or Structures).
+
+
+ 1
+
+
+ 1
+ 2
+ 3
+
+
+
+
+
+
\ No newline at end of file
diff --git a/tests/data/Attributes_test2.dmr b/tests/data/Attributes_test2.dmr
new file mode 100644
index 0000000..59a583d
--- /dev/null
+++ b/tests/data/Attributes_test2.dmr
@@ -0,0 +1,8 @@
+
+
+
+
+ DODS FreeFrom based on FFND release 4.2.3
+
+
+
diff --git a/tests/data/Attributes_test3.dmr b/tests/data/Attributes_test3.dmr
new file mode 100644
index 0000000..ee90bdd
--- /dev/null
+++ b/tests/data/Attributes_test3.dmr
@@ -0,0 +1,6 @@
+
+
+
+
+
+
\ No newline at end of file
diff --git a/tests/data/Attributes_test4.dmr b/tests/data/Attributes_test4.dmr
new file mode 100644
index 0000000..971834e
--- /dev/null
+++ b/tests/data/Attributes_test4.dmr
@@ -0,0 +1,28 @@
+
+
+
+
+ Passive soil moisture estimates onto a 36-km global Earth-fixed grid, based on radiometer measurements acquired when the SMAP spacecraft is travelling from North to South at approximately 6:00 AM local time.
+
+
+ File_001.h5
+ File_002.h5
+ File_003.h5
+ File_004.h5
+ File_005.h5
+
+
+ L2Data
+
+
+ 2017-01-04
+ 2017-01-04
+ 2017-01-04
+ 2017-01-04
+ 2017-01-05
+
+
+ 36.
+
+
+
\ No newline at end of file
diff --git a/tests/data/Dataset_Declaration.dmr b/tests/data/Dataset_Declaration.dmr
new file mode 100644
index 0000000..48be688
--- /dev/null
+++ b/tests/data/Dataset_Declaration.dmr
@@ -0,0 +1,3 @@
+
+
+
\ No newline at end of file
diff --git a/tests/data/Enum_test1.dmr b/tests/data/Enum_test1.dmr
new file mode 100644
index 0000000..0deef8f
--- /dev/null
+++ b/tests/data/Enum_test1.dmr
@@ -0,0 +1,18 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/tests/data/Enum_test2.dmr b/tests/data/Enum_test2.dmr
new file mode 100644
index 0000000..8fbf934
--- /dev/null
+++ b/tests/data/Enum_test2.dmr
@@ -0,0 +1,20 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/tests/data/Enum_test3.dmr b/tests/data/Enum_test3.dmr
new file mode 100644
index 0000000..770c65e
--- /dev/null
+++ b/tests/data/Enum_test3.dmr
@@ -0,0 +1,21 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/tests/data/GroupStructureSequence.dmr b/tests/data/GroupStructureSequence.dmr
new file mode 100644
index 0000000..b557988
--- /dev/null
+++ b/tests/data/GroupStructureSequence.dmr
@@ -0,0 +1,22 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/tests/data/GroupTest1.dmr b/tests/data/GroupTest1.dmr
new file mode 100644
index 0000000..67c2223
--- /dev/null
+++ b/tests/data/GroupTest1.dmr
@@ -0,0 +1,36 @@
+
+
+
+
+
+
+
+
+
+
+
+ DMR for testing Maps, Dims at root level (no Groups, Sequences or Structures).
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ DMR for testing Maps, Dims at root level (no Groups, Sequences or Structures).
+
+
+
\ No newline at end of file
diff --git a/tests/data/Invalid_BaseType_Dim.dmr b/tests/data/Invalid_BaseType_Dim.dmr
new file mode 100644
index 0000000..5ad12a8
--- /dev/null
+++ b/tests/data/Invalid_BaseType_Dim.dmr
@@ -0,0 +1,7 @@
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/tests/data/MapsArraysOnly.dmr b/tests/data/MapsArraysOnly.dmr
new file mode 100644
index 0000000..b36a9aa
--- /dev/null
+++ b/tests/data/MapsArraysOnly.dmr
@@ -0,0 +1,30 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ DMR for testing Maps, Dims at root level (no Groups, Sequences or Structures).
+
+
\ No newline at end of file
diff --git a/tests/data/NestedGroup.dmr b/tests/data/NestedGroup.dmr
new file mode 100644
index 0000000..94f296e
--- /dev/null
+++ b/tests/data/NestedGroup.dmr
@@ -0,0 +1,7 @@
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/tests/data/NestedStructure.dmr b/tests/data/NestedStructure.dmr
new file mode 100644
index 0000000..f11f0a6
--- /dev/null
+++ b/tests/data/NestedStructure.dmr
@@ -0,0 +1,11 @@
+
+
+
+
+
+
+
+
+
+
+
diff --git a/tests/data/OpaqueTest.dmr b/tests/data/OpaqueTest.dmr
new file mode 100644
index 0000000..26fe96a
--- /dev/null
+++ b/tests/data/OpaqueTest.dmr
@@ -0,0 +1,9 @@
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/tests/data/Structure_test.dmr b/tests/data/Structure_test.dmr
new file mode 100644
index 0000000..9614ef5
--- /dev/null
+++ b/tests/data/Structure_test.dmr
@@ -0,0 +1,15 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/tests/data/Structure_test2.dmr b/tests/data/Structure_test2.dmr
new file mode 100644
index 0000000..cdbde25
--- /dev/null
+++ b/tests/data/Structure_test2.dmr
@@ -0,0 +1,12 @@
+
+
+
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/tests/data/ValidBaseTypes.dmr b/tests/data/ValidBaseTypes.dmr
new file mode 100644
index 0000000..13081ad
--- /dev/null
+++ b/tests/data/ValidBaseTypes.dmr
@@ -0,0 +1,41 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/tests/data/testSequence1.dmr b/tests/data/testSequence1.dmr
new file mode 100644
index 0000000..1353067
--- /dev/null
+++ b/tests/data/testSequence1.dmr
@@ -0,0 +1,12 @@
+
+
+
+
+
+
+
+
+
+ Test sequence.
+
+
\ No newline at end of file
diff --git a/tests/data/testSequence2.dmr b/tests/data/testSequence2.dmr
new file mode 100644
index 0000000..4a0d2df
--- /dev/null
+++ b/tests/data/testSequence2.dmr
@@ -0,0 +1,16 @@
+
+
+
+ Test sequence.
+
+
+
+
+
+
+
+
+ Test sequence.
+
+
+
\ No newline at end of file
diff --git a/tests/test_validate_dmrs.py b/tests/test_validate_dmrs.py
new file mode 100644
index 0000000..e622fc7
--- /dev/null
+++ b/tests/test_validate_dmrs.py
@@ -0,0 +1,44 @@
+from lxml import etree
+import pytest
+from pathlib import Path
+from validate_dmr_semantics import validate_dim_semantics
+
+# Path to this test file
+TEST_DIR = Path(__file__).resolve().parent
+
+# Path to project root
+ROOT_DIR = TEST_DIR.parent
+# Path to the schema
+SCHEMA_PATH = ROOT_DIR / "dap4" / "dap4.xsd"
+# Path to test dmrs
+DATA_DIR = TEST_DIR / "data"
+DMR_PATHS = list(DATA_DIR.glob("*.dmr"))
+
+
+@pytest.fixture(scope="session")
+def dap4_schema():
+    """Compile the DAP4 XSD once per test session.
+
+    Returns:
+        etree.XMLSchema: validator built from the document at ``SCHEMA_PATH``.
+    """
+    with open(SCHEMA_PATH, "rb") as xsd_stream:
+        return etree.XMLSchema(etree.parse(xsd_stream))
+
+
+@pytest.mark.parametrize(
+    "dmr_file",
+    # "Invalid*" fixtures are excluded at collection time so they are not
+    # reported as (vacuous) passes; sorting keeps the run order reproducible.
+    sorted(p for p in DMR_PATHS if not p.name.startswith("Invalid")),
+    ids=lambda p: p.name,
+)
+def test_valid_dmrs(dap4_schema, dmr_file):
+    """Each valid DMR fixture must pass both XSD and semantic validation.
+
+    Args:
+        dap4_schema: compiled schema provided by the ``dap4_schema`` fixture.
+        dmr_file: path of one ``*.dmr`` file from ``DATA_DIR``.
+    """
+    doc = etree.parse(str(dmr_file))
+    # XSD validation
+    dap4_schema.assertValid(doc)
+    # Semantic validation
+    validate_dim_semantics(doc)
+
+
+def test_fail_validate_dim_BaseType(dap4_schema):
+    """The invalid-Dim fixture passes the XSD but fails the semantic check.
+
+    Args:
+        dap4_schema: compiled schema provided by the ``dap4_schema`` fixture.
+    """
+    # The file name is fixed, so no name-based guard is needed: the
+    # assertions below always run.
+    dmr_file = DATA_DIR / "Invalid_BaseType_Dim.dmr"
+
+    doc = etree.parse(str(dmr_file))
+    # XSD validation
+    dap4_schema.assertValid(doc)
+    # Semantic validation must reject the document
+    with pytest.raises(ValueError):
+        validate_dim_semantics(doc)
diff --git a/tests/validate_dmr_semantics.py b/tests/validate_dmr_semantics.py
new file mode 100644
index 0000000..343e647
--- /dev/null
+++ b/tests/validate_dmr_semantics.py
@@ -0,0 +1,65 @@
+from lxml import etree
+
+# DAP4 XML namespace and the prefix map used for XPath queries.
+DAP4_NS = "http://xml.opendap.org/ns/DAP/4.0#"
+NS = {"d": DAP4_NS}
+
+# Element names whose nested <Dim> elements are inspected by
+# validate_dim_semantics(). Defined once; the verbatim second copy of these
+# three assignments was redundant and has been removed.
+BASE_TYPES = {
+    "Byte",
+    "SignedByte",
+    "Int16",
+    "UInt16",
+    "Int32",
+    "UInt32",
+    "Int64",
+    "UInt64",
+    "Float32",
+    "Float64",
+    "String",
+    "Url",
+    "Opaque",
+    "Structure",
+}
+
+
+def validate_dim_semantics(doc):
+    """
+    Enforce: every <Dim> nested inside a base-type element must carry a
+    ``name`` or a ``size`` attribute; it cannot omit both.
+
+    Args:
+        doc: parsed document exposing ``getroot()`` (e.g. the result of
+            ``etree.parse``).
+
+    Raises:
+        ValueError: on the first <Dim> found that has neither attribute.
+    """
+    root = doc.getroot()
+
+    # BASE_TYPES is a set; sorting makes the first reported element the same
+    # on every run.
+    for tag in sorted(BASE_TYPES):
+        for base in root.xpath(f"//d:{tag}", namespaces=NS):
+            # `.//` also reaches <Dim> elements nested deeper than direct
+            # children of `base`.
+            for dim in base.xpath(".//d:Dim", namespaces=NS):
+                # must have one or the other
+                if dim.get("name") is None and dim.get("size") is None:
+                    # encoding="unicode" returns str, so the message shows the
+                    # markup itself instead of a bytes repr (b'...').
+                    snippet = etree.tostring(
+                        dim, encoding="unicode", with_tail=False
+                    )
+                    raise ValueError(
+                        f"<Dim> in base type <{tag}> must have either @name or @size: "
+                        f"{snippet}"
+                    )