diff --git a/src/fairscape_cli/models/__init__.py b/src/fairscape_cli/models/__init__.py index bdc45ab..41a2ef8 100644 --- a/src/fairscape_cli/models/__init__.py +++ b/src/fairscape_cli/models/__init__.py @@ -1,6 +1,8 @@ from fairscape_cli.models.dataset import ( Dataset, - GenerateDataset + GenerateDataset, + generateSummaryStatsElements, + registerOutputs ) from fairscape_cli.models.software import Software, GenerateSoftware from fairscape_cli.models.computation import Computation, GenerateComputation @@ -9,13 +11,16 @@ GenerateROCrate, ReadROCrateMetadata, AppendCrate, - CopyToROCrate + CopyToROCrate, + UpdateCrate ) from fairscape_cli.models.bagit import BagIt __all__ = [ 'Dataset', 'GenerateDataset', + 'generateSummaryStatsElements', + 'registerOutputs', 'Software', 'GenerateSoftware', 'Computation', @@ -25,5 +30,6 @@ 'ReadROCrateMetadata', 'AppendCrate', 'CopyToROCrate', + 'UpdateCrate', 'BagIt' ] diff --git a/src/fairscape_cli/models/computation.py b/src/fairscape_cli/models/computation.py index fa3f7f3..24e67ac 100644 --- a/src/fairscape_cli/models/computation.py +++ b/src/fairscape_cli/models/computation.py @@ -1,19 +1,12 @@ -from fairscape_cli.models.base import FairscapeBaseModel -from fairscape_cli.models.utils import GenerateDatetimeSquid -from fairscape_cli.config import NAAN - -from typing import ( - Optional, - List, - Union, - Dict, -) -from pydantic import ( - Field, - AnyUrl -) import re from datetime import datetime +from typing import Optional, List, Union, Dict + +from pydantic import Field, AnyUrl + +from fairscape_cli.config import NAAN +from fairscape_cli.models.base import FairscapeBaseModel +from fairscape_cli.models.guid_utils import GenerateDatetimeSquid class Computation(FairscapeBaseModel): @@ -67,7 +60,7 @@ def GenerateComputation( computation_model = Computation.model_validate( { "@id": guid, - "@type": "https://w2id.org/EVI#Computation", + "@type": "https://w3id.org/EVI#Computation", "name": name, "description": description, "keywords": keywords, diff --git a/src/fairscape_cli/models/dataset.py b/src/fairscape_cli/models/dataset.py index 1ca3a8f..9f662ff 100644 --- a/src/fairscape_cli/models/dataset.py +++ b/src/fairscape_cli/models/dataset.py @@ -1,22 +1,7 @@ -from fairscape_cli.models.base import ( - FairscapeBaseModel, - Identifier -) -from fairscape_cli.config import ( - NAAN -) -from fairscape_cli.models.utils import GenerateDatetimeSquid, FileNotInCrateException -from fairscape_cli.models.schema.tabular import ( - TabularValidationSchema -) - +# Standard library imports import pathlib -from typing import ( - Optional, - List, - Union, - Dict -) +from datetime import datetime +from typing import Optional, List, Union, Dict, Tuple, Set from pydantic import ( BaseModel, @@ -25,7 +10,10 @@ AnyUrl, field_serializer ) -from datetime import datetime + +from fairscape_cli.models.base import FairscapeBaseModel +from fairscape_cli.models.guid_utils import GenerateDatetimeSquid +from fairscape_cli.config import NAAN class Dataset(FairscapeBaseModel): @@ -44,6 +32,7 @@ class Dataset(FairscapeBaseModel): derivedFrom: Optional[List[str]] = Field(default=[]) usedBy: Optional[List[str]] = Field(default=[]) contentUrl: Optional[str] = Field(default=None) + hasSummaryStatistics: Optional[Union[str, List[str]]] = Field(default=None) #@field_serializer('datePublished') #def serialize_date_published(self, datePublished: datetime): @@ -68,11 +57,13 @@ def GenerateDataset( usedBy: Optional[List[str]], generatedBy: Optional[List[str]], filepath: Optional[str], - cratePath + 
cratePath, + summary_stats_guid: Optional[str] = None ): - sq = GenerateDatetimeSquid() - guid = f"ark:{NAAN}/dataset-{name.lower().replace(' ', '-')}-{sq}" + if not guid: + sq = GenerateDatetimeSquid() + guid = f"ark:{NAAN}/dataset-{name.lower().replace(' ', '-')}-{sq}" datasetMetadata = { "@id": guid, @@ -88,22 +79,14 @@ def GenerateDataset( "additionalDocumentation": additionalDocumentation, "format": dataFormat, "schema": schema, - # sanitize input lists of newline breaks - "derivedFrom": [ - derived.strip("\n") for derived in derivedFrom - ], - "usedBy": [ - used.strip("\n") for used in usedBy - ], - "generatedBy": [ - gen.strip("\n") for gen in generatedBy - ] + "derivedFrom": [derived.strip("\n") for derived in derivedFrom], + "usedBy": [used.strip("\n") for used in usedBy], + "generatedBy": [gen.strip("\n") for gen in generatedBy], + "hasSummaryStatistics": summary_stats_guid } - datasetMetadata['contentURL'] = setRelativeFilepath(cratePath, filepath) - + datasetMetadata['contentUrl'] = setRelativeFilepath(cratePath, filepath) datasetInstance = Dataset.model_validate(datasetMetadata) - return datasetInstance @@ -136,4 +119,117 @@ def setRelativeFilepath(cratePath, filePath): # if relative filepath datasetPath = pathlib.Path(filePath).absolute() relativePath = datasetPath.relative_to(rocratePath) - return f"file:///{str(relativePath)}" \ No newline at end of file + return f"file:///{str(relativePath)}" + + +from fairscape_cli.models.computation import GenerateComputation, Computation +def generateSummaryStatsElements( + name: str, + author: str, + keywords: List[str], + date_published: str, + version: str, + associated_publication: Optional[str], + additional_documentation: Optional[str], + schema: Optional[str], + dataset_guid: str, + summary_statistics_filepath: str, + crate_path: pathlib.Path +) -> Tuple[str, Dataset, Computation]: + """Generate summary statistics dataset and computation elements + + Args: + name: Name of the main dataset + author: Author of the dataset + keywords: Dataset keywords + date_published: Publication date + version: Dataset version + associated_publication: Optional associated publication + additional_documentation: Optional additional documentation + schema: Optional schema + dataset_guid: GUID of the main dataset + summary_statistics_filepath: Path to summary statistics file + crate_path: Path to RO-Crate + + Returns: + Tuple containing: + - Summary statistics GUID + - Summary statistics Dataset instance + - Computation instance that generated the summary statistics + """ + # Generate GUIDs + sq_stats = GenerateDatetimeSquid() + summary_stats_guid = f"ark:{NAAN}/dataset-{name.lower().replace(' ', '-')}-stats-{sq_stats}" + + sq_comp = GenerateDatetimeSquid() + computation_guid = f"ark:{NAAN}/computation-{name.lower().replace(' ', '-')}-stats-{sq_comp}" + + # Create computation instance + computation_instance = GenerateComputation( + guid=computation_guid, + name=f"Summary Statistics Computation for {name}", + runBy=author, + command="", + dateCreated=date_published, + description=f"Computation that generated summary statistics for dataset: {name}", + keywords=keywords, + usedSoftware=[], + usedDataset=[dataset_guid], + generated=[summary_stats_guid] + ) + + # Create summary statistics dataset + summary_stats_instance = GenerateDataset( + guid=summary_stats_guid, + url=None, + author=author, + name=f"{name} - Summary Statistics", + description=f"Summary statistics for dataset: {name}", + keywords=keywords, + datePublished=date_published, + 
version=version, + associatedPublication=associated_publication, + additionalDocumentation=additional_documentation, + dataFormat='pdf', + schema=schema, + derivedFrom=[], + generatedBy=[computation_guid], + usedBy=[], + filepath=summary_statistics_filepath, + cratePath=crate_path, + summary_stats_guid=None + ) + + return summary_stats_guid, summary_stats_instance, computation_instance + +def registerOutputs( + new_files: Set[pathlib.Path], + computation_id: str, + dataset_id: str, + author: str +) -> List[Dict]: + """Register all outputs as datasets""" + output_instances = [] + for file_path in new_files: + file_path_str = str(file_path) + output_instance = GenerateDataset( + guid=None, + name=f"Statistics Output - {file_path.name}", + author=author, # Use the original author + description=f"Statistical analysis output for {dataset_id}", + keywords=["statistics"], + datePublished=datetime.now().isoformat(), + version="1.0", + dataFormat=file_path.suffix[1:], + filepath=file_path_str, + cratePath=str(file_path.parent), + url=None, + associatedPublication=None, + additionalDocumentation=None, + schema=None, + derivedFrom=[], + usedBy=[], + generatedBy=[computation_id] + ) + output_instances.append(output_instance) + return output_instances \ No newline at end of file diff --git a/src/fairscape_cli/models/guid_utils.py b/src/fairscape_cli/models/guid_utils.py new file mode 100644 index 0000000..a85988f --- /dev/null +++ b/src/fairscape_cli/models/guid_utils.py @@ -0,0 +1,31 @@ +from sqids import Sqids +import random +import datetime + +from typing import Set, Dict, List, Optional, Tuple + +from fairscape_cli.config import NAAN + +squids = Sqids(min_length=6) + +def GenerateDatetimeSquid(): + try: + timestamp_int = int(datetime.datetime.now(datetime.UTC).timestamp()) + sq = squids.encode([timestamp_int, random.randint(0, 10000)]) + except: + timestamp_int = int(datetime.datetime.utcnow().timestamp()) + sq = squids.encode([timestamp_int]) + return sq + +def GenerateDatetimeGUID(prefix: str)->str: + try: + timestamp_int = int(datetime.datetime.now(datetime.UTC).timestamp()) + sq = squids.encode([timestamp_int]) + except: + timestamp_int = int(datetime.datetime.utcnow().timestamp()) + sq = squids.encode([timestamp_int]) + return f"ark:{NAAN}/{prefix}-{sq}" + +def GenerateGUID(data: List[int], prefix: str)-> str: + squid_encoded = squids.encode(data) + return f"ark:{NAAN}/{prefix}-{squid_encoded}" \ No newline at end of file diff --git a/src/fairscape_cli/models/rocrate.py b/src/fairscape_cli/models/rocrate.py index 91c8ade..275c8b1 100644 --- a/src/fairscape_cli/models/rocrate.py +++ b/src/fairscape_cli/models/rocrate.py @@ -1,30 +1,16 @@ -from fairscape_cli.models import ( - Software, - Dataset, - Computation -) -from fairscape_cli.models.utils import GenerateDatetimeSquid -from fairscape_cli.config import ( - DEFAULT_CONTEXT, - NAAN -) - import pathlib import shutil import json +from typing import Optional, Union, List, Literal, Dict + from prettytable import PrettyTable -from pydantic import ( - BaseModel, - computed_field, - Field, -) -from typing import ( - Optional, - Union, - List, - Literal, - Dict -) +from pydantic import BaseModel, computed_field, Field + +from fairscape_cli.config import NAAN, DEFAULT_CONTEXT +from fairscape_cli.models.software import Software +from fairscape_cli.models.dataset import Dataset +from fairscape_cli.models.computation import Computation +from fairscape_cli.models.guid_utils import GenerateDatetimeSquid class ROCrateMetadata(BaseModel): guid: Optional[str] 
= Field(alias="@id", default=None) @@ -321,3 +307,32 @@ def CopyToROCrate(source_filepath: str, destination_filepath: str): # copy the file into the destinationPath shutil.copy(source_path, destination_path) +def UpdateCrate( + cratePath: pathlib.Path, + element: Union[Dataset, Software, Computation] +): + """Update an existing element in the RO-Crate metadata by matching @id + + Args: + cratePath: Path to the RO-Crate directory or metadata file + element: Updated element to replace existing one with matching @id + """ + if cratePath.is_dir(): + cratePath = cratePath / 'ro-crate-metadata.json' + + with cratePath.open("r+") as rocrate_metadata_file: + rocrate_metadata = json.load(rocrate_metadata_file) + + # Find and replace the element with matching @id + for i, existing in enumerate(rocrate_metadata['@graph']): + if existing.get('@id') == element.guid: + rocrate_metadata['@graph'][i] = element.model_dump( + by_alias=True, + exclude_none=True + ) + break + + # Write back the updated metadata + rocrate_metadata_file.seek(0) + rocrate_metadata_file.truncate() + json.dump(rocrate_metadata, rocrate_metadata_file, indent=2) \ No newline at end of file diff --git a/src/fairscape_cli/models/schema/tabular.py b/src/fairscape_cli/models/schema/tabular.py index 387444a..fda4551 100644 --- a/src/fairscape_cli/models/schema/tabular.py +++ b/src/fairscape_cli/models/schema/tabular.py @@ -1,17 +1,13 @@ -import jsonschema import pathlib -from functools import lru_cache import os import json import pandas as pd -import pyarrow.parquet as pq -import pyarrow.compute as pc import h5py +from datetime import datetime from enum import Enum from pydantic import ( BaseModel, ConfigDict, - computed_field, Field, ValidationError, model_validator @@ -19,21 +15,16 @@ from typing import ( Dict, List, - Optional, - Union, + Optional, Literal, - Type + Union ) +from frictionless import Schema, Resource, describe, fields + from fairscape_cli.models.schema.utils import ( - GenerateSlice, PropertyNameException, ColumnIndexException, - map_arrow_type_to_json_schema -) - -from fairscape_cli.models.utils import ( - GenerateDatetimeSquid ) from fairscape_cli.config import ( @@ -46,14 +37,11 @@ class FileType(str, Enum): CSV = "csv" TSV = "tsv" PARQUET = "parquet" - HDF5 = "h5" @classmethod def from_extension(cls, filepath: str) -> 'FileType': - ext = pathlib.Path(filepath).suffix.lower()[1:] # Remove the dot - if ext == 'h5' or ext == 'hdf5': - return cls.HDF5 - elif ext == 'parquet': + ext = pathlib.Path(filepath).suffix.lower()[1:] + if ext == 'parquet': return cls.PARQUET elif ext == 'tsv': return cls.TSV @@ -62,6 +50,14 @@ def from_extension(cls, filepath: str) -> 'FileType': else: raise ValueError(f"Unsupported file extension: {ext}") +class ValidationError(BaseModel): + message: str + row: Optional[int] = None + field: Optional[str] = None + type: str = "ValidationError" + failed_keyword: str + path: Optional[str] = None + class DatatypeEnum(str, Enum): NULL = "null" BOOLEAN = "boolean" @@ -142,250 +138,220 @@ def check_max_min(self) -> 'IntegerProperty': raise ValueError('IntegerProperty attribute maximum !< minimum') return self -class BaseSchema(BaseModel): +def frictionless_type_to_json_schema(field_type: str) -> str: + """Convert Frictionless types to JSON Schema types""" + type_mapping = { + 'string': 'string', + 'integer': 'integer', + 'number': 'number', + 'boolean': 'boolean', + 'date': 'string', + 'datetime': 'string', + 'year': 'integer', + 'yearmonth': 'string', + 'duration': 'string', + 'geopoint': 'array', 
+ 'geojson': 'object', + 'array': 'array', + 'object': 'object', + 'time': 'string' + } + return type_mapping.get(field_type, 'string') + +class TabularValidationSchema(BaseModel): + model_config = ConfigDict(populate_by_name=True) + guid: Optional[str] = Field(alias="@id", default=None) context: Optional[Dict] = Field(default=DEFAULT_CONTEXT, alias="@context") metadataType: Optional[str] = Field(default=DEFAULT_SCHEMA_TYPE, alias="@type") - schema_version: str = Field(default="https://json-schema.org/draft/2020-12/schema", alias="schema") + schema_version: str = Field(default="https://json-schema.org/draft/2020-12/schema", alias="$schema") name: str description: str datatype: str = Field(default="object", alias="type") + separator: str = Field(description="Field separator for the file") + header: bool = Field(description="Do files of this schema have a header row", default=True) + required: List[str] = Field(default=[]) + properties: Dict[str, Dict] = Field(default={}) additionalProperties: bool = Field(default=True) - required: List[str] = Field(description="list of required properties by name", default=[]) - examples: Optional[List[Dict[str, str]]] = Field(default=[]) + + # Store the frictionless schema + _frictionless_schema: Optional[Schema] = None def generate_guid(self) -> str: + """Generate a unique identifier for the schema""" if self.guid is None: prefix = f"schema-{self.name.lower().replace(' ', '-')}" - sq = GenerateDatetimeSquid() - self.guid = f"ark:{NAAN}/{prefix}-{sq}" + timestamp = datetime.now().strftime("%Y%m%d-%H%M%S") + self.guid = f"ark:{NAAN}/{prefix}-{timestamp}" return self.guid - + @model_validator(mode='after') - def generate_all_guids(self) -> 'BaseSchema': + def generate_all_guids(self) -> 'TabularValidationSchema': """Generate GUIDs for this schema and any nested schemas""" self.generate_guid() - - # Generate GUIDs for any nested schemas in properties - if hasattr(self, 'properties'): - for prop in self.properties.values(): - if isinstance(prop, BaseSchema): - prop.generate_guid() - return self - - def to_json_schema(self) -> dict: - """Convert the HDF5Schema to JSON Schema format""" - schema = self.model_dump( - by_alias=True, - exclude_unset=True, - exclude_none=True - ) - return schema - -PropertyUnion = Union[StringProperty, ArrayProperty, BooleanProperty, NumberProperty, IntegerProperty, NullProperty] -class TabularValidationSchema(BaseSchema): - properties: Dict[str, PropertyUnion] = Field(default={}) - separator: str = Field(description="Field separator for the file") - header: bool = Field(description="Do files of this schema have a header row", default=False) @classmethod def infer_from_file(cls, filepath: str, name: str, description: str, include_min_max: bool = False) -> 'TabularValidationSchema': - """Infer schema from a file""" + """Infer schema from a file using Frictionless""" file_type = FileType.from_extension(filepath) + separator = '\t' if file_type == FileType.TSV else ',' - if file_type == FileType.PARQUET: - return cls.infer_from_parquet(name, description, None, filepath, include_min_max) - else: # csv or tsv - separator = '\t' if file_type == FileType.TSV else ',' - df = pd.read_csv(filepath, sep=separator) - return cls.infer_from_dataframe(df, name, description, include_min_max, separator) - - @classmethod - def infer_from_dataframe(cls, df: pd.DataFrame, name: str, description: str, include_min_max: bool = False, separator: str = ',') -> 'TabularValidationSchema': - """Infer schema from a pandas DataFrame""" - type_map = { - 'int16': 
('integer', IntegerProperty, int), - 'int32': ('integer', IntegerProperty, int), - 'int64': ('integer', IntegerProperty, int), - 'uint8': ('integer', IntegerProperty, int), - 'uint16': ('integer', IntegerProperty, int), - 'uint32': ('integer', IntegerProperty, int), - 'uint64': ('integer', IntegerProperty, int), - 'float16': ('number', NumberProperty, float), - 'float32': ('number', NumberProperty, float), - 'float64': ('number', NumberProperty, float), - 'bool': ('boolean', BooleanProperty, None), - } + resource = describe(filepath) properties = {} - for i, (column_name, dtype) in enumerate(df.dtypes.items()): - dtype_str = str(dtype) - datatype, property_class, converter = type_map.get(dtype_str, ('string', StringProperty, None)) + required_fields = [] + + for i, field in enumerate(resource.schema.fields): + json_schema_type = frictionless_type_to_json_schema(field.type) - kwargs = { - "datatype": datatype, - "description": f"Column {column_name}", + property_def = { + "type": json_schema_type, + "description": field.description or f"Column {field.name}", "index": i } - - if include_min_max and converter: - kwargs.update({ - "minimum": converter(df[column_name].min()), - "maximum": converter(df[column_name].max()) - }) - - properties[column_name] = property_class(**kwargs) + + properties[field.name] = property_def + required_fields.append(field.name) - return cls( + # Create our schema instance + schema = cls( name=name, description=description, - properties=properties, - required=list(properties.keys()), separator=separator, - header=True + header=True, + properties=properties, + required=required_fields ) + + # Store the frictionless schema for validation + schema._frictionless_schema = resource.schema + return schema - @classmethod - def infer_from_parquet(cls, name: str, description: str, guid: Optional[str], filepath: str, include_min_max: bool = False) -> 'TabularValidationSchema': - """Infer schema from a Parquet file""" - table = pq.read_table(filepath) - schema = table.schema - properties = {} - - for i, field in enumerate(schema): - field_name = field.name - field_type = map_arrow_type_to_json_schema(field.type) - - if field_type == 'string': - properties[field_name] = StringProperty( - datatype='string', - description=f"Column {field_name}", - index=i - ) - elif field_type == 'integer': - if include_min_max: - column = table.column(field_name) - min_max = pc.min_max(column) - properties[field_name] = IntegerProperty( - datatype='integer', - description=f"Column {field_name}", - index=i, - minimum=min_max['min'].as_py(), - maximum=min_max['max'].as_py() - ) - else: - properties[field_name] = IntegerProperty( - datatype='integer', - description=f"Column {field_name}", - index=i - ) - elif field_type == 'number': - if include_min_max: - column = table.column(field_name) - min_max = pc.min_max(column) - properties[field_name] = NumberProperty( - datatype='number', - description=f"Column {field_name}", - index=i, - minimum=min_max['min'].as_py(), - maximum=min_max['max'].as_py() + def validate_file(self, filepath: str) -> List[ValidationError]: + """Validate a file against the schema using Frictionless""" + if not self._frictionless_schema: + raise ValueError("Schema not properly initialized") + + resource = Resource( + path=os.path.basename(filepath), + basepath=os.path.dirname(filepath), + schema=self._frictionless_schema + ) + report = resource.validate() + + errors = [] + for task in report.tasks: + for error in task.errors: + if isinstance(error, TypeError): + validation_error 
= ValidationError( + message=str(error), + type="ValidationError", + failed_keyword="type" ) else: - properties[field_name] = NumberProperty( - datatype='number', - description=f"Column {field_name}", - index=i + validation_error = ValidationError( + message=error.message, + row=error.row_number if hasattr(error, 'row_number') else None, + field=error.field_name if hasattr(error, 'field_name') else None, + failed_keyword=error.code if hasattr(error, 'code') else "error" ) - elif field_type == 'boolean': - properties[field_name] = BooleanProperty( - datatype='boolean', - description=f"Column {field_name}", - index=i - ) + errors.append(validation_error) + + return errors - return cls( - name=name, - description=description, - guid=guid, - properties=properties, - required=list(properties.keys()), - separator=",", # Not used for parquet but required - header=True # Not used for parquet but required - ) + def to_dict(self) -> dict: + """Convert the schema to a dictionary format""" + return self.model_dump(by_alias=True, exclude={'_frictionless_schema'}) - def validate_file(self, filepath: str) -> List[Dict]: - """Validate a file against the schema""" - file_type = FileType.from_extension(filepath) + @classmethod + def from_dict(cls, data: dict) -> 'TabularValidationSchema': + """Create a schema instance from a dictionary""" + properties = data.pop('properties', {}) + required_fields = data.pop('required', []) - if file_type == FileType.PARQUET: - df = pd.read_parquet(filepath) - else: # csv or tsv - sep = '\t' if file_type == FileType.TSV else self.separator - df = pd.read_csv(filepath, sep=sep, header=0 if self.header else None) + frictionless_schema = Schema() - return self.validate_dataframe(df) - - def validate_dataframe(self, df: pd.DataFrame) -> List[Dict]: - """Validate a dataframe against the schema with lenient string type checking. 
- Only reports string validation errors for pattern mismatches, not type mismatches.""" - json_schema = self.to_json_schema() - validator = jsonschema.Draft202012Validator(json_schema) - errors = [] - - for i, row in df.iterrows(): - row_dict = row.to_dict() - validation_errors = sorted(validator.iter_errors(row_dict), key=lambda e: e.path) + type_to_field = { + 'string': fields.StringField, + 'integer': fields.IntegerField, + 'number': fields.NumberField, + 'boolean': fields.BooleanField, + 'array': fields.ArrayField + } + + for name, prop in properties.items(): + field_type = type_to_field.get(prop.get('type', 'string'), fields.StringField) + field = field_type( + name=name, + description=prop.get('description', ''), + constraints={} + ) - for err in validation_errors: - # Skip type validation errors for string fields unless there's a pattern mismatch - if err.validator == "type": - field_name = list(err.path)[-1] if err.path else None - if field_name in self.properties: - prop = self.properties[field_name] - if prop.datatype == "string": - # Skip string type validation errors - continue + # Add constraints if they exist + if 'minimum' in prop: + field.constraints['minimum'] = prop['minimum'] + if 'maximum' in prop: + field.constraints['maximum'] = prop['maximum'] + if 'pattern' in prop: + field.constraints['pattern'] = prop['pattern'] + if 'minLength' in prop: + field.constraints['minLength'] = prop['minLength'] + if 'maxLength' in prop: + field.constraints['maxLength'] = prop['maxLength'] - # Include all other validation errors - errors.append({ - "message": err.message, - "row": i, - "field": list(err.path)[-1] if err.path else None, - "type": "ValidationError", - "failed_keyword": err.validator - }) - - return errors + frictionless_schema.add_field(field) + + # Create our schema instance + schema = cls(**data, properties=properties, required=required_fields) + schema._frictionless_schema = frictionless_schema + return schema -class HDF5Schema(BaseSchema): +class HDF5ValidationSchema(BaseModel): + guid: Optional[str] = Field(alias="@id", default=None) + context: Optional[Dict] = Field(default=DEFAULT_CONTEXT, alias="@context") + name: str + description: str properties: Dict[str, TabularValidationSchema] = Field(default={}) + required: List[str] = Field(default=[]) + def generate_guid(self) -> str: + """Generate a unique identifier for the schema""" + if self.guid is None: + prefix = f"schema-{self.name.lower().replace(' ', '-')}" + timestamp = datetime.now().strftime("%Y%m%d-%H%M%S") + self.guid = f"ark:{NAAN}/{prefix}-{timestamp}" + return self.guid + + @model_validator(mode='after') + def generate_all_guids(self) -> 'HDF5ValidationSchema': + """Generate GUIDs for this schema and any nested schemas""" + self.generate_guid() + return self + @staticmethod def dataset_to_dataframe(dataset: h5py.Dataset) -> pd.DataFrame: - """Convert any HDF5 dataset to a pandas DataFrame""" + """Convert an HDF5 dataset to a pandas DataFrame""" data = dataset[()] - # structured array convert directly - if dataset.dtype.fields: + if dataset.dtype.fields: # Structured array return pd.DataFrame(data) - - # For multi-dimensional arrays make up column name - elif len(dataset.shape) > 1: - n_cols = dataset.shape[1] if len(dataset.shape) > 1 else 1 + elif len(dataset.shape) > 1: # Multi-dimensional array + n_cols = dataset.shape[1] columns = [f"column_{i}" for i in range(n_cols)] return pd.DataFrame(data, columns=columns) - - # For 1D arrays convert to single column DataFrame - else: + else: # 1D array return 
pd.DataFrame(data, columns=['value']) - @classmethod - def infer_from_file(cls, filepath: str, name: str, description: str, include_min_max: bool = False) -> 'HDF5Schema': - """Infer schema from HDF5 file""" - schema = cls(name=name, description=description) + @classmethod + def infer_from_file(cls, filepath: str, name: str, description: str) -> 'HDF5ValidationSchema': + """Infer schema from an HDF5 file""" + schema = cls( + name=name, + description=description + ) properties = {} - + with h5py.File(filepath, 'r') as f: def process_group(group, parent_path=""): for key, item in group.items(): @@ -394,98 +360,139 @@ def process_group(group, parent_path=""): if isinstance(item, h5py.Dataset): try: df = cls.dataset_to_dataframe(item) - properties[path] = TabularValidationSchema.infer_from_dataframe( - df, + resource = describe(df) + + tabular_schema = TabularValidationSchema( name=f"{name}_{path.replace('/', '_')}", description=f"Dataset at {path}", - include_min_max=include_min_max + separator=",", + header=True, + properties={}, + required=[], + context=None ) + + tabular_schema._frictionless_schema = resource.schema + + for i, field in enumerate(resource.schema.fields): + property_def = { + "type": field.type, + "description": field.description or f"Column {field.name}", + "index": i + } + + tabular_schema.properties[field.name] = property_def + tabular_schema.required.append(field.name) + + properties[path] = tabular_schema + except Exception as e: - print(f"Warning: Could not convert dataset {path} to DataFrame: {str(e)}") + print(f"Warning: Could not process dataset {path}: {str(e)}") elif isinstance(item, h5py.Group): - # Recursively process group contents process_group(item, path) - + process_group(f) schema.properties = properties schema.required = list(properties.keys()) return schema - def validate_file(self, filepath: str) -> List[Dict]: + def validate_file(self, filepath: str) -> List[ValidationError]: """Validate an HDF5 file against the schema""" errors = [] with h5py.File(filepath, 'r') as f: for path, schema in self.properties.items(): try: - # Try to get the dataset using the path dataset = f[path] if isinstance(dataset, h5py.Dataset): - # Convert dataset to DataFrame df = self.dataset_to_dataframe(dataset) - # Validate using the TabularValidationSchema's validate_dataframe method - dataset_errors = schema.validate_dataframe(df) - # Add path information to errors - for error in dataset_errors: - error['path'] = path - errors.extend(dataset_errors) + resource = Resource(data=df, schema=schema._frictionless_schema) + report = resource.validate() + + for task in report.tasks: + for error in task.errors: + # Skip string type errors + if (hasattr(error, 'type') and error.type == 'type-error' and + hasattr(error, 'note') and 'type is "string' in error.note): + continue + + validation_error = ValidationError( + message=error.message, + row=error.rowNumber if hasattr(error, 'rowNumber') else None, + field=error.fieldName if hasattr(error, 'fieldName') else None, + type="ValidationError", + failed_keyword=error.type if hasattr(error, 'type') else "error", + path=path + ) + errors.append(validation_error) + except KeyError: - errors.append({ - "message": f"Dataset {path} not found", - "path": path, - "type": "ValidationError", - "failed_keyword": "required" - }) + errors.append(ValidationError( + message=f"Dataset {path} not found", + type="ValidationError", + failed_keyword="required", + path=path + )) except Exception as e: - errors.append({ - "message": f"Error validating dataset 
{path}: {str(e)}", - "path": path, - "type": "ValidationError", - "failed_keyword": "format" - }) + errors.append(ValidationError( + message=f"Error validating dataset {path}: {str(e)}", + type="ValidationError", + failed_keyword="format", + path=path + )) return errors + + def to_dict(self) -> dict: + """Convert the schema to a dictionary format including all fields""" + return self.model_dump(by_alias=True) + @classmethod + def from_dict(cls, data: dict) -> 'HDF5ValidationSchema': + """Create a schema instance from a dictionary""" + properties = { + path: TabularValidationSchema.from_dict(schema_dict) + for path, schema_dict in data.get('properties', {}).items() + } + + return cls( + name=data['name'], + description=data['description'], + properties=properties, + required=data.get('required', []) + ) + +def write_schema(schema: TabularValidationSchema, output_file: str): + """Write a schema to a file""" + schema_dict = schema.to_dict() -def AppendProperty(schemaFilepath: str, propertyInstance, propertyName: str) -> None: + with open(output_file, 'w') as f: + json.dump(schema_dict, f, indent=2) + +def AppendProperty(schemaFilepath: str, propertyInstance, propertyName: str) -> None: # check that schemaFile exists schemaPath = pathlib.Path(schemaFilepath) - if not schemaPath.exists(): raise Exception with schemaPath.open("r+") as schemaFile: schemaFileContents = schemaFile.read() - schemaJson = json.loads(schemaFileContents) + schemaJson = json.loads(schemaFileContents) - # load the model into a tabular validation schema schemaModel = TabularValidationSchema.model_validate(schemaJson) - # TODO check for inconsitencies - - # does there exist a property with same name if propertyName in [key for key in schemaModel.properties.keys()]: raise PropertyNameException(propertyName) - # does there exist a property with same column number - schema_indicies = [ val.index for val in schemaModel.properties.values()] - - # check overlap of indicies - # CheckOverlap - - - # add new property to schema + schema_indicies = [val['index'] for val in schemaModel.properties.values()] + schemaModel.properties[propertyName] = propertyInstance - - # add new property as required schemaModel.required.append(propertyName) + schemaJson = json.dumps(schemaModel.model_dump(by_alias=True, exclude_none=True), indent=2) - # serialize model to json - schemaJson = json.dumps(schemaModel.model_dump(by_alias=True) , indent=2) - - # overwrite file contents + # overwrite file contents schemaFile.seek(0) schemaFile.write(schemaJson) @@ -525,64 +532,3 @@ def ReadSchemaLocal(schemaFile: str) -> TabularValidationSchema: tabularSchema = TabularValidationSchema.model_validate(schemaJson) return tabularSchema -def ReadSchema(schemaFile:str) -> TabularValidationSchema: - ''' Read a schema specified by the argument schemaFile - - The schemaFile parameter can be a url to a rawgithub link, or an ark identifier. - If the ark identifier is in the supplied, default schemas provided in the fairscape cli pacakges will be searched. 
- If there is no match then - ''' - - if 'raw.githubusercontent' in schemaFile: - schemaInstance = ReadSchemaGithub(schemaFile) - return schemaInstance - - - elif 'ark' in schemaFile: - defaultSchemas = ImportDefaultSchemas() - matchingSchemas = list(filter(lambda schema: schema.guid == str(schemaFile), defaultSchemas)) - - if len(matchingSchemas) == 0: - # request against fairscape - schemaInstance = ReadSchemaFairscape(schemaFile) - return schemaInstance - else: - defaultSchema = matchingSchemas[0] - return defaultSchema - - else: - # schema must be a path that exists - schemaInstance = ReadSchemaLocal(schemaFile) - return schemaInstance - -def WriteSchema(tabular_schema: TabularValidationSchema, schema_file): - """ Helper Function for writing files - """ - - schema_dictionary = tabular_schema.model_dump(by_alias=True) - schema_json = json.dumps(schema_dictionary, indent=2) - - # dump json to a file - with open(schema_file, "w") as output_file: - output_file.write(schema_json) - -@lru_cache -def ImportDefaultSchemas()-> List[TabularValidationSchema]: - defaultSchemaLocation = pathlib.Path(os.path.dirname(os.path.realpath(__file__))) / 'default_schemas' - schemaPaths = list(defaultSchemaLocation.rglob("*/*.json")) - - defaultSchemaList = [] - for schemaPathElem in schemaPaths: - - with schemaPathElem.open("r") as inputSchema: - inputSchemaData = inputSchema.read() - schemaJson = json.loads(inputSchemaData) - - try: - schemaElem = TabularValidationSchema.model_validate(schemaJson) - defaultSchemaList.append(schemaElem) - except: - # TODO handle validation failures from default schemas - pass - - return defaultSchemaList diff --git a/src/fairscape_cli/models/software.py b/src/fairscape_cli/models/software.py index fb60242..83ebda0 100644 --- a/src/fairscape_cli/models/software.py +++ b/src/fairscape_cli/models/software.py @@ -1,21 +1,12 @@ -from fairscape_cli.models.base import FairscapeBaseModel -from fairscape_cli.models.utils import GenerateDatetimeSquid, FileNotInCrateException -from fairscape_cli.config import NAAN import pathlib - -from pydantic import ( - Field, - AnyUrl, - ConfigDict -) from datetime import datetime -from typing import ( - Optional, - Union, - Dict, - List -) +from typing import Optional, Union, Dict, List +from pydantic import Field, AnyUrl, ConfigDict + +from fairscape_cli.config import NAAN +from fairscape_cli.models.base import FairscapeBaseModel +from fairscape_cli.models.guid_utils import GenerateDatetimeSquid class Software(FairscapeBaseModel): diff --git a/src/fairscape_cli/models/utils.py b/src/fairscape_cli/models/utils.py index b51c6e9..dfe270a 100644 --- a/src/fairscape_cli/models/utils.py +++ b/src/fairscape_cli/models/utils.py @@ -1,70 +1,45 @@ -# Python Interface for Registering Unique GUIDS -from sqids import Sqids -from pydantic import ( - ValidationError -) -from typing import ( - List - ) -import datetime -from fairscape_cli.config import ( - NAAN - ) -import random +from pathlib import Path +from typing import Set, Dict, List, Optional, Tuple +import subprocess -squids = Sqids(min_length=6) - -def GenerateDatetimeSquid(): - try: - timestamp_int = int(datetime.datetime.now(datetime.UTC).timestamp()) - sq = squids.encode([timestamp_int, random.randint(0, 10000)]) - except: - timestamp_int = int(datetime.datetime.utcnow().timestamp()) - sq = squids.encode([timestamp_int]) - - return sq - - -def GenerateDatetimeGUID(prefix: str)->str: - try: - timestamp_int = int(datetime.datetime.now(datetime.UTC).timestamp()) - sq = squids.encode([timestamp_int]) - 
except: - timestamp_int = int(datetime.datetime.utcnow().timestamp()) - sq = squids.encode([timestamp_int]) - - return f"ark:{NAAN}/{prefix}-{sq}" - -def GenerateGUID(data: List[int], prefix: str)-> str: - squid_encoded = squids.encode(data) - return f"ark:{NAAN}/{prefix}-{squid_encoded}" +from pydantic import ValidationError +from fairscape_cli.models.base import FairscapeBaseModel def InstantiateModel(ctx, metadata: dict, modelInstance): try: modelInstance.model_validate(metadata) return modelInstance - except ValidationError as metadataError: print('ERROR: MetadataValidationError', end='') for validationFailure in metadataError.errors(): print(f'loc: {validationFailure.loc}\tinput: {validationFailure.input}\tmsg: {validationFailure.msg}', end='') ctx.exit(code=1) - - -def ValidateGUID(ctx, param, value): - """ Make sure a GUID reference is reachable return JSON Metadata - """ - # validate fairscape ARK - - # validate DOI - - # validate url - pass - - class FileNotInCrateException(Exception): def __init__(self, cratePath, filePath): self.message = f"Error: FileNotFound inside ro crate\ncratePath: {str(cratePath)}\tfilePath{str(filePath)}" super().__init__(self.message) + +def getDirectoryContents(directory: Path) -> Set[Path]: + """Get set of all files in directory recursively""" + return set(p for p in directory.rglob('*') if p.is_file()) + +def run_command(command: str) -> Tuple[bool, str, str]: + """Execute command and return success status with output""" + try: + result = subprocess.run( + command.split(), + capture_output=True, + text=True + ) + return result.returncode == 0, result.stdout, result.stderr + except Exception as e: + return False, "", str(e) + +def getEntityFromCrate(crate_instance, entity_id: str) -> Optional[FairscapeBaseModel]: + """Get entity from crate by ID""" + for entity in crate_instance.metadataGraph: + if entity.guid == entity_id: + return entity.dict() + return None \ No newline at end of file diff --git a/src/fairscape_cli/rocrate/rocrate.py b/src/fairscape_cli/rocrate/rocrate.py index 2b0438b..7fb2c22 100644 --- a/src/fairscape_cli/rocrate/rocrate.py +++ b/src/fairscape_cli/rocrate/rocrate.py @@ -2,32 +2,42 @@ import pathlib import shutil import json -from pydantic import ValidationError from datetime import datetime +from typing import List, Optional, Union +from pydantic import ValidationError +from fairscape_cli.config import NAAN +from fairscape_cli.models.guid_utils import GenerateDatetimeSquid from fairscape_cli.models.utils import ( - FileNotInCrateException + FileNotInCrateException, + getDirectoryContents, + getEntityFromCrate, + run_command ) from fairscape_cli.models import ( + # Core models Dataset, - GenerateDataset, Software, - GenerateSoftware, Computation, + ROCrate, + BagIt, + + # Generator functions + GenerateDataset, + GenerateSoftware, GenerateComputation, GenerateROCrate, - ROCrate, + + # RO Crate operations ReadROCrateMetadata, AppendCrate, CopyToROCrate, - BagIt -) - -from typing import ( - List, - Optional, - Union + UpdateCrate, + + # Additional utilities + generateSummaryStatsElements, + registerOutputs ) @@ -204,6 +214,7 @@ def registerSoftware( @click.option('--keywords', required=True, multiple=True) @click.option('--data-format', required=True) @click.option('--filepath', required=True) +@click.option('--summary-statistics-filepath', required=False, type=click.Path(exists=True)) @click.option('--used-by', required=False, multiple=True) @click.option('--derived-from', required=False, multiple=True) 
@click.option('--generated-by', required=False, multiple=True) @@ -224,6 +235,7 @@ def registerDataset( keywords: List[str], data_format: str, filepath: str, + summary_statistics_filepath: Optional[str], used_by: Optional[List[str]], derived_from: Optional[List[str]], generated_by: Optional[List[str]], @@ -231,8 +243,7 @@ def registerDataset( associated_publication: Optional[str], additional_documentation: Optional[List[str]], ): - """Register Dataset object metadata with the specified RO-Crate - """ + """Register Dataset object metadata with the specified RO-Crate""" try: crate_instance = ReadROCrateMetadata(rocrate_path) except Exception as exc: @@ -240,8 +251,33 @@ def registerDataset( ctx.exit(code=1) try: + # Generate main dataset GUID + sq_dataset = GenerateDatetimeSquid() + dataset_guid = guid if guid else f"ark:{NAAN}/dataset-{name.lower().replace(' ', '-')}-{sq_dataset}" + + summary_stats_guid = None + elements = [] + + # Handle summary statistics if provided + if summary_statistics_filepath: + summary_stats_guid, summary_stats_instance, computation_instance = generateSummaryStatsElements( + name=name, + author=author, + keywords=keywords, + date_published=date_published, + version=version, + associated_publication=associated_publication, + additional_documentation=additional_documentation, + schema=schema, + dataset_guid=dataset_guid, + summary_statistics_filepath=summary_statistics_filepath, + crate_path=rocrate_path + ) + elements.extend([computation_instance, summary_stats_instance]) + + # Generate main dataset dataset_instance = GenerateDataset( - guid=guid, + guid=dataset_guid, url=url, author=author, name=name, @@ -257,9 +293,12 @@ def registerDataset( generatedBy=generated_by, usedBy=used_by, filepath=filepath, - cratePath=rocrate_path + cratePath=rocrate_path, + summary_stats_guid=summary_stats_guid ) - AppendCrate(cratePath = rocrate_path, elements=[dataset_instance]) + + elements.insert(0, dataset_instance) + AppendCrate(cratePath=rocrate_path, elements=elements) click.echo(dataset_instance.guid) except FileNotInCrateException as e: @@ -275,8 +314,6 @@ def registerDataset( click.echo(f"ERROR: {str(exc)}") ctx.exit(code=1) - - @register.command('computation') @click.argument('rocrate-path', type=click.Path(exists=True, path_type=pathlib.Path)) @@ -434,6 +471,8 @@ def software( @click.option('--data-format', required=True) @click.option('--source-filepath', required=True) @click.option('--destination-filepath', required=True) +@click.option('--summary-statistics-source', required=False, type=click.Path(exists=True)) +@click.option('--summary-statistics-destination', required=False, type=click.Path()) @click.option('--used-by', required=False, multiple=True) @click.option('--derived-from', required=False, multiple=True) @click.option('--generated-by', required=False, multiple=True) @@ -455,6 +494,8 @@ def dataset( data_format, source_filepath, destination_filepath, + summary_statistics_source, + summary_statistics_destination, used_by, derived_from, generated_by, @@ -462,9 +503,7 @@ def dataset( associated_publication, additional_documentation, ): - """Add a Dataset file and its metadata to the RO-Crate. 
- """ - + """Add a Dataset file and its metadata to the RO-Crate.""" try: crateInstance = ReadROCrateMetadata(rocrate_path) except Exception as exc: @@ -472,9 +511,40 @@ def dataset( ctx.exit(code=1) try: + # Copy main dataset file CopyToROCrate(source_filepath, destination_filepath) + + # Generate main dataset GUID + sq_dataset = GenerateDatetimeSquid() + dataset_guid = guid if guid else f"ark:{NAAN}/dataset-{name.lower().replace(' ', '-')}-{sq_dataset}" + + summary_stats_guid = None + elements = [] + + # Handle summary statistics if provided + if summary_statistics_source and summary_statistics_destination: + # Copy summary statistics file + CopyToROCrate(summary_statistics_source, summary_statistics_destination) + + # Generate summary statistics elements + summary_stats_guid, summary_stats_instance, computation_instance = generateSummaryStatsElements( + name=name, + author=author, + keywords=keywords, + date_published=date_published, + version=version, + associated_publication=associated_publication, + additional_documentation=additional_documentation, + schema=schema, + dataset_guid=dataset_guid, + summary_statistics_filepath=summary_statistics_destination, + crate_path=rocrate_path + ) + elements.extend([computation_instance, summary_stats_instance]) + + # Generate main dataset dataset_instance = GenerateDataset( - guid=guid, + guid=dataset_guid, url=url, author=author, name=name, @@ -490,9 +560,12 @@ def dataset( generatedBy=generated_by, usedBy=used_by, filepath=destination_filepath, - cratePath=rocrate_path + cratePath=rocrate_path, + summary_stats_guid=summary_stats_guid ) - AppendCrate(cratePath = rocrate_path, elements=[dataset_instance]) + + elements.insert(0, dataset_instance) + AppendCrate(cratePath=rocrate_path, elements=elements) click.echo(dataset_instance.guid) except ValidationError as e: @@ -503,5 +576,85 @@ def dataset( except Exception as exc: click.echo(f"ERROR: {str(exc)}") ctx.exit(code=1) + +################# +# Summary Statistics +################# +@rocrate.command('compute-statistics') +@click.argument('rocrate-path', type=click.Path(exists=True, path_type=pathlib.Path)) +@click.option('--dataset-id', required=True, help='ID of dataset to compute statistics for') +@click.option('--software-id', required=True, help='ID of software to run') +@click.option('--command', required=True, help='Python command to execute (e.g. 
python)') +@click.pass_context +def compute_statistics( + ctx, + rocrate_path: pathlib.Path, + dataset_id: str, + software_id: str, + command: str +): + """Compute statistics for a dataset using specified software""" + crate_instance = ReadROCrateMetadata(rocrate_path) + initial_files = getDirectoryContents(rocrate_path) - # TODO add to cache + # Get original dataset info + dataset_info = getEntityFromCrate(crate_instance, dataset_id) + software_info = getEntityFromCrate(crate_instance, software_id) + if not dataset_info or not software_info: + raise ValueError(f"Dataset or software not found in crate") + + # Get original dataset author + original_author = dataset_info.get("author", "Unknown") + dataset_path = dataset_info.get("contentUrl", "").replace("file:///", "") + software_path = software_info.get("contentUrl", "").replace("file:///", "") + + if not dataset_path or not software_path: + raise ValueError("Dataset or software path not found") + + full_command = f"{command} {software_path} {dataset_path} {rocrate_path}" + success, stdout, stderr = run_command(full_command) + if not success: + raise RuntimeError(f"Command failed: {stderr}") + + final_files = getDirectoryContents(rocrate_path) + new_files = final_files - initial_files + if not new_files: + raise RuntimeError("No output files generated") + + computation_instance = GenerateComputation( + guid=None, + name=f"Statistics Computation for {dataset_id}", + runBy="Fairscape-CLI", + command=full_command, + dateCreated=datetime.now().isoformat(), + description=f"Generated statistics\nstdout:\n{stdout}\nstderr:\n{stderr}", + keywords=["statistics"], + usedSoftware=[software_id], + usedDataset=[dataset_id], + generated=[] + ) + + output_instances = registerOutputs( + new_files=new_files, + computation_id=computation_instance.guid, + dataset_id=dataset_id, + author=original_author + ) + + stats_output = [out.guid for out in output_instances] + computation_instance.generated = stats_output + + if stats_output: + # Update the original dataset metadata + dataset_info["hasSummaryStatistics"] = stats_output + # Generate a new Dataset instance with updated metadata + updated_dataset = Dataset.model_validate(dataset_info) + + # Update the dataset in the crate and append new elements + UpdateCrate(cratePath=rocrate_path, element=updated_dataset) + AppendCrate( + cratePath=rocrate_path, + elements=[computation_instance] + output_instances + ) + + click.echo(computation_instance.guid) \ No newline at end of file diff --git a/src/fairscape_cli/schema/schema.py b/src/fairscape_cli/schema/schema.py index e17b9d3..a09cf2b 100644 --- a/src/fairscape_cli/schema/schema.py +++ b/src/fairscape_cli/schema/schema.py @@ -3,49 +3,37 @@ from prettytable import PrettyTable import pathlib from pydantic import ( - ValidationError + ValidationError ) from typing import ( Union, Type ) - from fairscape_cli.models.schema.tabular import ( TabularValidationSchema, - ReadSchema, - ImportDefaultSchemas, - WriteSchema, + HDF5ValidationSchema, + write_schema as WriteSchema, StringProperty, NumberProperty, IntegerProperty, BooleanProperty, ArrayProperty, ClickAppendProperty, - PropertyNameException, - ColumnIndexException, DatatypeEnum, Items, - FileType, - HDF5Schema -) - -from fairscape_cli.config import ( - FAIRSCAPE_URI ) - @click.group('schema') def schema(): """Invoke operations on dataset schema. 
""" pass - @schema.command('create-tabular') @click.option('--name', required=True, type=str) @click.option('--description', required=True, type=str) -@click.option('--guid', required=False, type=str, default="", show_default=False) +@click.option('--guid', required=False, type=str, default=None, show_default=False) @click.option('--separator', type=str, required=True) @click.option('--header', required=False, type=bool, default=False) @click.argument('schema_file', type=str) @@ -61,7 +49,6 @@ def create_tabular_schema( ): """Initialize a Tabular Schema. """ - # create the model try: schema_model = TabularValidationSchema.model_validate({ "name": name, @@ -80,8 +67,7 @@ def create_tabular_schema( ctx.exit(code=1) WriteSchema(schema_model, schema_file) - click.echo(f"Wrote Schema: {str(schema_file)}") - + click.echo(f"Wrote Schema: {str(schema_file)}") @schema.group('add-property') def add_property(): @@ -89,7 +75,6 @@ def add_property(): """ pass - @add_property.command('string') @click.option('--name', type=str, required=True) @click.option('--index', type=int, required=True) @@ -118,7 +103,6 @@ def add_property_string(ctx, name, index, description, value_url, pattern, schem ClickAppendProperty(ctx, schema_file, stringPropertyModel, name) - @add_property.command('number') @click.option('--name', type=str, required=True) @click.option('--index', type=int, required=True) @@ -141,7 +125,6 @@ def add_property_number(ctx, name, index, description, maximum, minimum, value_u "description": description, "valueURL": value_url }) - except ValidationError as metadataError: click.echo("ERROR Validating NumberProperty") for validationFailure in metadataError.errors(): @@ -150,7 +133,6 @@ def add_property_number(ctx, name, index, description, maximum, minimum, value_u ClickAppendProperty(ctx, schema_file, numberPropertyModel, name) - @add_property.command('boolean') @click.option('--name', type=str, required=True) @click.option('--index', type=int, required=True) @@ -169,7 +151,6 @@ def add_property_boolean(ctx, name, index, description, value_url, schema_file): "description": description, "valueURL": value_url }) - except ValidationError as metadataError: click.echo("ERROR Validating BooleanProperty") for validationFailure in metadataError.errors(): @@ -178,7 +159,6 @@ def add_property_boolean(ctx, name, index, description, value_url, schema_file): ClickAppendProperty(ctx, schema_file, booleanPropertyModel, name) - @add_property.command('integer') @click.option('--name', type=str, required=True) @click.option('--index', type=int, required=True) @@ -201,7 +181,6 @@ def add_property_integer(ctx, name, index, description, maximum, minimum, value_ "minimum": minimum, "valueURL": value_url }) - except ValidationError as metadataError: click.echo("ERROR Validating IntegerProperty") for validationFailure in metadataError.errors(): @@ -210,7 +189,6 @@ def add_property_integer(ctx, name, index, description, maximum, minimum, value_ ClickAppendProperty(ctx, schema_file, integerPropertyModel, name) - @add_property.command('array') @click.option('--name', type=str, required=True) @click.option('--index', type=str, required=True) @@ -244,7 +222,6 @@ def add_property_array(ctx, name, index, description, value_url, items_datatype, uniqueItems=unique_items, items=Items(datatype=datatype_enum) ) - except ValidationError as metadataError: print("ERROR: MetadataValidationError") for validationFailure in metadataError.errors(): @@ -253,12 +230,11 @@ def add_property_array(ctx, name, index, description, value_url, 
items_datatype, ClickAppendProperty(ctx, schema_file, arrayPropertyModel, name) - -def determine_schema_type(filepath: str) -> Type[Union[TabularValidationSchema, HDF5Schema]]: +def determine_schema_type(filepath: str) -> Type[Union[TabularValidationSchema, HDF5ValidationSchema]]: """Determine which schema type to use based on file extension""" ext = pathlib.Path(filepath).suffix.lower()[1:] if ext in ('h5', 'hdf5'): - return HDF5Schema + return HDF5ValidationSchema elif ext in ('csv', 'tsv', 'parquet'): return TabularValidationSchema else: @@ -270,7 +246,6 @@ def determine_schema_type(filepath: str) -> Type[Union[TabularValidationSchema, @click.pass_context def validate(ctx, schema, data): """Execute validation of a Schema against the provided data.""" - # Check if schema file exists (if not a default schema) if 'ark' not in schema: schema_path = pathlib.Path(schema) if not schema_path.exists(): @@ -283,39 +258,35 @@ def validate(ctx, schema, data): ctx.exit(1) try: - # Load the schema file with open(schema) as f: schema_json = json.load(f) - # Determine schema type based on the data file schema_class = determine_schema_type(data) - validation_schema = schema_class.model_validate(schema_json) + validation_schema = schema_class.from_dict(schema_json) - # Validate the file validation_errors = validation_schema.validate_file(data) if len(validation_errors) != 0: - # Create a pretty table of validation errors error_table = PrettyTable() - if isinstance(validation_schema, HDF5Schema): + if isinstance(validation_schema, HDF5ValidationSchema): error_table.field_names = ['path', 'error_type', 'failed_keyword', 'message'] else: error_table.field_names = ['row', 'error_type', 'failed_keyword', 'message'] for err in validation_errors: - if isinstance(validation_schema, HDF5Schema): + if isinstance(validation_schema, HDF5ValidationSchema): error_table.add_row([ - err.get("path"), - err.get("type"), - err.get("failed_keyword"), - str(err.get('message')) + err.path, + err.type, + err.failed_keyword, + str(err.message) ]) else: error_table.add_row([ - err.get("row"), - err.get("type"), - err.get("failed_keyword"), - str(err.get('message')) + err.row, + err.type, + err.failed_keyword, + str(err.message) ]) print(error_table) @@ -337,29 +308,24 @@ def validate(ctx, schema, data): @click.option('--name', required=True, type=str) @click.option('--description', required=True, type=str) @click.option('--guid', required=False, type=str, default="", show_default=False) -@click.option('--include-min-max', is_flag=True, help="Include min and max values for numeric and integer fields") @click.argument('input_file', type=click.Path(exists=True)) @click.argument('schema_file', type=str) @click.pass_context -def infer_schema(ctx, name, description, guid, include_min_max, input_file, schema_file): +def infer_schema(ctx, name, description, guid, input_file, schema_file): """Infer a schema from a file (CSV, TSV, Parquet, or HDF5).""" try: - # Determine which schema type to use based on input file schema_class = determine_schema_type(input_file) - # Infer the schema schema_model = schema_class.infer_from_file( input_file, name, - description, - include_min_max + description ) if guid: schema_model.guid = guid WriteSchema(schema_model, schema_file) - # Get file type for display ext = pathlib.Path(input_file).suffix.lower()[1:] click.echo(f"Inferred Schema from {ext} file: {str(schema_file)}") diff --git a/tests/stats-compute-tests/numbers.csv b/tests/stats-compute-tests/numbers.csv new file mode 100644 index 
0000000..aa9321c --- /dev/null +++ b/tests/stats-compute-tests/numbers.csv @@ -0,0 +1,11 @@ +column1,column2,column3 +1,0.557412965,0.015765057 +2,0.595715476,4.632460772 +3,1.000511292,0.516892255 +4,3.634542545,16.3678812 +5,0.216278402,0.37567848 +6,3.346647036,3.666700797 +7,2.864322316,2.292766985 +8,0.508136324,0.434491093 +9,5.934758558,1.647603341 +10,1.092459463,1.04885126 \ No newline at end of file diff --git a/tests/stats-compute-tests/summary.py b/tests/stats-compute-tests/summary.py new file mode 100644 index 0000000..633bb35 --- /dev/null +++ b/tests/stats-compute-tests/summary.py @@ -0,0 +1,58 @@ +import pandas as pd +import sys +import os +from pathlib import Path + +def generate_summary_stats(input_path, output_dir): + """ + Generate summary statistics for a CSV file and save to output directory + + Parameters: + input_path (str): Path to input CSV file + output_dir (str): Directory to save output summary statistics + """ + # Read the input file + df = pd.read_csv(input_path) + + # Create summary statistics + summary_stats = pd.DataFrame({ + 'column_name': df.columns, + 'data_type': df.dtypes.astype(str), + 'count': df.count(), + 'null_count': df.isnull().sum(), + 'null_percentage': (df.isnull().sum() / len(df) * 100).round(2), + 'unique_values': df.nunique(), + }) + + # Add numeric column statistics + numeric_cols = df.select_dtypes(include=['int64', 'float64']).columns + summary_stats.loc[summary_stats['column_name'].isin(numeric_cols), 'mean'] = df[numeric_cols].mean() + summary_stats.loc[summary_stats['column_name'].isin(numeric_cols), 'std'] = df[numeric_cols].std() + summary_stats.loc[summary_stats['column_name'].isin(numeric_cols), 'min'] = df[numeric_cols].min() + summary_stats.loc[summary_stats['column_name'].isin(numeric_cols), 'max'] = df[numeric_cols].max() + + # Create output directory if it doesn't exist + Path(output_dir).mkdir(parents=True, exist_ok=True) + + # Generate output filename from input filename + input_filename = os.path.basename(input_path) + output_filename = f"summary_stats_{input_filename}" + output_path = os.path.join(output_dir, output_filename) + + # Save summary statistics + summary_stats.to_csv(output_path, index=False) + print(f"Summary statistics saved to: {output_path}") + +if __name__ == "__main__": + if len(sys.argv) != 3: + print("Usage: python summary.py ") + sys.exit(1) + + input_path = sys.argv[1] + output_dir = sys.argv[2] + + try: + generate_summary_stats(input_path, output_dir) + except Exception as e: + print(f"Error: {str(e)}") + sys.exit(1) \ No newline at end of file diff --git a/tests/test_compute_stats.py b/tests/test_compute_stats.py new file mode 100644 index 0000000..e5c5711 --- /dev/null +++ b/tests/test_compute_stats.py @@ -0,0 +1,204 @@ +import os +import sys +import pathlib +import json +import shutil +import unittest +import subprocess +import datetime +from typing import Tuple + +class TestStatisticsCliWorkflow(unittest.TestCase): + + def setUp(self): + # Create test directory + self.test_dir = pathlib.Path.cwd() / 'tests' / 'stats-compute-tests' + self.test_dir.mkdir(parents=True, exist_ok=True) + + def tearDown(self): + # Only remove the generated files, not the entire directory + metadata_file = self.test_dir / 'ro-crate-metadata.json' + stats_file = self.test_dir / 'summary_stats_numbers.csv' + summary_file = self.test_dir / 'fake_summary.csv' + + if metadata_file.exists(): + metadata_file.unlink() + if stats_file.exists(): + stats_file.unlink() + if summary_file.exists(): + summary_file.unlink() + + def 
run_cli_command(self, command: str) -> Tuple[int, str, str]: + """Run a CLI command and return returncode, stdout, stderr""" + process = subprocess.Popen( + command, + shell=True, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + text=True + ) + stdout, stderr = process.communicate() + return process.returncode, stdout.strip(), stderr.strip() + + def test_cli_workflow(self): + # Change to test directory + os.chdir(self.test_dir) + + # Initialize ROCrate + init_cmd = '''python -m fairscape_cli rocrate init \ + --name "Data Analysis Project" \ + --organization-name "My Organization" \ + --project-name "Data Analysis" \ + --description "A project for analyzing data using summary statistics" \ + --keywords "data-analysis" --keywords "statistics" --keywords "python"''' + + returncode, stdout, stderr = self.run_cli_command(init_cmd) + self.assertEqual(returncode, 0, f"ROCrate init failed: {stderr}") + rocrate_guid = stdout.strip() + + # Register software + software_cmd = f'''python -m fairscape_cli rocrate register software ./ \ + --name "Summary Statistics Generator" \ + --author "Your Name" \ + --version "1.0.0" \ + --description "Python script that generates summary statistics for CSV data" \ + --keywords "data-analysis" --keywords "statistics" --keywords "python" \ + --file-format "text/x-python" \ + --date-modified "{datetime.date.today().isoformat()}" \ + --filepath "summary.py"''' + + returncode, stdout, stderr = self.run_cli_command(software_cmd) + self.assertEqual(returncode, 0, f"Software registration failed: {stderr}") + software_guid = stdout.strip() + + # Register dataset + dataset_cmd = f'''python -m fairscape_cli rocrate register dataset ./ \ + --name "Analysis Dataset" \ + --author "Your Name" \ + --version "1.0.0" \ + --date-published "{datetime.date.today().isoformat()}" \ + --description "Dataset for statistical analysis" \ + --keywords "data-analysis" --keywords "statistics" --keywords "python" \ + --data-format "text/csv" \ + --filepath "numbers.csv"''' + + returncode, stdout, stderr = self.run_cli_command(dataset_cmd) + self.assertEqual(returncode, 0, f"Dataset registration failed: {stderr}") + dataset_guid = stdout.strip() + + # Compute statistics + compute_cmd = f'''python -m fairscape_cli rocrate compute-statistics ./ \ + --dataset-id "{dataset_guid}" \ + --software-id "{software_guid}" \ + --command "python"''' + + returncode, stdout, stderr = self.run_cli_command(compute_cmd) + self.assertEqual(returncode, 0, f"Computation failed: {stderr}") + computation_guid = stdout.strip() + + # Verify the metadata file exists and has correct structure + metadata_file = self.test_dir / 'ro-crate-metadata.json' + self.assertTrue(metadata_file.exists()) + + # Load and verify metadata + with open(metadata_file) as f: + metadata = json.load(f) + + # Basic structure tests + self.assertEqual(metadata['name'], "Data Analysis Project") + self.assertEqual(metadata['@id'], rocrate_guid) + + # Verify all components are present in @graph + guids = [item['@id'] for item in metadata['@graph']] + self.assertIn(software_guid, guids) + self.assertIn(dataset_guid, guids) + self.assertIn(computation_guid, guids) + + # Find computation record + computation = next(item for item in metadata['@graph'] if item['@id'] == computation_guid) + + # Verify computation relationships + self.assertEqual(computation['usedSoftware'], [software_guid]) + self.assertEqual(computation['usedDataset'], [dataset_guid]) + self.assertTrue(len(computation['generated']) > 0) + + # Verify output file exists + output_file = 
self.test_dir / 'summary_stats_numbers.csv' + self.assertTrue(output_file.exists()) + + # Find dataset record and verify it has summary statistics + dataset = next(item for item in metadata['@graph'] if item['@id'] == dataset_guid) + self.assertTrue('hasSummaryStatistics' in dataset) + self.assertEqual(dataset['hasSummaryStatistics'], computation['generated']) + + def test_dataset_with_summary_stats(self): + # Change to test directory + os.chdir(self.test_dir) + + # Initialize ROCrate + init_cmd = '''python -m fairscape_cli rocrate init \ + --name "Dataset Summary Test" \ + --organization-name "Test Organization" \ + --project-name "Summary Stats Test" \ + --description "Testing dataset registration with summary statistics" \ + --keywords "data" --keywords "testing" --keywords "summary-stats"''' + + returncode, stdout, stderr = self.run_cli_command(init_cmd) + self.assertEqual(returncode, 0, f"ROCrate init failed: {stderr}") + rocrate_guid = stdout.strip() + + # Create fake summary file + summary_path = self.test_dir / 'fake_summary.csv' + with open(summary_path, 'w') as f: + f.write("statistic,value\nmean,42.0\nmedian,41.5\nstd,5.2") + + # Register dataset with summary statistics + dataset_cmd = f'''python -m fairscape_cli rocrate register dataset ./ \ + --name "Test Dataset" \ + --author "Test Author" \ + --version "1.0.0" \ + --date-published "{datetime.date.today().isoformat()}" \ + --description "Dataset with pre-existing summary statistics" \ + --keywords "data" --keywords "testing" \ + --data-format "text/csv" \ + --filepath "numbers.csv" \ + --summary-statistics-filepath "fake_summary.csv"''' + + returncode, stdout, stderr = self.run_cli_command(dataset_cmd) + self.assertEqual(returncode, 0, f"Dataset registration failed: {stderr}") + dataset_guid = stdout.strip() + + # Verify the metadata file exists and has correct structure + metadata_file = self.test_dir / 'ro-crate-metadata.json' + self.assertTrue(metadata_file.exists()) + + # Load and verify metadata + with open(metadata_file) as f: + metadata = json.load(f) + + # Find dataset record and verify it has summary statistics + dataset = next(item for item in metadata['@graph'] if item['@id'] == dataset_guid) + + # Get summary stats ID + summary_stats_id = dataset['hasSummaryStatistics'] + + # Find the summary statistics dataset in the graph - with more flexible matching + summary_stats = next( + (item for item in metadata['@graph'] + if 'stats' in item['@id'] and item['@type'] == 'https://w3id.org/EVI#Dataset'), + None + ) + self.assertEqual(summary_stats['@type'], 'https://w3id.org/EVI#Dataset') + self.assertTrue('stats' in summary_stats['@id']) + self.assertEqual(summary_stats['author'], 'Test Author') + + computation = next( + (item for item in metadata['@graph'] + if item['@type'] == 'https://w3id.org/EVI#Computation' and summary_stats_id in item.get('generated', [])), + None + ) + self.assertIsNotNone(computation) + self.assertEqual(computation['usedDataset'], [dataset_guid]) + +if __name__ == '__main__': + unittest.main() \ No newline at end of file diff --git a/tests/test_rocrate_api.py b/tests/test_rocrate_api.py index d19fbae..2aec4d5 100644 --- a/tests/test_rocrate_api.py +++ b/tests/test_rocrate_api.py @@ -2,6 +2,7 @@ import sys import pathlib import json +import shutil sys.path.insert( 0, @@ -19,145 +20,145 @@ from fairscape_cli.models.dataset import GenerateDataset from fairscape_cli.models.software import GenerateSoftware from fairscape_cli.models.rocrate import ( - GenerateROCrate, - ReadROCrateMetadata, - 
AppendCrate + GenerateROCrate, + ReadROCrateMetadata, + AppendCrate ) from sqids import Sqids class TestAPI(unittest.TestCase): - - def test_api(self): - rocratePath = pathlib.Path.cwd() / 'tests'/ 'data' / 'test_api' - - # delete the test_api folder - metadataFile = rocratePath / 'ro-crate-metadata.json' - metadataFile.unlink() - - rocrate_metadata = { - "guid": "ark:59853/UVA/B2AI/rocrate_test", - "name": 'test rocrate', - "organizationName": "UVA", - "projectName": "B2AI", - "description": "Testing ROCrate Model", - "keywords": ["test", "fair"], - "path": rocratePath - } - - # touch a file for the dataset to say exists - - rocrate = GenerateROCrate(**rocrate_metadata) - - software_metadata={ - "guid" : "955cf26c-e3a3-4f0f-b2df-fca4c693cac4:cm4ai_chromatin_mda-mb-468_untreated_ifimage_0.7alpha", - "author": "Cell Maps team", - "url": "https://github.com/idekerlab/cellmaps_utils", - "name": "cellmaps_utils", - "keywords": [ - "CM4AI", - "0.7alpha", - "MDA-MB-468", - "untreated", - "IF microscopy", - "images", - "breast; mammary gland", - "chromatin", - "tools", - "cellmaps_utils" - ], - "description": "CM4AI 0.7alpha MDA-MB-468 untreated IF microscopy images breast; mammary gland chromatin Contains utilities needed by Cell Maps tools", - "dateModified": "2024-10-22", - "version": "0.5.0", - "fileFormat": "py", - "usedByComputation": [], - "associatedPublication": None, - "additionalDocumentation": None, - "filepath": "https://github.com/idekerlab/cellmaps_utils", - "cratePath": rocratePath - } - software = GenerateSoftware(**software_metadata) - - yellowFolder = rocratePath / 'yellow' - yellowFolder.mkdir(exist_ok=True) - - # create 10k identifiers - datasetList = [] - #for i in range(100000): - # fileName = f'B2AI_5_untreated_B5_R5_z01_yellow_{i}.jpg' - # datasetFilePath = yellowFolder / fileName - # datasetFilePath.touch(exist_ok=True) - - for i in range(10000): - fileName = f'B2AI_5_untreated_B5_R5_z01_yellow_{i}.jpg' - datasetMetadata = { - "guid": "322ab5a2-e6a7-4c46-be79-cbf3e9453cde:cm4ai_chromatin_mda-mb-468_untreated_ifimage_0.7alpha", - "name": "B2AI_5_untreated_B5_R5_z01_yellow.jpg yellow channel image", - "keywords": [ - "CM4AI", - "0.7alpha", - "MDA-MB-468", - "untreated", - "IF microscopy", - "images", - "breast; mammary gland", - "chromatin", - "yellow", - "IF", - "image", - "ER (Calreticulin antibody)" - ], - "description": "CM4AI 0.7alpha MDA-MB-468 untreated IF microscopy images breast; mammary gland chromatin IF image file", - "author": "Lundberg Lab", - "datePublished": "2024-10-22", - "version": "0.7alpha", - "dataFormat": "jpg", - "generatedBy": [], - "derivedFrom": [], - "usedBy": [], - "url": None, - "associatedPublication": None, - "additionalDocumentation": None, - "schema": None, - "filepath": f"file:///yellow/{fileName}", - "cratePath": rocratePath - } - dataset = GenerateDataset(**datasetMetadata) - datasetList.append(dataset) - - AppendCrate(rocratePath, datasetList) - - # read in the crate metadata - rocrateMetadataRecord = ReadROCrateMetadata(rocratePath) - rocrateGUIDs = [ elem.guid for elem in rocrateMetadataRecord.metadataGraph] - - # assert that all dataset guids are present - for ds in datasetList: - assert ds.guid in rocrateGUIDs - - computation_metadata = { - "guid": "test guid", - "name": "Image Compression", - "runBy": "Chris Churas", - "command": "./test.sh", - "dateCreated": "10-28-2024", - "description": "A placeholder computation for image compression", - "keywords": ["cm4ai", "image"], - "usedSoftware": software.guid, - "usedDataset": [ds.guid 
for ds in datasetList], - "generated": None - } - computation = GenerateComputation(**computation_metadata) - AppendCrate(rocratePath, [software, computation]) - - # read in ROCrate - rocrateMetadataRecord = ReadROCrateMetadata(rocratePath) - rocrateGUIDs = [ elem.guid for elem in rocrateMetadataRecord.metadataGraph] - - assert computation.guid in rocrateGUIDs - assert software.guid in rocrateGUIDs - - - + + def setUp(self): + # Create test directory structure + self.rocratePath = pathlib.Path.cwd() / 'tests' / 'data' / 'test_api' + self.rocratePath.mkdir(parents=True, exist_ok=True) + + def tearDown(self): + # Clean up test directory after tests + pass + # if self.rocratePath.exists(): + # shutil.rmtree(self.rocratePath) + + def test_api(self): + # Clean start - safely handle metadata file deletion + metadataFile = self.rocratePath / 'ro-crate-metadata.json' + if metadataFile.exists(): + metadataFile.unlink() + + rocrate_metadata = { + "guid": "ark:59853/UVA/B2AI/rocrate_test", + "name": 'test rocrate', + "organizationName": "UVA", + "projectName": "B2AI", + "description": "Testing ROCrate Model", + "keywords": ["test", "fair"], + "path": self.rocratePath + } + + rocrate = GenerateROCrate(**rocrate_metadata) + + software_metadata = { + "guid": "955cf26c-e3a3-4f0f-b2df-fca4c693cac4:cm4ai_chromatin_mda-mb-468_untreated_ifimage_0.7alpha", + "author": "Cell Maps team", + "url": "https://github.com/idekerlab/cellmaps_utils", + "name": "cellmaps_utils", + "keywords": [ + "CM4AI", + "0.7alpha", + "MDA-MB-468", + "untreated", + "IF microscopy", + "images", + "breast; mammary gland", + "chromatin", + "tools", + "cellmaps_utils" + ], + "description": "CM4AI 0.7alpha MDA-MB-468 untreated IF microscopy images breast; mammary gland chromatin Contains utilities needed by Cell Maps tools", + "dateModified": "2024-10-22", + "version": "0.5.0", + "fileFormat": "py", + "usedByComputation": [], + "associatedPublication": None, + "additionalDocumentation": None, + "filepath": "https://github.com/idekerlab/cellmaps_utils", + "cratePath": self.rocratePath + } + software = GenerateSoftware(**software_metadata) + + yellowFolder = self.rocratePath / 'yellow' + yellowFolder.mkdir(exist_ok=True) + + # Create datasets + datasetList = [] + for i in range(10000): + fileName = f'B2AI_5_untreated_B5_R5_z01_yellow_{i}.jpg' + datasetMetadata = { + "guid": f"322ab5a2-e6a7-4c46-be79-cbf3e9453cde:cm4ai_chromatin_mda-mb-468_untreated_ifimage_0.7alpha_{i}", # Make unique + "name": f"B2AI_5_untreated_B5_R5_z01_yellow_{i}.jpg yellow channel image", + "keywords": [ + "CM4AI", + "0.7alpha", + "MDA-MB-468", + "untreated", + "IF microscopy", + "images", + "breast; mammary gland", + "chromatin", + "yellow", + "IF", + "image", + "ER (Calreticulin antibody)" + ], + "description": "CM4AI 0.7alpha MDA-MB-468 untreated IF microscopy images breast; mammary gland chromatin IF image file", + "author": "Lundberg Lab", + "datePublished": "2024-10-22", + "version": "0.7alpha", + "dataFormat": "jpg", + "generatedBy": [], + "derivedFrom": [], + "usedBy": [], + "url": None, + "associatedPublication": None, + "additionalDocumentation": None, + "schema": None, + "filepath": f"file:///yellow/{fileName}", + "cratePath": self.rocratePath + } + dataset = GenerateDataset(**datasetMetadata) + datasetList.append(dataset) + + AppendCrate(self.rocratePath, datasetList) + + # Verify crate metadata + rocrateMetadataRecord = ReadROCrateMetadata(self.rocratePath) + rocrateGUIDs = [elem.guid for elem in rocrateMetadataRecord.metadataGraph] + + # Verify all 
dataset GUIDs are present + for ds in datasetList: + self.assertIn(ds.guid, rocrateGUIDs, f"Dataset GUID {ds.guid} not found in metadata") + + computation_metadata = { + "guid": "test-computation-guid", # Made more specific + "name": "Image Compression", + "runBy": "Chris Churas", + "command": "./test.sh", + "dateCreated": "10-28-2024", + "description": "A placeholder computation for image compression", + "keywords": ["cm4ai", "image"], + "usedSoftware": software.guid, + "usedDataset": [ds.guid for ds in datasetList], + "generated": None + } + computation = GenerateComputation(**computation_metadata) + AppendCrate(self.rocratePath, [software, computation]) + + # Final verification + rocrateMetadataRecord = ReadROCrateMetadata(self.rocratePath) + rocrateGUIDs = [elem.guid for elem in rocrateMetadataRecord.metadataGraph] + + self.assertIn(computation.guid, rocrateGUIDs, "Computation GUID not found in metadata") + self.assertIn(software.guid, rocrateGUIDs, "Software GUID not found in metadata") if __name__ == "__main__": - unittest.main() \ No newline at end of file + unittest.main() \ No newline at end of file
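
The summary.py helper added under tests/stats-compute-tests can also be driven directly from Python instead of through the CLI computation. A minimal sketch, assuming it is run from inside tests/stats-compute-tests so that summary.py and numbers.csv sit in the working directory; the stats_output directory name is illustrative:

from pathlib import Path

from summary import generate_summary_stats  # tests/stats-compute-tests/summary.py

# Writes summary_stats_<input filename> (here summary_stats_numbers.csv) into stats_output/
generate_summary_stats("numbers.csv", "stats_output")

print(Path("stats_output", "summary_stats_numbers.csv").read_text())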
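The compute-statistics workflow that test_cli_workflow drives through unittest can be reproduced as a standalone script. A sketch under the same assumptions as the test: fairscape_cli is installed, the commands run from tests/stats-compute-tests, and each command prints the ARK identifier of the element it registers.

import datetime
import json
import subprocess


def run(cmd: str) -> str:
    """Run a fairscape_cli command and return its stdout (the registered identifier)."""
    result = subprocess.run(cmd, shell=True, capture_output=True, text=True, check=True)
    return result.stdout.strip()


today = datetime.date.today().isoformat()

run('python -m fairscape_cli rocrate init '
    '--name "Data Analysis Project" '
    '--organization-name "My Organization" '
    '--project-name "Data Analysis" '
    '--description "A project for analyzing data using summary statistics" '
    '--keywords "data-analysis" --keywords "statistics" --keywords "python"')

software_id = run('python -m fairscape_cli rocrate register software ./ '
                  '--name "Summary Statistics Generator" '
                  '--author "Your Name" --version "1.0.0" '
                  '--description "Python script that generates summary statistics for CSV data" '
                  '--keywords "statistics" --file-format "text/x-python" '
                  f'--date-modified "{today}" --filepath "summary.py"')

dataset_id = run('python -m fairscape_cli rocrate register dataset ./ '
                 '--name "Analysis Dataset" --author "Your Name" --version "1.0.0" '
                 f'--date-published "{today}" '
                 '--description "Dataset for statistical analysis" '
                 '--keywords "statistics" --data-format "text/csv" --filepath "numbers.csv"')

computation_id = run('python -m fairscape_cli rocrate compute-statistics ./ '
                     f'--dataset-id "{dataset_id}" --software-id "{software_id}" --command "python"')

# The registered dataset should now point at whatever the computation generated.
with open('ro-crate-metadata.json') as f:
    graph = json.load(f)['@graph']

dataset = next(item for item in graph if item['@id'] == dataset_id)
computation = next(item for item in graph if item['@id'] == computation_id)
assert dataset['hasSummaryStatistics'] == computation['generated']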
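test_dataset_with_summary_stats exercises the other path: attaching an already computed statistics file at registration time via --summary-statistics-filepath. A sketch of that flow, assuming a crate has already been initialized in the current directory (for example by the script above) and that numbers.csv is present; fake_summary.csv mirrors the fixture the test writes.

import datetime
import json
import subprocess


def run(cmd: str) -> str:
    result = subprocess.run(cmd, shell=True, capture_output=True, text=True, check=True)
    return result.stdout.strip()


# Pre-computed statistics file, written the same way the test does.
with open('fake_summary.csv', 'w') as f:
    f.write("statistic,value\nmean,42.0\nmedian,41.5\nstd,5.2")

dataset_id = run('python -m fairscape_cli rocrate register dataset ./ '
                 '--name "Test Dataset" --author "Test Author" --version "1.0.0" '
                 f'--date-published "{datetime.date.today().isoformat()}" '
                 '--description "Dataset with pre-existing summary statistics" '
                 '--keywords "data" --keywords "testing" --data-format "text/csv" '
                 '--filepath "numbers.csv" '
                 '--summary-statistics-filepath "fake_summary.csv"')

with open('ro-crate-metadata.json') as f:
    graph = json.load(f)['@graph']

dataset = next(item for item in graph if item['@id'] == dataset_id)
stats_id = dataset['hasSummaryStatistics']

# The statistics file is registered as its own EVI Dataset, linked to the primary
# dataset through a Computation whose generated list contains it.
computation = next(item for item in graph
                   if item['@type'] == 'https://w3id.org/EVI#Computation'
                   and stats_id in item.get('generated', []))
assert computation['usedDataset'] == [dataset_id]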
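For the programmatic route that test_rocrate_api.py takes, the same models can be used without the CLI. A compact sketch: the field set mirrors the dictionaries used in that test, the guid, path, and metadata values are illustrative, and the diff does not show which of these fields are strictly required.

import pathlib

from fairscape_cli.models.dataset import GenerateDataset
from fairscape_cli.models.rocrate import GenerateROCrate, ReadROCrateMetadata, AppendCrate

cratePath = pathlib.Path.cwd() / 'tests' / 'data' / 'api_sketch'
yellowFolder = cratePath / 'yellow'
yellowFolder.mkdir(parents=True, exist_ok=True)
(yellowFolder / 'example.jpg').touch()

GenerateROCrate(
    guid="ark:59853/UVA/B2AI/rocrate_sketch",
    name="sketch rocrate",
    organizationName="UVA",
    projectName="B2AI",
    description="Minimal ROCrate built through the models API",
    keywords=["example"],
    path=cratePath,
)

dataset = GenerateDataset(
    guid="sketch-dataset-guid",
    name="example yellow channel image",
    keywords=["example", "image"],
    description="Illustrative dataset entity",
    author="Example Lab",
    datePublished="2024-10-22",
    version="0.1.0",
    dataFormat="jpg",
    generatedBy=[],
    derivedFrom=[],
    usedBy=[],
    url=None,
    associatedPublication=None,
    additionalDocumentation=None,
    schema=None,
    filepath="file:///yellow/example.jpg",
    cratePath=cratePath,
)

AppendCrate(cratePath, [dataset])

record = ReadROCrateMetadata(cratePath)
assert dataset.guid in [entity.guid for entity in record.metadataGraph]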