From fb712bd6864037f8c65d918de5dab8e8241e0670 Mon Sep 17 00:00:00 2001 From: sdorkenw Date: Thu, 28 Aug 2025 17:16:55 -0700 Subject: [PATCH 1/3] Add cell matching schemas and CellTypeReferenceUser - Add CellMatch schema for matching cells between datasets - Add CellSimilarity schema for scoring similarity between two cells - Add CellMatchReference schema (reference annotation version) - Add CellTypeReferenceUser schema extending CellTypeReference with user_id - Register all new schemas in type_mapping for system availability --- emannotationschemas/__init__.py | 24 +++++- .../schemas/cell_type_local.py | 16 +++- emannotationschemas/schemas/matching.py | 83 +++++++++++++++++++ 3 files changed, 116 insertions(+), 7 deletions(-) create mode 100644 emannotationschemas/schemas/matching.py diff --git a/emannotationschemas/__init__.py b/emannotationschemas/__init__.py index 9ae8630..753f668 100644 --- a/emannotationschemas/__init__.py +++ b/emannotationschemas/__init__.py @@ -26,7 +26,11 @@ from emannotationschemas.schemas.braincircuits import ( BrainCircuitsBoundTagAnnotationUser, ) -from emannotationschemas.schemas.cell_type_local import CellTypeLocal, CellTypeReference +from emannotationschemas.schemas.cell_type_local import ( + CellTypeLocal, + CellTypeReference, + CellTypeReferenceUser, +) from emannotationschemas.schemas.contact import Contact from emannotationschemas.schemas.derived_spatial_point import ( DerivedNumeric, @@ -41,9 +45,12 @@ FunctionalCoregistration, FunctionalUnitCoregistration, FunctionalUnitCoregistrationExtended, - V1DDFunctionalUnitCoregistration + V1DDFunctionalUnitCoregistration, +) +from emannotationschemas.schemas.functional_props import ( + FunctionalPropertiesBCM, + DigitalTwinPropertiesBCM, ) -from emannotationschemas.schemas.functional_props import FunctionalPropertiesBCM, DigitalTwinPropertiesBCM from emannotationschemas.schemas.glia_contact import GliaContact from emannotationschemas.schemas.groups import SimpleGroup from emannotationschemas.schemas.neuropil import FlyNeuropil @@ -59,6 +66,11 @@ from emannotationschemas.schemas.reference_text_float import ( ReferenceTagFloat, ) +from emannotationschemas.schemas.matching import ( + CellMatch, + CellSimilarity, + CellMatchReference, +) from emannotationschemas.schemas.synapse import ( BuhmannEcksteinSynapseSchema, BuhmannSynapseSchema, @@ -89,6 +101,7 @@ "reference_tag": ReferenceTagAnnotation, "cell_type_local": CellTypeLocal, "cell_type_reference": CellTypeReference, + "cell_type_reference_user": CellTypeReferenceUser, "nucleus_detection": NucleusDetection, "bound_tag": BoundTagAnnotation, "bound_double_tag": BoundDoubleTagAnnotation, @@ -122,7 +135,10 @@ "reference_tag_float": ReferenceTagFloat, "compartment_proofread_status_strategy": CompartmentProofreadStatusStrategy, "reference_tag_with_confidence": ReferenceTagWithConfidence, - "digital_twin_properties": DigitalTwinPropertiesBCM + "digital_twin_properties": DigitalTwinPropertiesBCM, + "cell_match": CellMatch, + "cell_similarity": CellSimilarity, + "cell_match_reference": CellMatchReference, } diff --git a/emannotationschemas/schemas/cell_type_local.py b/emannotationschemas/schemas/cell_type_local.py index 5f4c568..0230180 100644 --- a/emannotationschemas/schemas/cell_type_local.py +++ b/emannotationschemas/schemas/cell_type_local.py @@ -1,5 +1,9 @@ import marshmallow as mm -from emannotationschemas.schemas.base import AnnotationSchema, BoundSpatialPoint, ReferenceAnnotation +from emannotationschemas.schemas.base import ( + AnnotationSchema, + BoundSpatialPoint, + ReferenceAnnotation, +) allowed_types = dict( valence=["e", "i", "g", "uncertain"], @@ -77,7 +81,6 @@ def validate_type(self, item, **kwargs): return item - class CellTypeReference(ReferenceAnnotation): classification_system = mm.fields.String( @@ -85,7 +88,6 @@ class CellTypeReference(ReferenceAnnotation): ) cell_type = mm.fields.String(required=True, description="Cell type name") - @mm.post_load def validate_type(self, item, **kwargs): @@ -99,3 +101,11 @@ def validate_type(self, item, **kwargs): item["valid"] = True return item + + +class CellTypeReferenceUser(CellTypeReference): + """CellTypeReference schema with user tracking""" + + user_id = mm.fields.Int( + required=True, description="User who created the cell type classification." + ) diff --git a/emannotationschemas/schemas/matching.py b/emannotationschemas/schemas/matching.py new file mode 100644 index 0000000..3c2c147 --- /dev/null +++ b/emannotationschemas/schemas/matching.py @@ -0,0 +1,83 @@ +import marshmallow as mm +from emannotationschemas.schemas.base import ( + AnnotationSchema, + BoundSpatialPoint, + NumericField, + ReferenceAnnotation, +) + + +class CellMatch(AnnotationSchema): + """Schema for matching cells between datasets""" + + pt = mm.fields.Nested( + BoundSpatialPoint, + required=True, + description="Source cell identity (pt_root_id, pt_supervoxel_id, pt_position)", + ) + query_root_id = NumericField( + required=True, + description="Static root ID used for score calculation", + ) + match_id = mm.fields.Str( + required=True, + description="Unique identifier for cell from another dataset", + ) + score = mm.fields.Float( + required=True, + description="Normalised NBLAST similarity score [for CAVE schema, could be any numeric score]", + ) + validation = mm.fields.Bool( + required=False, + default=False, + description="False if not separately validated, True if separately validated", + ) + + +class CellSimilarity(AnnotationSchema): + """Schema for cell similarity scoring between two cells""" + + pt = mm.fields.Nested( + BoundSpatialPoint, + required=True, + description="Query cell identity (pt_root_id, pt_supervoxel_id, pt_position)", + ) + match_pt = mm.fields.Nested( + BoundSpatialPoint, + required=True, + description="Target cell identity (match_root_id, match_supervoxel_id, match_position)", + ) + query_root_id = NumericField( + required=True, + description="Static root ID for query cell at time of score calculation", + ) + match_root_id = NumericField( + required=True, + description="Static root ID for target cell at time of score calculation", + ) + score = mm.fields.Float( + required=True, + description="Numeric similarity score between the two cells", + ) + + +class CellMatchReference(ReferenceAnnotation): + """Reference annotation version of CellMatch schema""" + + query_root_id = NumericField( + required=True, + description="Static root ID used for score calculation", + ) + match_id = mm.fields.Str( + required=True, + description="Unique identifier for cell from another dataset", + ) + score = mm.fields.Float( + required=True, + description="Normalised NBLAST similarity score [for CAVE schema, could be any numeric score]", + ) + validation = mm.fields.Bool( + required=False, + default=False, + description="False if not validated by a human, True if validated by a human", + ) From 86688b9b606bdb2657ab28a933bb3fd8e2eaae8b Mon Sep 17 00:00:00 2001 From: Sven Dorkenwald <9481250+sdorkenw@users.noreply.github.com> Date: Thu, 28 Aug 2025 17:20:15 -0700 Subject: [PATCH 2/3] Update emannotationschemas/schemas/matching.py Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- emannotationschemas/schemas/matching.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/emannotationschemas/schemas/matching.py b/emannotationschemas/schemas/matching.py index 3c2c147..a613c05 100644 --- a/emannotationschemas/schemas/matching.py +++ b/emannotationschemas/schemas/matching.py @@ -74,7 +74,7 @@ class CellMatchReference(ReferenceAnnotation): ) score = mm.fields.Float( required=True, - description="Normalised NBLAST similarity score [for CAVE schema, could be any numeric score]", + description="Normalized NBLAST similarity score [for CAVE schema, could be any numeric score]", ) validation = mm.fields.Bool( required=False, From 1bf5477eb13dc78a52ffa61a1be92eb3dc4b8e84 Mon Sep 17 00:00:00 2001 From: sdorkenw Date: Thu, 28 Aug 2025 17:27:02 -0700 Subject: [PATCH 3/3] Add database indexes to match_id fields - Set index=True for match_id in CellMatch schema - Set index=True for match_id in CellMatchReference schema - Improves query performance for cell matching lookups --- emannotationschemas/schemas/matching.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/emannotationschemas/schemas/matching.py b/emannotationschemas/schemas/matching.py index a613c05..8de21b7 100644 --- a/emannotationschemas/schemas/matching.py +++ b/emannotationschemas/schemas/matching.py @@ -22,6 +22,7 @@ class CellMatch(AnnotationSchema): match_id = mm.fields.Str( required=True, description="Unique identifier for cell from another dataset", + index=True, ) score = mm.fields.Float( required=True, @@ -71,6 +72,7 @@ class CellMatchReference(ReferenceAnnotation): match_id = mm.fields.Str( required=True, description="Unique identifier for cell from another dataset", + index=True, ) score = mm.fields.Float( required=True,