diff --git a/src/data/sheets/schemasheet.tsv b/src/data/sheets/schemasheet.tsv deleted file mode 100644 index 59110cb..0000000 --- a/src/data/sheets/schemasheet.tsv +++ /dev/null @@ -1,394 +0,0 @@ -class slot title description range required multivalued enum permissible_value meaning ->class slot title description range required multivalued enum permissible_value meaning - studyCode Study Code Unique identifier for the study (generally a short acronym) enum_studyCode true - studyTitle Study Title Full title of the study string true - program Program Funding source(s) for the study (pipe-separated if multiple) enum_program true true - studyDescription Study Description Brief description of the study (2-4 sentences) string true - principalInvestigatorName Principal Investigator Name Name(s) of Principal Investigator(s) of this study; pipe-separated if multiple string true true - studyContactName Study Contact Name Name of contact person for this study; pipe-separated if multiple string true true - studyContactInstitution Study Contact Institution Institution of contact person for this study; pipe-separated if multiple string true true - studyContactEmail Study Contact Email Email address of contact person for this study; pipe-separated if multiple string true true - vbrEmail VBR Email Email address for Virtual Biorepository requests/inquiries, if participating string - vbrUrl VBR URL Link to Virtual Biorepository request form, if participating uri - vbrReadme VBR Readme Instructions for contacting or requesting samples from Virtual Biorepository, if participating string - researchDomain Research Domain Main research domain(s) of the study, other than Down syndrome; pipe-separated if multiple enum_researchDomain true true - participantLifespanStage Participant Lifespan Stage Focus age group(s) of the study population; pipe-separated if multiple enum_participantLifespanStage true true - selectionCriteria Selection Criteria A limited list of criteria for selection of 
participants in the study, provided in terms of inclusion and exclusion criteria. For Observational studies, a description of the population from which the groups or cohorts were selected (for example, primary care clinic, community sample, residents of a certain town). string - studyDesign Study Design Overall design of study, including whether it is longitudinal and whether family members/unrelated controls are also enrolled enum_studyDesign true true - clinicalDataSourceType Clinical Data Source Type Source(s) of data collected from study participants; pipe-separated if multiple enum_clinicalDataSourceType true true - studyWebsite Study Website Website for the study uri - publication Publication uri true - expectedNumberOfParticipants Expected Number of Participants integer true - guidType GUID Type System used to generate globally unique identifiers (GUIDs) enum_guidType true - guidMapped GUIDs Mapped? For studies using NDAR GUIDs, have the GUIDs been added to the INCLUDE GUID Mapping File? boolean - dbgap dbGaP string true - acknowledgments Acknowledgments Funding statement and acknowledgments for this study string true - citationStatement Citation Statement "Statement that secondary data users should use to acknowledge use of this dataset. E.g., ""The results analyzed and here are based in whole or in part upon data generated by the INCLUDE (INvestigation of Co-occurring conditions across the Lifespan to Understand Down syndromE) Project , and were accessed from the INCLUDE Data Hub and .""" string true - doi DOI Unique Digital Object Identifier for each Study and Dataset, minted by the DCC using DataCite uri false - doiCitation DOI Citation Bibliographic citation for DOI, generated by DataCite string false - nctId NCT ID "The unique identification code given to each clinical study upon registration at ClinicalTrials.gov. The format is ""NCT"" followed by an 8-digit number. 
Also known as ClinicalTrials.gov Identifier" string true false - clinicalStudyDesign Clinical Study Design The nature of the investigation or investigational use for which clinical study information is being submitted. Select one. enum_clinicalStudyDesign true false - trialPhase Trial Phase For a clinical trial of a drug product (including a biological product), the numerical phase of such clinical trial. Select only one. enum_trialPhase true false - primaryPurpose Primary Purpose The main objective of the intervention(s) being evaluated by the clinical trial. Select one. enum_primaryPurpose true false - interventionType Intervention Type For each intervention studied in the clinical study, the general type of intervention. Select one. enum_interventionType true false - intervention Intervention For interventional studies, specify the intervention(s) associated with each arm or group; at least one intervention must be specified for interventional studies. Use non-proprietary names where available. Multiple values should be pipe-separated. For observational studies, specify the intervention(s)/exposure(s) of interest, if any. string true true - armInformation Arm Information For interventional studies, a description of each arm of the clinical trial that indicates its role in the clinical trial (e.g. Experimental, Active Comparator, Placebo Comparator, Sham Comparator; No Intervention; Other); provides an informative title; and, if necessary, additional descriptive information (including which interventions are administered in each arm) to differentiate each arm from other arms in the clinical trial. Multiple values should be pipe-separated. For observational studies, specify the predefined participant groups (cohorts) to be studied, e.g. those with or without a condition/exposure. string true true - armAllocation Arm Allocation The method by which participants are assigned to arms in a clinical trial. 
enum_armAllocation true false - interventionAssignmentStrategy Intervention Assignment Strategy In an interventional study, the strategy for assigning interventions to participants. enum_interventionAssignmentStrategy true false - primaryOutcomeMeasure Primary Outcome Measure A description of each primary outcome measure (title, description, and time point/duration of assessment). Multiple values should be pipe-separated. string true true - secondaryOutcomeMeasure Secondary Outcome Measure A description of each secondary outcome measure (title, description, and time point/duration of assessment). Multiple values should be pipe-separated. string true true - datasetName Dataset Name Full name of the dataset, provided by contributor string true - datasetDescription Dataset Description Brief additional notes about the dataset (1-3 sentences) that are not already captured in the other fields string - datasetGlobalId Dataset Global ID Unique Global ID for dataset, generated by DCC string false - datasetExternalId Dataset External ID Unique identifier or code for dataset, if provided by contributor string - expectedNumberOfFiles Expected Number of Files Expected number of files associated with this dataset, including dictionaries. If additional explanation is needed, please add to Dataset Description field. integer false - dataCollectionStartYear Data Collection Start Year Year that data collection started string false - dataCollectionEndYear Data Collection End Year Year that data collection ended string false - accessLimitations Access Limitations Data access limitations, as defined in the GA4GH Data Use Ontology (DUO; can list more than one, pipe separated) string false - accessRequirements Access Requirements Data access requirements, as defined in the GA4GH Data Use Ontology (DUO; can list more than one, pipe separated) string false - otherRepository Other Repository URL if dataset is already deposited in a public repository other than dbGaP (e.g. 
LONI, Metabolomics Workbench, etc.) uri - otherAccessAuthority Other Access Authority Email or URL for dataset's Access Authority, if not dbGaP string - isHarmonized Is Harmonized All of the elements in this Dataset are harmonized and available in the INCLUDE Data Hub boolean - datasetManifestLocation Dataset Manifest Location Location of associated Dataset Manifest string - participantGlobalId Participant Global ID Unique INCLUDE global identifier for the participant, assigned by DCC string true - participantExternalId Participant External ID Unique, de-identified identifier for the participant, assigned by data contributor. External IDs must be two steps removed from personal information in the study records. string true - familyId Family ID Unique identifer for family to which Participant belongs, assigned by data contributor string - familyType Family Type Structure of family members participating in the study enum_familyType true - fatherId Father ID Participant External ID for Participant's father (NA if Participant is not the proband) string - motherId Mother ID Participant External ID for Participant's mother (NA if Participant is not the proband) string - siblingId Sibling ID Participant External ID for Participant's sibling(s) (NA if Participant is not the proband) string - otherFamilyMemberId Other Family Member ID Participant External ID for Participant's other family members (NA if Participant is not the proband) string - familyRelationship Family Relationship Relationship of Participant to proband enum_familyRelationship true - sex Sex Sex of Participant enum_sex true - race Race Race of Participant enum_race true - ethnicity Ethnicity Ethnicity of Participant enum_ethnicity true - downSyndromeStatus Down Syndrome Status Down Syndrome status of participant enum_downSyndromeStatus true - ageAtFirstParticipantEngagement Age at First Participant Engagement Age in days of Participant at first recorded study event (enrollment, visit, observation, sample 
collection, survey completion, etc.). Age at enrollment is preferred, if available. integer true - firstParticipantEngagementEvent First Participant Engagement Event Event for which Age at First Participant Engagement is given (e.g. enrollment, visit, observation, sample collection, survey completion, etc.). Age at enrollment is preferred, if available. string true - outcomesVitalStatus Outcomes Vital Status Whether participant is alive or dead enum_vital_status - ageAtLastVitalStatus Age at Last Vital Status Age in days when participant's vital status was last recorded integer - eventId Event ID Identifier for event (Visit, Survey completion, Sample collection, etc.) to which the Condition data are linked, if applicable. There may be multiple events linked to a Participant. string - eventType Event Type Type of event for which Event ID is given (Visit, Survey completion, Sample collection, etc.) string - conditionMeasureSourceText Condition or Measure Source Text Co-occurring Condition (phenotype or diagnosis) or Measure (observation with numeric value), as described by data contributor. The Down Syndrome Genetic Diagnosis will be rolled into this field. 
string - ageAtConditionMeasureObservation Age At Condition or Measure Observation Age in days at which Condition or Measure was observed, recorded, or diagnosed integer - conditionInterpretation Condition Interpretation Whether Condition was observed or not enum_conditionInterpretation - conditionStatus Condition Status Whether the Condition is ongoing, has been resolved, or this is a general history of the condition without known dates enum_conditionStatus - conditionDataSource Condition Data Source Whether Condition information was obtained by the investigator or reported by participant/family member enum_conditionDataSource - hpoLabel HPO Label Label for Condition in the Human Phenotype Ontology (HPO) string - hpoCode HPO Code Code for Condition in the Human Phenotype Ontology (HPO) string - mondoLabel MONDO Label Label for Condition in the Mondo Disease Ontology (MONDO) string - mondoCode MONDO Code Code for Condition in the Mondo Disease Ontology (Mondo) string - maxoLabel MAXO Label Label for Condition in the Medical Action Ontology (MAXO) string - maxoCode MAXO Code Code for condition in the Medical Action Ontology (MAXO) string - otherLabel Other Label Label for Condition in another ontology (if no match in HPO, MONDO, or MAXO) string - otherCode Other Code Code for Condition in another ontology (if no match in HPO, MONDO, or MAXO) string - measureValue Measure Value Numeric value of Measure float - measureUnit Measure Unit Unit that is associated with Measure Value (e.g. kg, cm, %, x10^9/L, etc.) string - sampleGlobalId Sample Global ID INCLUDE global identifier for sample, assigned by DCC string true - sampleExternalId Sample External ID Unique identifier for sample, assigned by data contributor. A sample is a unique biological material; two samples with two different IDs are biologically distinct. string true - sampleType Sample Type Type of biological material comprising the Sample (e.g. 
Plasma, White blood cells, Red blood cells, DNA, RNA, Peripheral blood mononuclear cells, CD4+ Tconv cells, NK cells, Monocytes, CD8+ T cells, B cells, Granulocytes, Treg cells) string true - ageAtBiospecimenCollection Age At Biospecimen Collection Age in days of participant at time of biospecimen collection integer - parentSampleGlobalId Parent Sample Global ID INCLUDE global identifier for the direct parent from which Sample was derived, assigned by DCC string - parentSampleExternalId Parent Sample External ID Identifier for the direct parent from which Sample was derived, processed, pooled, etc. (if applicable); assigned by data contributor string - parentSampleType Parent Sample Type Type of biological material comprising the Parent Sample (e.g. Peripheral Whole Blood, Derived Cell Line, Saliva, Whole blood, WBCs) string - collectionGlobalId Collection Global ID INCLUDE global identifier for the eldest sample in a lineage, assigned by DCC string - collectionExternalId Collection External ID Identifier for the eldest sample in a lineage of processed, pooled, or aliquoted samples - typically the material actually collected from the Participant. This may be the same as Parent Sample ID or Sample ID (if no processing was performed). Assigned by data contributor. string - collectionSampleType Collection Sample Type Type of biological material comprising the Collected Sample (e.g. Whole blood, Not reported, Saliva, Derived cell line) string - containerGlobalId Container Global ID INCLUDE global identifier for specific container/aliquot of sample, assigned by DCC string - containerExternalId Container External ID Identifier for specific container/aliquot of sample, assigned by data contributor. For example, distinct aliquots of a sample will have the same Sample ID but different Container IDs. 
string - volume Volume Amount of sample in container float - volumeUnit Volume Unit Unit of sample volume string - concentration Concentration Concentration of sample in container float - concentrationUnit Concentration Unit Unit of sample concentration string - laboratoryProcedure Laboratory Procedure Procedure by which Sample was derived from Parent Sample (e.g. Centrifugation, RBC lysis, Lyse/fix buffer, FACS, PAXgene DNA, PAXgene RNA, Qiagen Allprep, Ficoll) string - biospecimenStorage Biospecimen Storage Method by which Container is stored (e.g. Minus 80 degrees Celsius, Liquid nitrogen storage) string - sampleAvailability Sample Availability Whether or not the Sample (any Container thereof) is potentially available for sharing through the Virtual Biorepository enum_Availability true - containerAvailability Container Availability Whether or not the specific Container is potentially available for sharing through the Virtual Biorepository enum_Availability - participantDataFileManifestLocation Participant-DataFile Manifest Location Location of Participant-DataFile Manifest, if file contains multiple Participants string - fileName File Name Name of file, assigned by data contributor string true - fileGlobalId File Global ID INCLUDE global file identifier, assigned by DCC string true - fileUploadLocation File Upload Location Where source file was uploaded, if not directly to an S3 bucket (e.g. 
Synapse) string - fileS3Location File S3 Location S3 bucket location of file; also serves as dewrangle descriptor string true - drsUri DRS URI Data Repository Services API Uniform Resource Identifier uriorcurie true - fileHash File Hash md5 hash of this file for validation (if known) string - dataAccess Data Access Type of access control on this file, determined by DCC enum_dataAccess true - dataCategory Data Category enum_dataCategory true - dataType Data Type string true - experimentalStrategy Experimental Strategy string true - experimentalPlatform Experimental Platform Specific platform used to perform experiment; pipe-separated if multiple (e.g. SOMAscan, MSD, Luminex, Illumina) string true - fileFormat File Format Format of file (e.g. tsv, cram, gvcf, vcf, maf, txt, pdf, html, png) string true - fileSize File Size Size of file, if known (mainly important if large) integer - fileSizeUnit File Size Unit Unit of file size string - encounterGlobalId Encounter Global ID Unique identifier for Encounter (assigned by DCC) string true false - encounterExternalId Encounter External ID Unique ID or Name of Encounter. For actual visits, this might be a visit ID; for planned encounters, use a consistent format, such as string true false - encounterDescription Encounter Description Name and/or description of of Encounter string true false - encounterGroup Encounter Group Optional grouping for Encounters, e.g. treatment arms. This is a flexible field that different studies might use in different ways. string false true - encounterType Encounter Type Optional type of Encounter, e.g. pre/on/post-intervention, virtual vs. in-person, etc. This is a flexible field that different studies might use in different ways. 
string false true - seriesOrder Series Order Ordering of Encounter relative to others in group, if not already indicated by Timepoint (Study Schedule only) float false false - timepoint Timepoint Timepoint of Encounter relative to anchor (Study Schedule only -- timepoint will be provided by Participant Age at Encounter in actual data) float false false - timepointUnit Timepoint Unit Unit of timepoint (Study Schedule only) string false false - participantAgeAtEncounter Participant Age at Encounter Age in days of Participant at Encounter integer false false - activitiesPerformed Activities Performed External IDs of Activities performed at Encounter; separate with pipes if multiple string false true - samplesCollected Samples Collected External IDs of Samples collected at Encounter; separate with pipes if multiple string false true - filesGenerated Files Generated Names of Files generated during Encounter; separate with pipes if multiple string false true - activityGlobalId Activity Global ID Unique identifier for Activity (assigned by DCC) string true false - activityExternalId Activity External ID "Unique ID of Activity. Could be a ""fake"" activity for study schedule, but must formatted like an ID (e.g. no spaces). Must match Activity ID provided in DataFile metadata. E.g. Study Schedule activities, " string true false - activityDescription Activity Description Name and/or description of Activity string true false - inputClass Input Class Class of input for Activity (NA for study schedule) enum_inputOutputClass false true - inputType Input Type Type of input for Activity (NA for study schedule). Must match Type of Input Class (dataType, sampleType, etc.) string false true - outputClass Output Class Class of output generated by Activity (NA for study schedule) enum_inputOutputClass false true - outputType Output Type Type of output generated by Activity. Must match Type of Output Class. 
For generic/planned Study Schedule activities, use Output Type = Study Schedule string false true -Study Study General information about the study -Study dataCategory Categories of data expected to be collected in this study true -Study dbgap "dbGaP ""phs"" accession code(s) associated with this Study, either for access or informational purposes (pipe-separated if multiple)" -Study publication URL for publication(s) describing the study's rationale and methodology (PubMed Central preferred but not required; pipe-separated if multiple) -Study expectedNumberOfParticipants Expected number of participants in this study (or actual number, if data has been submitted to INCLUDE DCC). If additional explanation is needed, please add to Study Description field. -ClinicalTrial Clinical Trial Study-level metadata specific to a clinical trial, copied from the study's clinicaltrials.gov page -ClinicalTrial studyCode Study -Dataset Dataset Information about a specific grouping of data files -Dataset dataCategory General category of data in Dataset; pipe-separated if multiple true -Dataset dbgap "dbGaP ""phs"" accession code(s) required to access the files in this Dataset, if applicable (pipe-separated if multiple)" -Dataset publication URL for publication(s) describing the Dataset's rationale and methodology (PubMed Central preferred but not required; pipe-separated if multiple) -Dataset expectedNumberOfParticipants Expected number of participants in this Dataset (or actual number, if data has been submitted to INCLUDE DCC). If additional explanation is needed, please add to Dataset Description field. -Dataset dataType Specific type of data contained in Dataset; pipe-separated if multiple (e.g. 
Preprocessed metabolite relative abundance, Absolute protein concentration, Aligned reads, Simple nucleotide variations, GVCF, Gene expression quantifications, Gene fusions, Somatic copy number variations, Somatic structural variations) -Dataset experimentalStrategy Experimental method used to obtain data in Dataset; pipe-separated if multiple (e.g. Whole genome sequencing, RNAseq, Multiplex immunoassay, Mass spec metabolomics) true -Dataset studyCode Study -DatasetManifest Dataset Manifest Mapping information for files in Dataset -DatasetManifest studyCode Study -DatasetManifest fileGlobalId DataFile -DatasetManifest datasetGlobalId Dataset -ParticipantSampleDataFileManifest Participant-Sample-DataFile Manifest List of Participants and/or Samples in DataFiles with multiple Participants and/or Samples -ParticipantSampleDataFileManifest studyCode Study -ParticipantSampleDataFileManifest fileName DataFile -ParticipantSampleDataFileManifest participantExternalId Participant -ParticipantSampleDataFileManifest sampleExternalId Biospecimen -Participant Participant Demographic and clinical information about the participant -Condition Condition Co-occurring conditions and other observations for the participant -Condition studyCode Study -Condition participantGlobalId Participant -Biospecimen Biospecimen A Biospecimen Collected from A Participant -Biospecimen studyCode Study -Biospecimen participantGlobalId Participant -DataFile Data File Metadata about Data Files -DataFile dataCategory General category of data in file (e.g. Clinical, Genomics, Proteomics, Metabolomics, Immune profiling, Transcriptomics) -DataFile dataType Specific type of data contained in file (e.g. 
Preprocessed metabolite relative abundance, Absolute protein concentration, Aligned reads, Simple nucleotide variations, GVCF, Gene expression quantifications, Gene fusions, Somatic copy number variations, Somatic structural variations) -DataFile experimentalStrategy Experimental method used to obtain data in file (e.g. Whole genome sequencing, RNAseq, Multiplex immunoassay, Mass spec metabolomics) -DataFile studyCode Study -DataFile participantGlobalId Participant -DataFile sampleGlobalId Biospecimen -Encounter Encounter Encounters can describe generic planned visits (e.g. for a Clinical Trial study schedule) or actual participant events -Encounter participantExternalId External ID of associated participant (for actual encounters) false true -Encounter activityExternalId External IDs of Activities performed at Encounter; separate with pipes if multiple false true -Encounter sampleExternalId External IDs of Samples collected at Encounter; separate with pipes if multiple false true -Encounter fileName Names of Files generated during Encounter; separate with pipes if multiple false true -Encounter studyCode Study -Activity Activity Activities describe sample collection/processing, assays, data generation, etc. and may be planned (e.g. 
for a Clinical Trial study schedule) or actual -Activity studyCode Study - enum_studyCode - AADSC enum_studyCode aadsc - ABC-DS enum_studyCode abc_ds - ADS enum_studyCode ads - AECOM-DS enum_studyCode aecom_ds - BEST21 enum_studyCode best21 - BrainPower enum_studyCode brainpower - BRI-DSR enum_studyCode bri_dsr - CCDS enum_studyCode ccds - CHILD-DS enum_studyCode child_ds - CHARGE-DS enum_studyCode charge_ds - DECIDAS enum_studyCode decidas - DS-ARC enum_studyCode ds_arc - DS-Brain enum_studyCode ds_brain - DS-COG-ALL enum_studyCode ds_cog_all - DS-COG-AML enum_studyCode ds_cog_aml - DS-DETERMINED enum_studyCode ds_determined - DS-HOME enum_studyCode ds_home - DS-HSAT enum_studyCode ds_hsat - DS-ISP enum_studyCode ds_isp - DS-Nexus enum_studyCode ds_nexus - DS-PALS enum_studyCode ds_pals - DS-PCGC enum_studyCode ds_pcgc - DS-Sleep enum_studyCode ds_sleep - DS-VitE enum_studyCode ds_vite - DS360-CHD enum_studyCode ds360_chd - DSC enum_studyCode dsc - DSpostBFmulti enum_studyCode dspostbfmulti - DSRRS enum_studyCode dsrrs - ECODS enum_studyCode ecods - EXcEEDS enum_studyCode exceeds - HTP enum_studyCode htp - IBIS-DS enum_studyCode ibis-ds - JAKi-DS enum_studyCode jaki_ds - OPTimal enum_studyCode optimal - TEAM-DS enum_studyCode team_ds - TOMI enum_studyCode tomi - TRC-DS enum_studyCode trc_ds - X01-deSmith enum_studyCode x01_desmith - X01-Hakonarson enum_studyCode x01_hakonarson - enum_program - INCLUDE enum_program include - KF enum_program kf - Other enum_program other - enum_researchDomain - Behavior and Behavior Mechanisms enum_researchDomain behavior_and_behavior_mechanisms mesh:D001520 - Congenital Heart Defects enum_researchDomain congenital_heart_defects mesh:D006330 - Immune System Diseases enum_researchDomain immune_system_diseases mesh:D007154 - Hematologic Diseases enum_researchDomain hematologic_diseases mesh:D006402 - Neurodevelopment enum_researchDomain neurodevelopment mesh:D065886 - Sleep Wake Disorders enum_researchDomain sleep_wake_disorders 
mesh:D012893 - All Co-occurring Conditions enum_researchDomain all_co_occurring_conditions mesh:D013568 - Physical Fitness enum_researchDomain physical_fitness mesh:D010809 - Other enum_researchDomain other - enum_participantLifespanStage - Fetal enum_participantLifespanStage fetal - Neonatal 0-28 days old enum_participantLifespanStage neonatal - Pediatric Birth-17 years old enum_participantLifespanStage pediatric - Adult 18+ years old enum_participantLifespanStage adult - enum_studyDesign - Case-Control enum_studyDesign case_control - Case Set enum_studyDesign case_set - Control Set enum_studyDesign control_set - Clinical Trial enum_studyDesign clinical_trial - Cross-Sectional enum_studyDesign cross_sectional - Family/Twins/Trios enum_studyDesign family_twins_trios - Interventional enum_studyDesign interventional - Longitudinal enum_studyDesign longitudinal - Trial Readiness Study enum_studyDesign trial_readiness_study - Tumor vs Matched Normal enum_studyDesign tumor_vs_matched_normal - enum_clinicalDataSourceType - Medical Record Data obtained directly from medical record enum_clinicalDataSourceType medical_record - Investigator Assessment Data obtained by examination, interview, etc. with investigator enum_clinicalDataSourceType investigator_assessment - Participant or Caregiver Report Data obtained from survey, questionnaire, etc. 
filled out by participant or caregiver enum_clinicalDataSourceType participant_or_caregiver_report - Other Data obtained from other source, such as tissue bank enum_clinicalDataSourceType other - Unknown enum_clinicalDataSourceType unknown - enum_dataCategory - Unharmonized Demographic/Clinical Data enum_dataCategory unharmonized_demographic_clinical_data - Harmonized Demographic/Clinical Data enum_dataCategory harmonized_demographic_clinical_data - Genomics enum_dataCategory genomics - Transcriptomics enum_dataCategory transcriptomics - Epigenomics enum_dataCategory epigenomics - Proteomics enum_dataCategory proteomics - Metabolomics enum_dataCategory metabolomics - Cognitive/Behavioral enum_dataCategory cognitive_behavioral - Immune Profiling enum_dataCategory immune_profiling - Imaging enum_dataCategory imaging - Microbiome enum_dataCategory microbiome - Fitness enum_dataCategory fitness - Physical Activity enum_dataCategory physical_activity - Other enum_dataCategory other - Sleep Study enum_dataCategory sleep_study - enum_guidType - NDAR GUID generated by NIMH Data Archive (NDA) GUID tool enum_guidType ndar - Other GUID generated by other system enum_guidType other - No GUID No GUIDs used in this study enum_guidType no_guid - enum_clinicalStudyDesign - Interventional enum_clinicalStudyDesign interventional - Observational enum_clinicalStudyDesign observational - Patient Registry enum_clinicalStudyDesign patient_registry - Expanded Access enum_clinicalStudyDesign expanded_access - enum_trialPhase - Not Applicable enum_trialPhase not_applicable - Early Phase 1 enum_trialPhase early_phase_1 - Phase 1 enum_trialPhase phase_1 - Phase 1/2 enum_trialPhase phase_1_2 - Phase 2 enum_trialPhase phase_2 - Phase 2/3 enum_trialPhase phase_2_3 - Phase 3 enum_trialPhase phase_3 - Phase 4 enum_trialPhase phase_4 - enum_primaryPurpose - Treatment enum_primaryPurpose treatment - Prevention enum_primaryPurpose prevention - Diagnostic enum_primaryPurpose diagnostic - Supportive 
Care enum_primaryPurpose supportive_care - Screening enum_primaryPurpose screening - Health Services Research enum_primaryPurpose health_services_research - Basic Science enum_primaryPurpose basic_science - Device Feasibility enum_primaryPurpose device_feasibility - Other enum_primaryPurpose other - enum_interventionType - Drug enum_interventionType drug - Device enum_interventionType device - Biological/Vaccine enum_interventionType biological_vaccine - Procedure/Surgery enum_interventionType procedure_surgery - Radiation enum_interventionType radiation - Behavioral enum_interventionType behavioral - Genetic enum_interventionType genetic - Dietary Supplement enum_interventionType dietary_supplement - Combination Product enum_interventionType combination_product - Diagnostic Test enum_interventionType diagnostic_test - Other enum_interventionType other - enum_armAllocation - Not Applicable enum_armAllocation not_applicable - Randomized enum_armAllocation randomized - Nonrandomized enum_armAllocation nonrandomized - enum_interventionAssignmentStrategy - Single Arm enum_interventionAssignmentStrategy single_arm - Parallel enum_interventionAssignmentStrategy parallel - Crossover enum_interventionAssignmentStrategy crossover - Factorial enum_interventionAssignmentStrategy factorial - Sequential enum_interventionAssignmentStrategy sequential - enum_familyType - Control-only Unrelated control, no Down syndrome family members enum_familyType control_only - Duo Proband + one parent enum_familyType duo - Other Other family structure, eg one parent + twins enum_familyType other - Proband-only Proband only, no family members participating in study enum_familyType proband_only - Trio Proband + two parents enum_familyType trio - Trio Plus Proband + two parents + other relatives enum_familyType trio_plus - enum_familyRelationship - Proband The first affected family member to join the study enum_familyRelationship proband NCIT:C64435 - Father enum_familyRelationship father 
NCIT:C25174 - Mother enum_familyRelationship mother NCIT:C25189 - Sibling enum_familyRelationship sibling NCIT:C25204 - Other relative enum_familyRelationship other_relative NCIT:C21480 - Unrelated control enum_familyRelationship unrelated_control NCIT:C25328 - enum_sex - Female enum_sex female NCIT:C16576 - Male enum_sex male NCIT:C20197 - Other enum_sex other NCIT:C17649 - Unknown enum_sex unknown NCIT:C17998 - enum_race - American Indian or Alaska Native enum_race american_indian_or_alaska_native NCIT:C41259 - Asian enum_race asian NCIT:C41260 - Black or African American enum_race black_or_african_american NCIT:C16352 - More than one race enum_race more_than_one_race NCIT:C67109 - Native Hawaiian or Other Pacific Islander enum_race native_hawaiian_or_other_pacific_islander NCIT:C41219 - Other enum_race other NCIT:C17649 - White enum_race white NCIT:C41261 - Prefer not to answer enum_race prefer_not_to_answer NCIT:C132222 - Unknown enum_race unknown NCIT:C17998 - East Asian UK only; do not use for US data enum_race east_asian NCIT:C161419 - Latin American UK only; do not use for US data enum_race latin_american NCIT:C126531 - Middle Eastern or North African UK only; do not use for US data enum_race middle_eastern_or_north_african NCIT:C43866 - South Asian UK only; do not use for US data enum_race south_asian NCIT:C41263 - enum_ethnicity - Hispanic or Latino enum_ethnicity hispanic_or_latino NCIT:C17459 - Not Hispanic or Latino enum_ethnicity not_hispanic_or_latino NCIT:C41222 - Prefer not to answer enum_ethnicity prefer_not_to_answer NCIT:C132222 - Unknown enum_ethnicity unknown NCIT:C17998 - enum_downSyndromeStatus - D21 Disomy 21 (euploid) enum_downSyndromeStatus d21 - T21 Trisomy 21 (Down syndrome) enum_downSyndromeStatus t21 MONDO:0008608 - enum_vital_status - Dead enum_vital_status dead NCIT:C28554 - Alive enum_vital_status alive NCIT:C37987 - Unknown or not available enum_vital_status unknown_or_not_available NCIT:C17998 - enum_conditionInterpretation - 
Observed Condition was observed or reported (this will be the case for most conditions) enum_conditionInterpretation observed - Not Observed Participant was specifically examined or medical record queried for condition and found to be negative enum_conditionInterpretation not_observed - enum_conditionStatus - Current Condition is ongoing enum_conditionStatus current - Resolved Condition has been resolved enum_conditionStatus resolved - History Of This is a general history of the condition, without known dates enum_conditionStatus history_of - enum_conditionDataSource - Clinical Information about condition was obtained from medical records or reported by investigator enum_conditionDataSource clinical - Self-reported Information about condition was reported by participant or family member enum_conditionDataSource self_reported - enum_dataAccess - Controlled enum_dataAccess controlled - Open enum_dataAccess open - Registered enum_dataAccess registered - enum_Availability - Available Sample or Container is potentially available to be requested through the Virtual Biorepository (see VBR contact info in Study page) enum_Availability available - Unavailable Sample or Container either was available through Virtual Biorepository but has been used up, or is part of a study that is not participating in the VBR enum_Availability unavailable - enum_inputOutputClass - Participant enum_inputOutputClass participant - Biospecimen enum_inputOutputClass biospecimen - DataFile enum_inputOutputClass datafile diff --git a/src/data/sheets/schemasheet_2025-12-15.tsv b/src/data/sheets/schemasheet_2025-12-15.tsv new file mode 100644 index 0000000..76349c2 --- /dev/null +++ b/src/data/sheets/schemasheet_2025-12-15.tsv @@ -0,0 +1,374 @@ +class slot title description required range any_of minimum_value maximum_value multivalued enum permissible_value meaning +>class slot title description required range any_of minimum_value maximum_value multivalued enum permissible_value meaning + studyCode 
Study Code Unique identifier for the study (generally a short acronym) true enum_studyCode + studyTitle Study Title Full title of the study true string + program Program Funding source(s) for the study (pipe-separated if multiple) true enum_program true + studyDescription Study Description Brief description of the study (2-4 sentences) true string + principalInvestigatorName Principal Investigator Name Name(s) of Principal Investigator(s) of this study; pipe-separated if multiple true string true + studyContactName Study Contact Name Name of contact person for this study; pipe-separated if multiple true string true + studyContactInstitution Study Contact Institution Institution of contact person for this study; pipe-separated if multiple true string true + studyContactEmail Study Contact Email Email address of contact person for this study; pipe-separated if multiple true string true + vbrEmail VBR Email Email address for Virtual Biorepository requests/inquiries, if participating string + vbrUrl VBR URL Link to Virtual Biorepository request form, if participating uri + vbrReadme VBR Readme Instructions for contacting or requesting samples from Virtual Biorepository, if participating string + researchDomain Research Domain Main research domain(s) of the study, other than Down syndrome; pipe-separated if multiple true enum_researchDomain true + participantLifespanStage Participant Lifespan Stage Focus age group(s) of the study population; pipe-separated if multiple true enum_participantLifespanStage true + selectionCriteria Selection Criteria A limited list of criteria for selection of participants in the study, provided in terms of inclusion and exclusion criteria. For Observational studies, a description of the population from which the groups or cohorts were selected (for example, primary care clinic, community sample, residents of a certain town). 
string + studyDesign Study Design Overall design of study, including whether it is longitudinal and whether family members/unrelated controls are also enrolled true enum_studyDesign true + clinicalDataSourceType Clinical Data Source Type Source(s) of data collected from study participants; pipe-separated if multiple true enum_clinicalDataSourceType true + studyWebsite Study Website Website for the study uri true + publication Publication uri true + expectedNumberOfParticipants Expected Number of Participants true integer + guidType GUID Type System used to generate globally unique identifiers (GUIDs) true enum_guidType + guidMapped GUIDs Mapped? For studies using NDAR GUIDs, have the GUIDs been added to the INCLUDE GUID Mapping File? boolean + dbgap dbGaP string true + acknowledgments Acknowledgments Funding statement and acknowledgments for this study string true + citationStatement Citation Statement "Statement that secondary data users should use to acknowledge use of this dataset. E.g., ""The results analyzed and here are based in whole or in part upon data generated by the INCLUDE (INvestigation of Co-occurring conditions across the Lifespan to Understand Down syndromE) Project , and were accessed from the INCLUDE Data Hub and .""" string true + doi DOI Unique Digital Object Identifier for each Study and Dataset, minted by the DCC using DataCite false uri + doiCitation DOI Citation Bibliographic citation for DOI, generated by DataCite false string + nctId NCT ID "The unique identification code given to each clinical study upon registration at ClinicalTrials.gov. The format is ""NCT"" followed by an 8-digit number. Also known as ClinicalTrials.gov Identifier" true string false + clinicalStudyDesign Clinical Study Design The nature of the investigation or investigational use for which clinical study information is being submitted. Select one. 
true enum_clinicalStudyDesign false + trialPhase Trial Phase For a clinical trial of a drug product (including a biological product), the numerical phase of such clinical trial. Select only one. true enum_trialPhase false + primaryPurpose Primary Purpose The main objective of the intervention(s) being evaluated by the clinical trial. Select one. true enum_primaryPurpose false + interventionType Intervention Type For each intervention studied in the clinical study, the general type of intervention. Select one. true enum_interventionType false + intervention Intervention For interventional studies, specify the intervention(s) associated with each arm or group; at least one intervention must be specified for interventional studies. Use non-proprietary names where available. Multiple values should be pipe-separated. For observational studies, specify the intervention(s)/exposure(s) of interest, if any. true string true + armInformation Arm Information For interventional studies, a description of each arm of the clinical trial that indicates its role in the clinical trial (e.g. Experimental, Active Comparator, Placebo Comparator, Sham Comparator; No Intervention; Other); provides an informative title; and, if necessary, additional descriptive information (including which interventions are administered in each arm) to differentiate each arm from other arms in the clinical trial. Multiple values should be pipe-separated. For observational studies, specify the predefined participant groups (cohorts) to be studied, e.g. those with or without a condition/exposure. true string true + armAllocation Arm Allocation The method by which participants are assigned to arms in a clinical trial. true enum_armAllocation false + interventionAssignmentStrategy Intervention Assignment Strategy In an interventional study, the strategy for assigning interventions to participants. 
true enum_interventionAssignmentStrategy false + primaryOutcomeMeasure Primary Outcome Measure A description of each primary outcome measure (title, description, and time point/duration of assessment). Multiple values should be pipe-separated. true string true + secondaryOutcomeMeasure Secondary Outcome Measure A description of each secondary outcome measure (title, description, and time point/duration of assessment). Multiple values should be pipe-separated. true string true + otherOutcomeMeasure Other Outcome Measure A description of other outcome measures (title, description, and time point/duration of assessment). Multiple values should be pipe-separated. true string true + datasetName Dataset Name Full name of the dataset, provided by contributor true string + datasetDescription Dataset Description Brief additional notes about the dataset (1-3 sentences) that are not already captured in the other fields string + datasetGlobalId Dataset Global ID Unique Global ID for dataset, generated by DCC false string + datasetExternalId Dataset External ID Unique identifier or code for dataset, if provided by contributor string + expectedNumberOfFiles Expected Number of Files Expected number of files associated with this dataset, including dictionaries. If additional explanation is needed, please add to Dataset Description field. 
false integer + dataCollectionStartYear Data Collection Start Year Year that data collection started false string + dataCollectionEndYear Data Collection End Year Year that data collection ended false string + accessLimitations Access Limitations Data access limitations, as defined in the GA4GH Data Use Ontology (DUO; can list more than one, pipe separated) false string + accessRequirements Access Requirements Data access requirements, as defined in the GA4GH Data Use Ontology (DUO; can list more than one, pipe separated) false string + otherRepository Other Repository URL if dataset is already deposited in a public repository other than dbGaP (e.g. LONI, Metabolomics Workbench, etc.) uri true + otherAccessAuthority Other Access Authority Email or URL for dataset's Access Authority, if not dbGaP string + isHarmonized Is Harmonized All of the elements in this Dataset are harmonized and available in the INCLUDE Data Hub boolean + datasetManifestLocation Dataset Manifest Location Location of associated Dataset Manifest string + participantGlobalId Participant Global ID Unique INCLUDE global identifier for the participant, assigned by DCC true string + participantExternalId Participant External ID Unique, de-identified identifier for the participant, assigned by data contributor. External IDs must be two steps removed from personal information in the study records. 
true string + familyId Family ID Unique identifier for family to which Participant belongs, assigned by data contributor string + familyType Family Type Structure of family members participating in the study true enum_familyType + fatherId Father ID Participant External ID for Participant's father (NA if Participant is not the proband) string + motherId Mother ID Participant External ID for Participant's mother (NA if Participant is not the proband) string + siblingId Sibling ID Participant External ID for Participant's sibling(s) (NA if Participant is not the proband) string + otherFamilyMemberId Other Family Member ID Participant External ID for Participant's other family members (NA if Participant is not the proband) string + familyRelationship Family Relationship Relationship of Participant to proband true enum_familyRelationship + sex Sex Sex of Participant true enum_sex + race Race Race of Participant true enum_race + ethnicity Ethnicity Ethnicity of Participant true enum_ethnicity + downSyndromeStatus Down Syndrome Status Down Syndrome status of participant true enum_downSyndromeStatus + ageAtFirstParticipantEngagement Age at First Participant Engagement Age in days of Participant at first recorded study event (enrollment, visit, observation, sample collection, survey completion, etc.). Age at enrollment is preferred, if available. true Any AnonymousSlotExpression({'range': 'integer'})|AnonymousSlotExpression({'range': 'enum_Unknown'}) 0 33000 + firstParticipantEngagementEvent First Participant Engagement Event Event for which Age at First Participant Engagement is given (e.g. enrollment, visit, observation, sample collection, survey completion, etc.). Age at enrollment is preferred, if available.
true string + outcomesVitalStatus Outcomes Vital Status Whether participant is alive or dead enum_vital_status + ageAtLastVitalStatus Age at Last Vital Status Age in days when participant's vital status was last recorded integer 0 33000 + eventId Event ID Identifier for event (Visit, Survey completion, Sample collection, etc.) to which the Condition data are linked, if applicable. There may be multiple events linked to a Participant. string + eventType Event Type Type of event for which Event ID is given (Visit, Survey completion, Sample collection, etc.) string + conditionMeasureSourceText Condition or Measure Source Text Co-occurring Condition (phenotype or diagnosis) or Measure (observation with numeric value), as described by data contributor. The Down Syndrome Genetic Diagnosis will be rolled into this field. string + ageAtConditionMeasureObservation Age At Condition or Measure Observation Age in days at which Condition or Measure was observed, recorded, or diagnosed integer 0 33000 + conditionInterpretation Condition Interpretation Whether Condition was observed or not enum_conditionInterpretation + conditionStatus Condition Status Whether the Condition is ongoing, has been resolved, or this is a general history of the condition without known dates enum_conditionStatus + conditionDataSource Condition Data Source Whether Condition information was obtained by the investigator or reported by participant/family member enum_conditionDataSource + hpoLabel HPO Label Label for Condition in the Human Phenotype Ontology (HPO) string + hpoCode HPO Code Code for Condition in the Human Phenotype Ontology (HPO) string + mondoLabel MONDO Label Label for Condition in the Mondo Disease Ontology (MONDO) string + mondoCode MONDO Code Code for Condition in the Mondo Disease Ontology (Mondo) string + maxoLabel MAXO Label Label for Condition in the Medical Action Ontology (MAXO) string + maxoCode MAXO Code Code for condition in the Medical Action Ontology (MAXO) string + 
otherLabel Other Label Label for Condition in another ontology (if no match in HPO, MONDO, or MAXO) string + otherCode Other Code Code for Condition in another ontology (if no match in HPO, MONDO, or MAXO) string + measureValue Measure Value Numeric value of Measure float + measureUnit Measure Unit Unit that is associated with Measure Value (e.g. kg, cm, %, x10^9/L, etc.) string + sampleGlobalId Sample Global ID INCLUDE global identifier for sample, assigned by DCC true string + sampleExternalId Sample External ID Unique identifier for sample, assigned by data contributor. A sample is a unique biological material; two samples with two different IDs are biologically distinct. true string + sampleType Sample Type Type of biological material comprising the Sample (e.g. Plasma, White blood cells, Red blood cells, DNA, RNA, Peripheral blood mononuclear cells, CD4+ Tconv cells, NK cells, Monocytes, CD8+ T cells, B cells, Granulocytes, Treg cells) true string + ageAtBiospecimenCollection Age At Biospecimen Collection Age in days of participant at time of biospecimen collection integer + parentSampleGlobalId Parent Sample Global ID INCLUDE global identifier for the direct parent from which Sample was derived, assigned by DCC string + parentSampleExternalId Parent Sample External ID Identifier for the direct parent from which Sample was derived, processed, pooled, etc. (if applicable); assigned by data contributor string + parentSampleType Parent Sample Type Type of biological material comprising the Parent Sample (e.g. Peripheral Whole Blood, Derived Cell Line, Saliva, Whole blood, WBCs) string + collectionGlobalId Collection Global ID INCLUDE global identifier for the eldest sample in a lineage, assigned by DCC string + collectionExternalId Collection External ID Identifier for the eldest sample in a lineage of processed, pooled, or aliquoted samples - typically the material actually collected from the Participant. 
This may be the same as Parent Sample ID or Sample ID (if no processing was performed). Assigned by data contributor. string + collectionSampleType Collection Sample Type Type of biological material comprising the Collected Sample (e.g. Whole blood, Not reported, Saliva, Derived cell line) string + containerGlobalId Container Global ID INCLUDE global identifier for specific container/aliquot of sample, assigned by DCC string + containerExternalId Container External ID Identifier for specific container/aliquot of sample, assigned by data contributor. For example, distinct aliquots of a sample will have the same Sample ID but different Container IDs. string + volume Volume Amount of sample in container float + volumeUnit Volume Unit Unit of sample volume string + concentration Concentration Concentration of sample in container float + concentrationUnit Concentration Unit Unit of sample concentration string + laboratoryProcedure Laboratory Procedure Procedure by which Sample was derived from Parent Sample (e.g. Centrifugation, RBC lysis, Lyse/fix buffer, FACS, PAXgene DNA, PAXgene RNA, Qiagen Allprep, Ficoll) string + biospecimenStorage Biospecimen Storage Method by which Container is stored (e.g. 
Minus 80 degrees Celsius, Liquid nitrogen storage) string + sampleAvailability Sample Availability Whether or not the Sample (any Container thereof) is potentially available for sharing through the Virtual Biorepository true enum_Availability + containerAvailability Container Availability Whether or not the specific Container is potentially available for sharing through the Virtual Biorepository enum_Availability + participantDataFileManifestLocation Participant-DataFile Manifest Location Location of Participant-DataFile Manifest, if file contains multiple Participants string + fileName File Name Name of file, assigned by data contributor true string + fileGlobalId File Global ID INCLUDE global file identifier, assigned by DCC true string + fileUploadLocation File Upload Location Where source file was uploaded, if not directly to an S3 bucket (e.g. Synapse) string + fileS3Location File S3 Location S3 bucket location of file; also serves as dewrangle descriptor true string + drsUri DRS URI Data Repository Services API Uniform Resource Identifier true uriorcurie + fileHash File Hash md5 hash of this file for validation (if known) string + dataAccess Data Access Type of access control on this file, determined by DCC true enum_dataAccess + dataCategory Data Category true enum_dataCategory + dataType Data Type string true + experimentalStrategy Experimental Strategy string true + experimentalPlatform Experimental Platform Specific platform used to perform experiment; pipe-separated if multiple (e.g. SOMAscan, MSD, Luminex, Illumina) string true + fileFormat File Format Format of file (e.g. 
tsv, cram, gvcf, vcf, maf, txt, pdf, html, png) true string + fileSize File Size Size of file, if known (mainly important if large) integer + fileSizeUnit File Size Unit Unit of file size string +Study Study General information about the study +Study dataCategory Categories of data expected to be collected in this study true +Study dbgap "dbGaP ""phs"" accession code(s) associated with this Study, either for access or informational purposes (pipe-separated if multiple)" +Study publication URL for publication(s) describing the study's rationale and methodology (PubMed Central preferred but not required; pipe-separated if multiple) +Study expectedNumberOfParticipants Expected number of participants in this study (or actual number, if data has been submitted to INCLUDE DCC). If additional explanation is needed, please add to Study Description field. +ClinicalTrial Clinical Trial Information specific to clinical trials +ClinicalTrial studyCode Study +Dataset Dataset Information about a specific grouping of data files +Dataset dataCategory General category of data in Dataset; pipe-separated if multiple true +Dataset dbgap "dbGaP ""phs"" accession code(s) required to access the files in this Dataset, if applicable (pipe-separated if multiple)" +Dataset publication URL for publication(s) describing the Dataset's rationale and methodology (PubMed Central preferred but not required; pipe-separated if multiple) +Dataset expectedNumberOfParticipants Expected number of participants in this Dataset (or actual number, if data has been submitted to INCLUDE DCC). If additional explanation is needed, please add to Dataset Description field. +Dataset dataType Specific type of data contained in Dataset; pipe-separated if multiple (e.g. 
Preprocessed metabolite relative abundance, Absolute protein concentration, Aligned reads, Simple nucleotide variations, GVCF, Gene expression quantifications, Gene fusions, Somatic copy number variations, Somatic structural variations) +Dataset experimentalStrategy Experimental method used to obtain data in Dataset; pipe-separated if multiple (e.g. Whole genome sequencing, RNAseq, Multiplex immunoassay, Mass spec metabolomics) true +Dataset studyCode Study +DatasetManifest Dataset Manifest Mapping information for files in Dataset +DatasetManifest studyCode Study +DatasetManifest fileGlobalId DataFile +DatasetManifest datasetGlobalId Dataset +ParticipantSampleDataFileManifest Participant-Sample-DataFile Manifest List of Participants and/or Samples in DataFiles with multiple Participants and/or Samples +ParticipantSampleDataFileManifest studyCode Study +ParticipantSampleDataFileManifest fileName DataFile +ParticipantSampleDataFileManifest participantExternalId Participant +ParticipantSampleDataFileManifest sampleExternalId Biospecimen +Participant Participant Demographic and clinical information about the participant +Condition Condition Co-occurring conditions and other observations for the participant +Condition studyCode Study +Condition participantGlobalId Participant +Biospecimen Biospecimen A Biospecimen Collected from A Participant +Biospecimen studyCode Study +Biospecimen participantGlobalId Participant +DataFile Data File Metadata about Data Files +DataFile dataCategory General category of data in file (e.g. Clinical, Genomics, Proteomics, Metabolomics, Immune profiling, Transcriptomics) +DataFile dataType Specific type of data contained in file (e.g. 
Preprocessed metabolite relative abundance, Absolute protein concentration, Aligned reads, Simple nucleotide variations, GVCF, Gene expression quantifications, Gene fusions, Somatic copy number variations, Somatic structural variations) +DataFile experimentalStrategy Experimental method used to obtain data in file (e.g. Whole genome sequencing, RNAseq, Multiplex immunoassay, Mass spec metabolomics) +DataFile studyCode Study +DataFile participantGlobalId Participant +DataFile sampleGlobalId Biospecimen + enum_studyCode + AADSC enum_studyCode aadsc + ABC-DS enum_studyCode abc_ds + ADS enum_studyCode ads + AECOM-DS enum_studyCode aecom_ds + APAP21 enum_studyCode apap21 + ARC-DS enum_studyCode arc_ds + BEST21 enum_studyCode best21 + BrainPower enum_studyCode brainpower + BRI-DSR enum_studyCode bri_dsr + CCDS enum_studyCode ccds + CHILD-DS enum_studyCode child_ds + CHARGE-DS enum_studyCode charge_ds + DECIDAS enum_studyCode decidas + DS-Brain enum_studyCode ds_brain + DS-COG-ALL enum_studyCode ds_cog_all + DS-COG-AML enum_studyCode ds_cog_aml + DS-DETERMINED enum_studyCode ds_determined + DS-HOME enum_studyCode ds_home + DS-HPBM enum_studyCode ds_hpbm + DS-HSAT enum_studyCode ds_hsat + DS-ISP enum_studyCode ds_isp + DS-Nexus enum_studyCode ds_nexus + DS-PALS enum_studyCode ds_pals + DS-PCGC enum_studyCode ds_pcgc + DS-Sleep enum_studyCode ds_sleep + DS-VitE enum_studyCode ds_vite + DS360-CHD enum_studyCode ds360_chd + DSC enum_studyCode dsc + DSRRS enum_studyCode dsrrs + EACH-DS enum_studyCode each_ds + ECODS enum_studyCode ecods + EXcEEDS enum_studyCode exceeds + HTP enum_studyCode htp + IBIS-DS enum_studyCode ibis-ds + JAKi-DS enum_studyCode jaki_ds + MOSAIC-DS enum_studyCode mosaic_ds + OPTimal enum_studyCode optimal + TEAM-DS enum_studyCode team_ds + TOMI enum_studyCode tomi + TRC-DS enum_studyCode trc_ds + X01-deSmith enum_studyCode x01_desmith + X01-Hakonarson enum_studyCode x01_hakonarson + enum_program + INCLUDE enum_program include + KF enum_program kf + Other 
enum_program other + enum_researchDomain + Behavior and Behavior Mechanisms enum_researchDomain behavior_and_behavior_mechanisms mesh:D001520 + Congenital Heart Defects enum_researchDomain congenital_heart_defects mesh:D006330 + Immune System Diseases enum_researchDomain immune_system_diseases mesh:D007154 + Hematologic Diseases enum_researchDomain hematologic_diseases mesh:D006402 + Neurodevelopment enum_researchDomain neurodevelopment mesh:D065886 + Nutritional and Metabolic Diseases enum_researchDomain nutritional_and_metabolic_diseases mesh:D009750 + Sleep Wake Disorders enum_researchDomain sleep_wake_disorders mesh:D012893 + All Co-occurring Conditions enum_researchDomain all_co_occurring_conditions mesh:D013568 + Physical Fitness enum_researchDomain physical_fitness mesh:D010809 + Respiratory Tract Diseases enum_researchDomain respiratory_tract_diseases mesh:D012140 + Other enum_researchDomain other + enum_participantLifespanStage + Fetal enum_participantLifespanStage fetal + Neonatal 0-28 days old enum_participantLifespanStage neonatal + Pediatric Birth-17 years old enum_participantLifespanStage pediatric + Adult 18+ years old enum_participantLifespanStage adult + enum_studyDesign + Case-Control enum_studyDesign case_control + Case Set enum_studyDesign case_set + Control Set enum_studyDesign control_set + Clinical Trial enum_studyDesign clinical_trial + Cross-Sectional enum_studyDesign cross_sectional + Family/Twins/Trios enum_studyDesign family_twins_trios + Interventional enum_studyDesign interventional + Longitudinal enum_studyDesign longitudinal + Technology Development enum_studyDesign technology_development + Trial Readiness Study enum_studyDesign trial_readiness_study + Tumor vs Matched Normal enum_studyDesign tumor_vs_matched_normal + enum_clinicalDataSourceType + Medical Record Data obtained directly from medical record enum_clinicalDataSourceType medical_record + Investigator Assessment Data obtained by examination, interview, etc. 
with investigator enum_clinicalDataSourceType investigator_assessment + Participant or Caregiver Report Data obtained from survey, questionnaire, app, etc. filled out by participant or caregiver enum_clinicalDataSourceType participant_or_caregiver_report + Wearable Data collected by wearable or other device enum_clinicalDataSourceType wearable + Other Data obtained from other source, such as tissue bank enum_clinicalDataSourceType other + Unknown enum_clinicalDataSourceType unknown + enum_dataCategory + Unharmonized Demographic/Clinical Data enum_dataCategory unharmonized_demographic_clinical_data + Harmonized Demographic/Clinical Data enum_dataCategory harmonized_demographic_clinical_data + Genomics enum_dataCategory genomics + Transcriptomics enum_dataCategory transcriptomics + Epigenomics enum_dataCategory epigenomics + Proteomics enum_dataCategory proteomics + Metabolomics enum_dataCategory metabolomics + Adherence enum_dataCategory adherence + Cognitive/Behavioral enum_dataCategory cognitive_behavioral + Immune Profiling enum_dataCategory immune_profiling + Imaging enum_dataCategory imaging + Microbiome enum_dataCategory microbiome + Fitness enum_dataCategory fitness + Physical Activity enum_dataCategory physical_activity + Other enum_dataCategory other + Sleep enum_dataCategory sleep_study + enum_guidType + NDAR GUID generated by NIMH Data Archive (NDA) GUID tool enum_guidType ndar + Other GUID generated by other system enum_guidType other + No GUID No GUIDs used in this study enum_guidType no_guid + enum_clinicalStudyDesign + Interventional enum_clinicalStudyDesign interventional + Observational enum_clinicalStudyDesign observational + Patient Registry enum_clinicalStudyDesign patient_registry + Expanded Access enum_clinicalStudyDesign expanded_access + enum_trialPhase + Not Applicable enum_trialPhase not_applicable + Early Phase 1 enum_trialPhase early_phase_1 + Phase 1 enum_trialPhase phase_1 + Phase 1/2 enum_trialPhase phase_1_2 + Phase 2 enum_trialPhase 
phase_2 + Phase 2/3 enum_trialPhase phase_2_3 + Phase 3 enum_trialPhase phase_3 + Phase 4 enum_trialPhase phase_4 + enum_primaryPurpose + Treatment enum_primaryPurpose treatment + Prevention enum_primaryPurpose prevention + Diagnostic enum_primaryPurpose diagnostic + Supportive Care enum_primaryPurpose supportive_care + Screening enum_primaryPurpose screening + Health Services Research enum_primaryPurpose health_services_research + Basic Science enum_primaryPurpose basic_science + Device Feasibility enum_primaryPurpose device_feasibility + Other enum_primaryPurpose other + enum_interventionType + Drug enum_interventionType drug + Device enum_interventionType device + Biological/Vaccine enum_interventionType biological_vaccine + Procedure/Surgery enum_interventionType procedure_surgery + Radiation enum_interventionType radiation + Behavioral enum_interventionType behavioral + Genetic enum_interventionType genetic + Dietary Supplement enum_interventionType dietary_supplement + Combination Product enum_interventionType combination_product + Diagnostic Test enum_interventionType diagnostic_test + Other enum_interventionType other + enum_armAllocation + Not Applicable enum_armAllocation not_applicable + Randomized enum_armAllocation randomized + Nonrandomized enum_armAllocation nonrandomized + enum_interventionAssignmentStrategy + Single Group enum_interventionAssignmentStrategy single_arm + Parallel enum_interventionAssignmentStrategy parallel + Crossover enum_interventionAssignmentStrategy crossover + Factorial enum_interventionAssignmentStrategy factorial + Sequential enum_interventionAssignmentStrategy sequential + enum_familyType + Control-only Unrelated control, no Down syndrome family members enum_familyType control_only + Duo Proband + one parent enum_familyType duo + Other Other family structure, eg one parent + twins enum_familyType other + Proband-only Proband only, no family members participating in study enum_familyType proband_only + Trio Proband + two 
parents enum_familyType trio + Trio Plus Proband + two parents + other relatives enum_familyType trio_plus + enum_familyRelationship + Proband The first affected family member to join the study enum_familyRelationship proband NCIT:C64435 + Father enum_familyRelationship father NCIT:C25174 + Mother enum_familyRelationship mother NCIT:C25189 + Sibling enum_familyRelationship sibling NCIT:C25204 + Other relative enum_familyRelationship other_relative NCIT:C21480 + Unrelated control enum_familyRelationship unrelated_control NCIT:C25328 + enum_sex + Female enum_sex female NCIT:C16576 + Male enum_sex male NCIT:C20197 + Other enum_sex other NCIT:C17649 + Unknown enum_sex unknown NCIT:C17998 + enum_race + American Indian or Alaska Native enum_race american_indian_or_alaska_native NCIT:C41259 + Asian enum_race asian NCIT:C41260 + Black or African American enum_race black_or_african_american NCIT:C16352 + More than one race enum_race more_than_one_race NCIT:C67109 + Native Hawaiian or Other Pacific Islander enum_race native_hawaiian_or_other_pacific_islander NCIT:C41219 + Other enum_race other NCIT:C17649 + White enum_race white NCIT:C41261 + Prefer not to answer enum_race prefer_not_to_answer NCIT:C132222 + Unknown enum_race unknown NCIT:C17998 + East Asian UK only; do not use for US data enum_race east_asian NCIT:C161419 + Latin American UK only; do not use for US data enum_race latin_american NCIT:C126531 + Middle Eastern or North African UK only; do not use for US data enum_race middle_eastern_or_north_african NCIT:C43866 + South Asian UK only; do not use for US data enum_race south_asian NCIT:C41263 + enum_ethnicity + Hispanic or Latino enum_ethnicity hispanic_or_latino NCIT:C17459 + Not Hispanic or Latino enum_ethnicity not_hispanic_or_latino NCIT:C41222 + Prefer not to answer enum_ethnicity prefer_not_to_answer NCIT:C132222 + Unknown enum_ethnicity unknown NCIT:C17998 + enum_downSyndromeStatus + D21 Disomy 21 (euploid) enum_downSyndromeStatus d21 + T21 Trisomy 21 
(Down syndrome) enum_downSyndromeStatus t21 MONDO:0008608 + enum_vital_status + Dead enum_vital_status dead NCIT:C28554 + Alive enum_vital_status alive NCIT:C37987 + Unknown or not available enum_vital_status unknown_or_not_available NCIT:C17998 + enum_conditionInterpretation + Observed Condition was observed or reported (this will be the case for most conditions) enum_conditionInterpretation observed + Not Observed Participant was specifically examined or medical record queried for condition and found to be negative enum_conditionInterpretation not_observed + enum_conditionStatus + Current Condition is ongoing enum_conditionStatus current + Resolved Condition has been resolved enum_conditionStatus resolved + History Of This is a general history of the condition, without known dates enum_conditionStatus history_of + enum_conditionDataSource + Clinical Information about condition was obtained from medical records or reported by investigator enum_conditionDataSource clinical + Self-reported Information about condition was reported by participant or family member enum_conditionDataSource self_reported + enum_dataAccess + Controlled enum_dataAccess controlled + Open enum_dataAccess open + Registered enum_dataAccess registered + enum_Availability + Available Sample or Container is potentially available to be requested through the Virtual Biorepository (see VBR contact info in Study page) enum_Availability available + Unavailable Sample or Container either was available through Virtual Biorepository but has been used up, or is part of a study that is not participating in the VBR enum_Availability unavailable + enum_Unknown + Unknown enum_Unknown unknown NCIT:C17998 diff --git a/src/data/sheets/slots.tsv b/src/data/sheets/slots.tsv index 27954fb..8f95e87 100644 --- a/src/data/sheets/slots.tsv +++ b/src/data/sheets/slots.tsv @@ -1,2 +1,2 @@ -class slot title description range required multivalued enum permissible_value title description meaning ->class slot title 
description range required multivalued enum permissible_value title description meaning +class slot title description required range any_of minimum_value maximum_value multivalued enum permissible_value title description meaning +>class slot title description required range any_of minimum_value maximum_value multivalued enum permissible_value title description meaning diff --git a/src/data_validation/csv2json.py b/src/data_validation/csv2json.py index 004f7a4..b1d3e47 100644 --- a/src/data_validation/csv2json.py +++ b/src/data_validation/csv2json.py @@ -5,8 +5,8 @@ import re SCHEMA_PATH = Path("src/linkml/include_schema.yaml").resolve() -INPUT_CSV = Path("src/data/input/study_test_data_utf8.csv").resolve() -OUTPUT_JSON = Path("src/data/input/study_data.json").resolve() +INPUT_CSV = Path("../validation_logs/hakon_participants_v7_2025-12-12.csv").resolve() +OUTPUT_JSON = Path("../validation_logs/hakon_participants_js.json").resolve() def get_multivalued_slots(schema_path): diff --git a/src/linkml/include_schema.yaml b/src/linkml/include_schema.yaml index 02456d1..13c59bd 100644 --- a/src/linkml/include_schema.yaml +++ b/src/linkml/include_schema.yaml @@ -751,7 +751,10 @@ slots: definition_uri: include:ageAtFirstParticipantEngagement description: Age in days of Participant at first recorded study event (enrollment, visit, observation, sample collection, survey completion, etc.). Age at enrollment is preferred, if available. 
title: Age at First Participant Engagement - range: integer + range: Any + any_of: + - range: integer + - range: enum_Unknown minimum_value: 0 maximum_value: 33000 required: true @@ -884,7 +887,12 @@ slots: definition_uri: include:ageAtBiospecimenCollection description: Age in days of participant at time of biospecimen collection title: Age At Biospecimen Collection - range: integer + range: Any + any_of: + - range: integer + - range: enum_Unknown + minimum_value: 0 + maximum_value: 33000 parentSampleGlobalId: definition_uri: include:parentSampleGlobalId description: INCLUDE global identifier for the direct parent from which Sample was derived, assigned by DCC @@ -1586,5 +1594,12 @@ enums: unavailable: title: Unavailable description: Sample or Container either was available through Virtual Biorepository but has been used up, or is part of a study that is not participating in the VBR + enum_Unknown: + definition_uri: include:enum_Unknown + permissible_values: + unknown: + title: Unknown + meaning: NCIT:C17998 +