diff --git a/conversion/tapis_v2_to_v3/README.md b/conversion/tapis_v2_to_v3/README.md index 56879d8..fe8c1e1 100644 --- a/conversion/tapis_v2_to_v3/README.md +++ b/conversion/tapis_v2_to_v3/README.md @@ -14,10 +14,14 @@ validated in the migration. We assume that V2 API is not available and that the data to be migrated resides in a JSON file. -## Docker +## Jupyter notebooks -We build a specialized docker image that brings in vdj-tapis-js and vdjserver-schema +The extract and transform functions are performed in the Jupyter notebooks +and generate JSONL files with meta records that can be bulk uploaded with the +meta_load_records.js script in the tapis-conversion docker container. It +resides in the adc-api-tapis-js of vdjserver-repository. +- public_projects.ipynb: Public Projects ## Users @@ -82,3 +86,20 @@ so they are either old or testing. "bioProcessing": 5, "irplus_analysis": 3, +# Migration Tasks + +## Public projects + +This conversion maintains uuids for the records. + +- public_projects.ipynb: Public Projects + +This will generate files in the directories: + +- Metadata_public_project: One file per project containing metadata records. +- Metadata_public_project_jobs: One file per project containing Tapis V2 job records. + +As there aren't many files, manually load the files one at a time using +meta_load_records.js in the tapis-conversion docker. 
+ +## diff --git a/conversion/tapis_v2_to_v3/data_exploration.ipynb b/conversion/tapis_v2_to_v3/data_exploration.ipynb new file mode 100644 index 0000000..04e93c0 --- /dev/null +++ b/conversion/tapis_v2_to_v3/data_exploration.ipynb @@ -0,0 +1,2998 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Data exploration of tapis v2 " + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Modified DFs\n", + "These dataframes (unless specified) are created below\n", + "\n", + "- `valid_emails` : df of JsonArray containing value in `value.email`\n", + "\n", + "- `valid_email_list` : list of valid emails\n", + "\n", + "- `non_read_users` : metadataperms, but only READ/WRITE or ALL\n", + "\n", + "- `job_event_perms` : job events and job permissions merged on job_id\n", + "\n", + "- `email_username_combo` : combination of `value.username` and `value.email`\n", + "\n", + "- `num_jobs` : number of jobs for each `parameters.Creator` with username and email\n", + "\n", + "- `projJob_projFile_jobAll_emailUser` : \n", + " - merging of `jsonarray_df` where `name=projectJob` and `jsonarray_df` where `name=projectFile` (`on=value.projectUuid`)\n", + " - then merge with `jobs_all_df` (`left_on=jobUuid` and `right_on=uuid`) \n", + " - then merged with `email_username_combo` (`left_on=parameters.Creator` and `right_on=value.username`)\n", + "\n", + "- `projJob_projFile_jobAll_emailUser_groupby` : Prior df, but grouped by `parameters.Creator` and sorted by `value.projectUuid`" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Imports" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "import numpy as np\n", + "# import matplotlib.pyplot as plt\n", + "import json" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Read files" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + 
"metadata": {}, + "outputs": [], + "source": [ + "job_events_df = pd.read_json('/mnt/data2/Projects/vdjserver/vdjserverJobEvents.json')\n", + "job_permissions_df = pd.read_json('/mnt/data2/Projects/vdjserver/vdjserverJobPermissions.json')\n", + "jobs_all_df = pd.read_json('/mnt/data2/Projects/vdjserver/vdjserverJobs_all.json')\n", + "jobs_all_df['parameters.Creator'] = jobs_all_df['parameters'].apply(lambda x: json.loads(x).get('Creator', np.nan) if x else np.nan)\n", + "metadata_perms_df = pd.read_json('/mnt/data2/Projects/vdjserver/vdjserverMetadataPermissions.json')\n", + "\n", + "with open('/mnt/data2/Projects/vdjserver/vdjserverJsonArrayFeb042025.json', 'r') as f:\n", + " jsonarray = json.load(f)" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "# probably not the best way of doing this\n", + "# max_level must be 1, otherwise df is too large for system\n", + "# I end up using pd.read_json later on, but the creation of valid email list uses this.\n", + "jsonarray_df = pd.json_normalize(jsonarray, max_level=1)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# View dataframes" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
idcreatedcreated_bydescriptionip_addressstatustenant_idjob_idtransfertaskuuid
054762014-03-31 14:48:10jfonnerJob accepted and queued for submission.129.114.60.167PENDINGvdjserver.org142NaNNone
154772014-03-31 14:48:20jfonnerNo inputs for the given job. Skipping staging129.114.60.167STAGEDvdjserver.org142NaNNone
\n", + "
" + ], + "text/plain": [ + " id created created_by \\\n", + "0 5476 2014-03-31 14:48:10 jfonner \n", + "1 5477 2014-03-31 14:48:20 jfonner \n", + "\n", + " description ip_address status \\\n", + "0 Job accepted and queued for submission. 129.114.60.167 PENDING \n", + "1 No inputs for the given job. Skipping staging 129.114.60.167 STAGED \n", + "\n", + " tenant_id job_id transfertask uuid \n", + "0 vdjserver.org 142 NaN None \n", + "1 vdjserver.org 142 NaN None " + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "job_events_df.head(2)" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
idjob_idlast_updatedpermissiontenant_idusername
037832014-07-11 09:11:00READ_EXECUTEvdjserver.orgmlevin
147832014-07-11 09:33:02ALLvdjserver.orgwscarbor2
\n", + "
" + ], + "text/plain": [ + " id job_id last_updated permission tenant_id username\n", + "0 3 783 2014-07-11 09:11:00 READ_EXECUTE vdjserver.org mlevin\n", + "1 4 783 2014-07-11 09:33:02 ALL vdjserver.org wscarbor2" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "job_permissions_df.head(2)" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
idnametenant_idtenant_queueownerrolessystem_idapp_idapp_uuidstatus...remote_endedremote_outcomeremote_submit_retriesremote_status_checksfailed_status_checkslast_status_checkblocked_countvisibleupdate_tokenparameters.Creator
0503865My Job 24-Jan-2025 8:57:09 pmvdjserver.orgaloe.jobq.vdjserver.org.submit.DefaultQueuevdjInternal/VDJ_vdj_keycloak_PRODUCTION,Internal/...ls6.tacc.utexas.edurepcalc-ls6-2.0u86306626279335587345-242ac119-0001-005FINISHED...2025-01-25 15:43:33.007FINISHED019202025-01-25 15:43:33.00101eb27e311-4a37-4aeb-b649-056704dd2711schristley
1503859My Job 23-Jan-2025 3:05:30 pmvdjserver.orgaloe.jobq.vdjserver.org.submit.DefaultQueuevdjInternal/VDJ_vdj_keycloak_PRODUCTION,Internal/...ls6.tacc.utexas.eduigblast-ls6-1.20u61936847182374244846-242ac119-0001-005FINISHED...2025-01-24 04:18:20.211FINISHED011202025-01-24 04:18:20.201915e2528fd-25d6-4473-9287-6a67a8de8391schristley
\n", + "

2 rows × 42 columns

\n", + "
" + ], + "text/plain": [ + " id name tenant_id \\\n", + "0 503865 My Job 24-Jan-2025 8:57:09 pm vdjserver.org \n", + "1 503859 My Job 23-Jan-2025 3:05:30 pm vdjserver.org \n", + "\n", + " tenant_queue owner \\\n", + "0 aloe.jobq.vdjserver.org.submit.DefaultQueue vdj \n", + "1 aloe.jobq.vdjserver.org.submit.DefaultQueue vdj \n", + "\n", + " roles system_id \\\n", + "0 Internal/VDJ_vdj_keycloak_PRODUCTION,Internal/... ls6.tacc.utexas.edu \n", + "1 Internal/VDJ_vdj_keycloak_PRODUCTION,Internal/... ls6.tacc.utexas.edu \n", + "\n", + " app_id app_uuid status ... \\\n", + "0 repcalc-ls6-2.0u8 6306626279335587345-242ac119-0001-005 FINISHED ... \n", + "1 igblast-ls6-1.20u6 1936847182374244846-242ac119-0001-005 FINISHED ... \n", + "\n", + " remote_ended remote_outcome remote_submit_retries \\\n", + "0 2025-01-25 15:43:33.007 FINISHED 0 \n", + "1 2025-01-24 04:18:20.211 FINISHED 0 \n", + "\n", + " remote_status_checks failed_status_checks last_status_check \\\n", + "0 192 0 2025-01-25 15:43:33.001 \n", + "1 112 0 2025-01-24 04:18:20.201 \n", + "\n", + " blocked_count visible update_token \\\n", + "0 0 1 eb27e311-4a37-4aeb-b649-056704dd2711 \n", + "1 9 1 5e2528fd-25d6-4473-9287-6a67a8de8391 \n", + "\n", + " parameters.Creator \n", + "0 schristley \n", + "1 schristley \n", + "\n", + "[2 rows x 42 columns]" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "jobs_all_df.head(2)" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
idlast_updatedpermissionusernameuuidtenant_id
052014-01-21 17:00:11READ_WRITEtest30001389977207738-5056a550b8-0001-012vdjserver.org
162014-01-21 17:00:41READ_WRITEtest40001389977207738-5056a550b8-0001-012vdjserver.org
\n", + "
" + ], + "text/plain": [ + " id last_updated permission username \\\n", + "0 5 2014-01-21 17:00:11 READ_WRITE test3 \n", + "1 6 2014-01-21 17:00:41 READ_WRITE test4 \n", + "\n", + " uuid tenant_id \n", + "0 0001389977207738-5056a550b8-0001-012 vdjserver.org \n", + "1 0001389977207738-5056a550b8-0001-012 vdjserver.org " + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "metadata_perms_df.head(2)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Explore" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## find valid emails" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "value.username\n", + "att23562 3\n", + "diksha28 2\n", + "paulatataru 1\n", + "frankabuytenhuijs 1\n", + "mbartl 1\n", + " ..\n", + "pejvak 1\n", + "jinhyun 1\n", + "el-hadi 1\n", + "saleach 1\n", + "cdallett 1\n", + "Name: count, Length: 1670, dtype: int64" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Extract list of valid emails\n", + "domains = ['test', 'email']\n", + "tlds = ['.edu', '.com', '.org']\n", + "special = ['#$^&&*)(_+{{7*7}}||\\\\[:</>,.?']\n", + "bogus_domains = [d+t for d in domains for t in tlds]\n", + "bogus_domains = bogus_domains + special\n", + "\n", + "valid_emails = jsonarray_df[jsonarray_df['value.email'].notna()]\n", + "valid_emails = valid_emails[valid_emails['value.email'].str.split('@').apply(len)==2]\n", + "valid_emails = valid_emails[valid_emails['value.email'].apply(lambda x: x.split('@')[1] not in bogus_domains)]\n", + "\n", + "valid_email_list = [e for e in valid_emails['value.email'].unique() if e.split('@')[1].lower() not in bogus_domains]\n", + "\n", + "# Construct dataframe of email-username combinations\n", + "email_username_combo = valid_emails.loc[:,['value.username', 'value.email']]\n", + 
"email_username_combo = email_username_combo[email_username_combo['value.username'].notna()].drop_duplicates()\n", + "email_username_combo['value.username'].value_counts()\n", + "\n", + "# valid_emails.loc[[478159,478163,478174,568135,568139],['value.username', 'value.email', 'lastUpdated']]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## vdjserverJsonArrayFeb042025.json" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "`name` contains `profile`, `feedback` when restricted on `value.email` as not `NaN`" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [], + "source": [ + "# jsonarray_df.groupby(by=['owner']).count().sort_values(by=['created'], ascending=False).to_csv('./csv/owner_created_count_sorted.csv')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### json projectFile" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
fileUuidownertenantIdschemaIdinternalUsernamelastUpdatednamecreated_id.$oidassociationIds.0associationIds.1value.projectUuidvalue.type
530001395955349445-5056a550b8-0001-012vdjauthvdjserver.orgNaNNaN2014-03-27T16:22:29.444-05:00projectFile2014-03-27T16:22:29.444-05:00NoneNoneNone
540001396029083309-5056a550b8-0001-012vdjauthvdjserver.orgNaNNaN2014-03-28T12:51:23.309-05:00projectFile2014-03-28T12:51:23.309-05:00NoneNoneNone0001395346788177-5056a550b8-0001-012uploaded
550001396029805022-5056a550b8-0001-012vdjauthvdjserver.orgNaNNaN2014-03-28T13:03:25.022-05:00projectFile2014-03-28T13:03:25.022-05:00NoneNoneNone0001395346788177-5056a550b8-0001-012uploaded
560001396030144907-5056a550b8-0001-012vdjauthvdjserver.orgNaNNaN2014-03-28T13:09:04.907-05:00projectFile2014-03-28T13:09:04.907-05:00NoneNoneNone0001395346788177-5056a550b8-0001-012uploaded
570001396039988083-5056a550b8-0001-012vdjauthvdjserver.orgNaNNaN2014-03-28T15:53:08.083-05:00projectFile2014-03-28T15:53:08.083-05:00NoneNoneNone0001395346788177-5056a550b8-0001-012uploaded
..........................................
6070855338423137409494545-242ac118-0001-012vdjvdjserver.orgNaNNaN2025-01-13T16:40:40.230-06:00projectFile2025-01-13T16:14:09.079-06:00NoneNoneNone5456400192359305711-242ac118-0001-012None
6070861335427718191574545-242ac118-0001-012vdjvdjserver.orgNaNNaN2025-01-13T16:40:40.230-06:00projectFile2025-01-13T16:15:42.281-06:00NoneNoneNone5456400192359305711-242ac118-0001-012None
6070871840700597200490991-242ac118-0001-012vdjvdjserver.orgNaNNaN2025-01-13T16:40:43.277-06:00projectFile2025-01-13T16:16:56.231-06:00NoneNoneNone5456400192359305711-242ac118-0001-012None
6070885023614960920170991-242ac118-0001-012vdjvdjserver.orgNaNNaN2025-01-13T16:40:43.281-06:00projectFile2025-01-13T16:18:10.339-06:00NoneNoneNone5456400192359305711-242ac118-0001-012None
6070897830832104257678865-242ac118-0001-012vdjvdjserver.orgNaNNaN2025-01-13T16:41:49.035-06:00projectFile2025-01-13T16:41:49.035-06:00NoneNoneNone5456400192359305711-242ac118-0001-012None
\n", + "

35948 rows × 13 columns

\n", + "
" + ], + "text/plain": [ + " fileUuid owner tenantId \\\n", + "53 0001395955349445-5056a550b8-0001-012 vdjauth vdjserver.org \n", + "54 0001396029083309-5056a550b8-0001-012 vdjauth vdjserver.org \n", + "55 0001396029805022-5056a550b8-0001-012 vdjauth vdjserver.org \n", + "56 0001396030144907-5056a550b8-0001-012 vdjauth vdjserver.org \n", + "57 0001396039988083-5056a550b8-0001-012 vdjauth vdjserver.org \n", + "... ... ... ... \n", + "607085 5338423137409494545-242ac118-0001-012 vdj vdjserver.org \n", + "607086 1335427718191574545-242ac118-0001-012 vdj vdjserver.org \n", + "607087 1840700597200490991-242ac118-0001-012 vdj vdjserver.org \n", + "607088 5023614960920170991-242ac118-0001-012 vdj vdjserver.org \n", + "607089 7830832104257678865-242ac118-0001-012 vdj vdjserver.org \n", + "\n", + " schemaId internalUsername lastUpdated \\\n", + "53 NaN NaN 2014-03-27T16:22:29.444-05:00 \n", + "54 NaN NaN 2014-03-28T12:51:23.309-05:00 \n", + "55 NaN NaN 2014-03-28T13:03:25.022-05:00 \n", + "56 NaN NaN 2014-03-28T13:09:04.907-05:00 \n", + "57 NaN NaN 2014-03-28T15:53:08.083-05:00 \n", + "... ... ... ... \n", + "607085 NaN NaN 2025-01-13T16:40:40.230-06:00 \n", + "607086 NaN NaN 2025-01-13T16:40:40.230-06:00 \n", + "607087 NaN NaN 2025-01-13T16:40:43.277-06:00 \n", + "607088 NaN NaN 2025-01-13T16:40:43.281-06:00 \n", + "607089 NaN NaN 2025-01-13T16:41:49.035-06:00 \n", + "\n", + " name created _id.$oid associationIds.0 \\\n", + "53 projectFile 2014-03-27T16:22:29.444-05:00 None None \n", + "54 projectFile 2014-03-28T12:51:23.309-05:00 None None \n", + "55 projectFile 2014-03-28T13:03:25.022-05:00 None None \n", + "56 projectFile 2014-03-28T13:09:04.907-05:00 None None \n", + "57 projectFile 2014-03-28T15:53:08.083-05:00 None None \n", + "... ... ... ... ... 
\n", + "607085 projectFile 2025-01-13T16:14:09.079-06:00 None None \n", + "607086 projectFile 2025-01-13T16:15:42.281-06:00 None None \n", + "607087 projectFile 2025-01-13T16:16:56.231-06:00 None None \n", + "607088 projectFile 2025-01-13T16:18:10.339-06:00 None None \n", + "607089 projectFile 2025-01-13T16:41:49.035-06:00 None None \n", + "\n", + " associationIds.1 value.projectUuid value.type \n", + "53 None \n", + "54 None 0001395346788177-5056a550b8-0001-012 uploaded \n", + "55 None 0001395346788177-5056a550b8-0001-012 uploaded \n", + "56 None 0001395346788177-5056a550b8-0001-012 uploaded \n", + "57 None 0001395346788177-5056a550b8-0001-012 uploaded \n", + "... ... ... ... \n", + "607085 None 5456400192359305711-242ac118-0001-012 None \n", + "607086 None 5456400192359305711-242ac118-0001-012 None \n", + "607087 None 5456400192359305711-242ac118-0001-012 None \n", + "607088 None 5456400192359305711-242ac118-0001-012 None \n", + "607089 None 5456400192359305711-242ac118-0001-012 None \n", + "\n", + "[35948 rows x 13 columns]" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "jsonarray_projectFile = pd.read_json('/mnt/data2/Projects/vdjserver/vdjserverJsonArrayFeb042025.json')\n", + "jsonarray_projectFile = jsonarray_projectFile[jsonarray_projectFile['name'] == 'projectFile']\n", + "\n", + "jsonarray_projectFile['_id.$oid'] = jsonarray_projectFile['value'].apply(lambda x: x.get('$oid', None))\n", + "jsonarray_projectFile['associationIds.0'] = jsonarray_projectFile['value'].apply(lambda x: x[0] if isinstance(x, list) and len(x)>0 else None)\n", + "jsonarray_projectFile['associationIds.1'] = jsonarray_projectFile['value'].apply(lambda x: x[1] if isinstance(x, list) and len(x)>1 else None)\n", + "jsonarray_projectFile['value.projectUuid'] = jsonarray_projectFile['value'].apply(lambda x: x.get('projectUuid', None))\n", + "jsonarray_projectFile['value.type'] = jsonarray_projectFile['value'].apply(lambda x: 
x.get('type', None))\n", + "jsonarray_projectFile.rename(columns={'uuid':'fileUuid'}, inplace=True)\n", + "jsonarray_projectFile.drop(columns=['_id', 'associationIds', 'value'], inplace=True)\n", + "\n", + "jsonarray_projectFile" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
fileUuidownertenantIdschemaIdinternalUsernamelastUpdatednamecreated_id.$oidassociationIds.0associationIds.1value.type
value.projectUuid
443100441440001
0001395346788177-5056a550b8-0001-012161100161160001
0001396538655269-5056a550b8-0001-012111001110001
0001396562317882-5056a550b8-0001-012211002120001
0001396564389482-5056a550b8-0001-012311003130001
.......................................
988793832798425581-242ac118-0001-012241100241240000
991355144541573606-242ac114-0001-012111001110000
991430390416535060-242ac117-0001-012111001110000
993645360945172966-242ac11c-0001-012211002120000
993772352152343016-242ac11e-0001-012131100131130000
\n", + "

1916 rows × 12 columns

\n", + "
" + ], + "text/plain": [ + " fileUuid owner tenantId schemaId \\\n", + "value.projectUuid \n", + " 44 3 1 0 \n", + "0001395346788177-5056a550b8-0001-012 16 1 1 0 \n", + "0001396538655269-5056a550b8-0001-012 1 1 1 0 \n", + "0001396562317882-5056a550b8-0001-012 2 1 1 0 \n", + "0001396564389482-5056a550b8-0001-012 3 1 1 0 \n", + "... ... ... ... ... \n", + "988793832798425581-242ac118-0001-012 24 1 1 0 \n", + "991355144541573606-242ac114-0001-012 1 1 1 0 \n", + "991430390416535060-242ac117-0001-012 1 1 1 0 \n", + "993645360945172966-242ac11c-0001-012 2 1 1 0 \n", + "993772352152343016-242ac11e-0001-012 13 1 1 0 \n", + "\n", + " internalUsername lastUpdated name \\\n", + "value.projectUuid \n", + " 0 44 1 \n", + "0001395346788177-5056a550b8-0001-012 0 16 1 \n", + "0001396538655269-5056a550b8-0001-012 0 1 1 \n", + "0001396562317882-5056a550b8-0001-012 0 2 1 \n", + "0001396564389482-5056a550b8-0001-012 0 3 1 \n", + "... ... ... ... \n", + "988793832798425581-242ac118-0001-012 0 24 1 \n", + "991355144541573606-242ac114-0001-012 0 1 1 \n", + "991430390416535060-242ac117-0001-012 0 1 1 \n", + "993645360945172966-242ac11c-0001-012 0 2 1 \n", + "993772352152343016-242ac11e-0001-012 0 13 1 \n", + "\n", + " created _id.$oid associationIds.0 \\\n", + "value.projectUuid \n", + " 44 0 0 \n", + "0001395346788177-5056a550b8-0001-012 16 0 0 \n", + "0001396538655269-5056a550b8-0001-012 1 0 0 \n", + "0001396562317882-5056a550b8-0001-012 2 0 0 \n", + "0001396564389482-5056a550b8-0001-012 3 0 0 \n", + "... ... ... ... 
\n", + "988793832798425581-242ac118-0001-012 24 0 0 \n", + "991355144541573606-242ac114-0001-012 1 0 0 \n", + "991430390416535060-242ac117-0001-012 1 0 0 \n", + "993645360945172966-242ac11c-0001-012 2 0 0 \n", + "993772352152343016-242ac11e-0001-012 13 0 0 \n", + "\n", + " associationIds.1 value.type \n", + "value.projectUuid \n", + " 0 1 \n", + "0001395346788177-5056a550b8-0001-012 0 1 \n", + "0001396538655269-5056a550b8-0001-012 0 1 \n", + "0001396562317882-5056a550b8-0001-012 0 1 \n", + "0001396564389482-5056a550b8-0001-012 0 1 \n", + "... ... ... \n", + "988793832798425581-242ac118-0001-012 0 0 \n", + "991355144541573606-242ac114-0001-012 0 0 \n", + "991430390416535060-242ac117-0001-012 0 0 \n", + "993645360945172966-242ac11c-0001-012 0 0 \n", + "993772352152343016-242ac11e-0001-012 0 0 \n", + "\n", + "[1916 rows x 12 columns]" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "jsonarray_projectFile.groupby('value.projectUuid').nunique()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### json projectJob" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
uuidownertenantIdschemaIdinternalUsernamelastUpdatednamecreated_id.$oidassociationIds.0associationIds.1value.projectUuidvalue.jobUuid
2500001400192074855-5056a550b8-0001-012vdjvdjserver.orgNaNNaN2014-05-15T17:14:34.855-05:00projectJob2014-05-15T17:14:34.855-05:00NoneNoneNone0001399309581559-5056a550b8-0001-0120001399315558601-5056a550b8-0001-007
2520001400254373114-5056a550b8-0001-012vdjvdjserver.orgNaNNaN2014-05-16T10:32:53.114-05:00projectJob2014-05-16T10:32:53.114-05:00NoneNoneNone0001400250478554-5056a550b8-0001-0120001400254372814-5056a550b8-0001-007
2530001400273862423-5056a550b8-0001-012vdjvdjserver.orgNaNNaN2014-05-16T15:57:42.423-05:00projectJob2014-05-16T15:57:42.423-05:00NoneNoneNone0001400250478554-5056a550b8-0001-0120001400273862119-5056a550b8-0001-007
2540001400274448495-5056a550b8-0001-012vdjvdjserver.orgNaNNaN2014-05-16T16:07:28.494-05:00projectJob2014-05-16T16:07:28.494-05:00NoneNoneNone0001400250478554-5056a550b8-0001-0120001400274448320-5056a550b8-0001-007
2560001400274714655-5056a550b8-0001-012vdjvdjserver.orgNaNNaN2014-05-16T16:11:54.655-05:00projectJob2014-05-16T16:11:54.655-05:00NoneNoneNone0001400250478554-5056a550b8-0001-0120001400274714490-5056a550b8-0001-007
..........................................
6067115097479121213854191-242ac118-0001-012vdjvdjserver.orgNaNNaN2025-01-08T12:13:35.460-06:00projectJob2025-01-08T12:13:35.460-06:00NoneNoneNone6589143665654501871-242ac118-0001-012ad02cb34-250e-48cb-a06e-973e431b62ee-007
6070901948444895656078865-242ac118-0001-012vdjvdjserver.orgNaNNaN2025-01-13T16:44:05.995-06:00projectJob2025-01-13T16:44:05.995-06:00NoneNoneNone5456400192359305711-242ac118-0001-012c0ab5f4a-97b0-4dc3-93e8-0908c95cb3a4-007
6073291819643224410746385-242ac118-0001-012vdjvdjserver.orgNaNNaN2025-01-20T03:06:57.762-06:00projectJob2025-01-20T03:06:57.762-06:00NoneNoneNone5199144433477554666-242ac116-0001-012773a5cb7-b369-4517-a221-83d57e3899e5-007
6073302845695380777266705-242ac118-0001-012vdjvdjserver.orgNaNNaN2025-01-23T15:05:59.570-06:00projectJob2025-01-23T15:05:59.570-06:00NoneNoneNone5456400192359305711-242ac118-0001-0129188bf80-e868-4e05-a6b4-308c044108d7-007
6073473203620026767118831-242ac118-0001-012vdjvdjserver.orgNaNNaN2025-01-24T20:57:54.599-06:00projectJob2025-01-24T20:57:54.599-06:00NoneNoneNone5456400192359305711-242ac118-0001-012c7cd08ad-a560-4574-a363-b9cc4c5e051d-007
\n", + "

6355 rows × 13 columns

\n", + "
" + ], + "text/plain": [ + " uuid owner tenantId schemaId \\\n", + "250 0001400192074855-5056a550b8-0001-012 vdj vdjserver.org NaN \n", + "252 0001400254373114-5056a550b8-0001-012 vdj vdjserver.org NaN \n", + "253 0001400273862423-5056a550b8-0001-012 vdj vdjserver.org NaN \n", + "254 0001400274448495-5056a550b8-0001-012 vdj vdjserver.org NaN \n", + "256 0001400274714655-5056a550b8-0001-012 vdj vdjserver.org NaN \n", + "... ... ... ... ... \n", + "606711 5097479121213854191-242ac118-0001-012 vdj vdjserver.org NaN \n", + "607090 1948444895656078865-242ac118-0001-012 vdj vdjserver.org NaN \n", + "607329 1819643224410746385-242ac118-0001-012 vdj vdjserver.org NaN \n", + "607330 2845695380777266705-242ac118-0001-012 vdj vdjserver.org NaN \n", + "607347 3203620026767118831-242ac118-0001-012 vdj vdjserver.org NaN \n", + "\n", + " internalUsername lastUpdated name \\\n", + "250 NaN 2014-05-15T17:14:34.855-05:00 projectJob \n", + "252 NaN 2014-05-16T10:32:53.114-05:00 projectJob \n", + "253 NaN 2014-05-16T15:57:42.423-05:00 projectJob \n", + "254 NaN 2014-05-16T16:07:28.494-05:00 projectJob \n", + "256 NaN 2014-05-16T16:11:54.655-05:00 projectJob \n", + "... ... ... ... \n", + "606711 NaN 2025-01-08T12:13:35.460-06:00 projectJob \n", + "607090 NaN 2025-01-13T16:44:05.995-06:00 projectJob \n", + "607329 NaN 2025-01-20T03:06:57.762-06:00 projectJob \n", + "607330 NaN 2025-01-23T15:05:59.570-06:00 projectJob \n", + "607347 NaN 2025-01-24T20:57:54.599-06:00 projectJob \n", + "\n", + " created _id.$oid associationIds.0 \\\n", + "250 2014-05-15T17:14:34.855-05:00 None None \n", + "252 2014-05-16T10:32:53.114-05:00 None None \n", + "253 2014-05-16T15:57:42.423-05:00 None None \n", + "254 2014-05-16T16:07:28.494-05:00 None None \n", + "256 2014-05-16T16:11:54.655-05:00 None None \n", + "... ... ... ... 
\n", + "606711 2025-01-08T12:13:35.460-06:00 None None \n", + "607090 2025-01-13T16:44:05.995-06:00 None None \n", + "607329 2025-01-20T03:06:57.762-06:00 None None \n", + "607330 2025-01-23T15:05:59.570-06:00 None None \n", + "607347 2025-01-24T20:57:54.599-06:00 None None \n", + "\n", + " associationIds.1 value.projectUuid \\\n", + "250 None 0001399309581559-5056a550b8-0001-012 \n", + "252 None 0001400250478554-5056a550b8-0001-012 \n", + "253 None 0001400250478554-5056a550b8-0001-012 \n", + "254 None 0001400250478554-5056a550b8-0001-012 \n", + "256 None 0001400250478554-5056a550b8-0001-012 \n", + "... ... ... \n", + "606711 None 6589143665654501871-242ac118-0001-012 \n", + "607090 None 5456400192359305711-242ac118-0001-012 \n", + "607329 None 5199144433477554666-242ac116-0001-012 \n", + "607330 None 5456400192359305711-242ac118-0001-012 \n", + "607347 None 5456400192359305711-242ac118-0001-012 \n", + "\n", + " value.jobUuid \n", + "250 0001399315558601-5056a550b8-0001-007 \n", + "252 0001400254372814-5056a550b8-0001-007 \n", + "253 0001400273862119-5056a550b8-0001-007 \n", + "254 0001400274448320-5056a550b8-0001-007 \n", + "256 0001400274714490-5056a550b8-0001-007 \n", + "... ... 
\n", + "606711 ad02cb34-250e-48cb-a06e-973e431b62ee-007 \n", + "607090 c0ab5f4a-97b0-4dc3-93e8-0908c95cb3a4-007 \n", + "607329 773a5cb7-b369-4517-a221-83d57e3899e5-007 \n", + "607330 9188bf80-e868-4e05-a6b4-308c044108d7-007 \n", + "607347 c7cd08ad-a560-4574-a363-b9cc4c5e051d-007 \n", + "\n", + "[6355 rows x 13 columns]" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "jsonarray_projectJob = pd.read_json('/mnt/data2/Projects/vdjserver/vdjserverJsonArrayFeb042025.json')\n", + "jsonarray_projectJob = jsonarray_projectJob[jsonarray_projectJob['name'] == 'projectJob']\n", + "\n", + "jsonarray_projectJob['_id.$oid'] = jsonarray_projectJob['value'].apply(lambda x: x.get('$oid', None))\n", + "jsonarray_projectJob['associationIds.0'] = jsonarray_projectJob['value'].apply(lambda x: x[0] if isinstance(x, list) and len(x)>0 else None)\n", + "jsonarray_projectJob['associationIds.1'] = jsonarray_projectJob['value'].apply(lambda x: x[1] if isinstance(x, list) and len(x)>1 else None)\n", + "jsonarray_projectJob['value.projectUuid'] = jsonarray_projectJob['value'].apply(lambda x: x.get('projectUuid', None))\n", + "jsonarray_projectJob['value.jobUuid'] = jsonarray_projectJob['value'].apply(lambda x: x.get('jobUuid', None))\n", + "jsonarray_projectJob.drop(columns=['_id', 'associationIds', 'value'], inplace=True)\n", + "\n", + "jsonarray_projectJob" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
uuidownertenantIdschemaIdinternalUsernamelastUpdatednamecreated_id.$oidassociationIds.0associationIds.1value.jobUuid
value.projectUuid
0001399309581559-5056a550b8-0001-012111001110001
0001400250478554-5056a550b8-0001-0122511002512500025
0001401392421049-5056a550b8-0001-012911009190009
0001401393981043-5056a550b8-0001-012111001110001
0001401395232432-5056a550b8-0001-012311003130003
.......................................
976788537735179795-242ac118-0001-012411004140004
988793832798425581-242ac118-0001-012411004140004
991355144541573606-242ac114-0001-012111001110001
991430390416535060-242ac117-0001-012111001110001
993772352152343016-242ac11e-0001-012211002120002
\n", + "

1086 rows × 12 columns

\n", + "
" + ], + "text/plain": [ + " uuid owner tenantId schemaId \\\n", + "value.projectUuid \n", + "0001399309581559-5056a550b8-0001-012 1 1 1 0 \n", + "0001400250478554-5056a550b8-0001-012 25 1 1 0 \n", + "0001401392421049-5056a550b8-0001-012 9 1 1 0 \n", + "0001401393981043-5056a550b8-0001-012 1 1 1 0 \n", + "0001401395232432-5056a550b8-0001-012 3 1 1 0 \n", + "... ... ... ... ... \n", + "976788537735179795-242ac118-0001-012 4 1 1 0 \n", + "988793832798425581-242ac118-0001-012 4 1 1 0 \n", + "991355144541573606-242ac114-0001-012 1 1 1 0 \n", + "991430390416535060-242ac117-0001-012 1 1 1 0 \n", + "993772352152343016-242ac11e-0001-012 2 1 1 0 \n", + "\n", + " internalUsername lastUpdated name \\\n", + "value.projectUuid \n", + "0001399309581559-5056a550b8-0001-012 0 1 1 \n", + "0001400250478554-5056a550b8-0001-012 0 25 1 \n", + "0001401392421049-5056a550b8-0001-012 0 9 1 \n", + "0001401393981043-5056a550b8-0001-012 0 1 1 \n", + "0001401395232432-5056a550b8-0001-012 0 3 1 \n", + "... ... ... ... \n", + "976788537735179795-242ac118-0001-012 0 4 1 \n", + "988793832798425581-242ac118-0001-012 0 4 1 \n", + "991355144541573606-242ac114-0001-012 0 1 1 \n", + "991430390416535060-242ac117-0001-012 0 1 1 \n", + "993772352152343016-242ac11e-0001-012 0 2 1 \n", + "\n", + " created _id.$oid associationIds.0 \\\n", + "value.projectUuid \n", + "0001399309581559-5056a550b8-0001-012 1 0 0 \n", + "0001400250478554-5056a550b8-0001-012 25 0 0 \n", + "0001401392421049-5056a550b8-0001-012 9 0 0 \n", + "0001401393981043-5056a550b8-0001-012 1 0 0 \n", + "0001401395232432-5056a550b8-0001-012 3 0 0 \n", + "... ... ... ... 
\n", + "976788537735179795-242ac118-0001-012 4 0 0 \n", + "988793832798425581-242ac118-0001-012 4 0 0 \n", + "991355144541573606-242ac114-0001-012 1 0 0 \n", + "991430390416535060-242ac117-0001-012 1 0 0 \n", + "993772352152343016-242ac11e-0001-012 2 0 0 \n", + "\n", + " associationIds.1 value.jobUuid \n", + "value.projectUuid \n", + "0001399309581559-5056a550b8-0001-012 0 1 \n", + "0001400250478554-5056a550b8-0001-012 0 25 \n", + "0001401392421049-5056a550b8-0001-012 0 9 \n", + "0001401393981043-5056a550b8-0001-012 0 1 \n", + "0001401395232432-5056a550b8-0001-012 0 3 \n", + "... ... ... \n", + "976788537735179795-242ac118-0001-012 0 4 \n", + "988793832798425581-242ac118-0001-012 0 4 \n", + "991355144541573606-242ac114-0001-012 0 1 \n", + "991430390416535060-242ac117-0001-012 0 1 \n", + "993772352152343016-242ac11e-0001-012 0 2 \n", + "\n", + "[1086 rows x 12 columns]" + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "jsonarray_projectJob.groupby('value.projectUuid').nunique()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## combine projectJob jobAll and emailUsernameCombo" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
uuid_xowner_xtenantId_xschemaId_xinternalUsername_xlastUpdated_xname_xcreated_x_id.$oid_xassociationIds.0_x...remote_submit_retriesremote_status_checksfailed_status_checkslast_status_checkblocked_countvisibleupdate_tokenparameters.Creatorvalue.usernamevalue.email
parameters.Creator
vdj-test18421008418400...231221841vdj-test1vdjserver@utsouthwestern.edu
schristley4561100456145600...118423381314561schristleyscott.christley@utsouthwestern.edu
scott_public3051100305130500...123622221213051scott_publicscott.christley@utsouthwestern.edu
victorialopez1201100120112000...149184311201victorialopezvictorialopez@isciii.es
scott_ab5111005115100...15011871511scott_abscott.christley@utsouthwestern.edu
..................................................................
baoduong1110011100...11111111baoduongbaoduong@uab.edu
zhangcy1811001811800...16114131181zhangcycaiyan505@126.com
zhe.sang1110011100...11111111zhe.sangzhe.sang@gmail.com
zhouhao9610041110011100...11111111zhouhao961004zhouhao@biken.osaka-u.ac.jp
vae9110091900...12191191vaezyf950619@gmail.com
\n", + "

300 rows × 69 columns

\n", + "
" + ], + "text/plain": [ + " uuid_x owner_x tenantId_x schemaId_x \\\n", + "parameters.Creator \n", + "vdj-test1 84 2 1 0 \n", + "schristley 456 1 1 0 \n", + "scott_public 305 1 1 0 \n", + "victorialopez 120 1 1 0 \n", + "scott_ab 51 1 1 0 \n", + "... ... ... ... ... \n", + "baoduong 1 1 1 0 \n", + "zhangcy 18 1 1 0 \n", + "zhe.sang 1 1 1 0 \n", + "zhouhao961004 1 1 1 0 \n", + "vae 9 1 1 0 \n", + "\n", + " internalUsername_x lastUpdated_x name_x created_x \\\n", + "parameters.Creator \n", + "vdj-test1 0 84 1 84 \n", + "schristley 0 456 1 456 \n", + "scott_public 0 305 1 305 \n", + "victorialopez 0 120 1 120 \n", + "scott_ab 0 51 1 51 \n", + "... ... ... ... ... \n", + "baoduong 0 1 1 1 \n", + "zhangcy 0 18 1 18 \n", + "zhe.sang 0 1 1 1 \n", + "zhouhao961004 0 1 1 1 \n", + "vae 0 9 1 9 \n", + "\n", + " _id.$oid_x associationIds.0_x ... \\\n", + "parameters.Creator ... \n", + "vdj-test1 0 0 ... \n", + "schristley 0 0 ... \n", + "scott_public 0 0 ... \n", + "victorialopez 0 0 ... \n", + "scott_ab 0 0 ... \n", + "... ... ... ... \n", + "baoduong 0 0 ... \n", + "zhangcy 0 0 ... \n", + "zhe.sang 0 0 ... \n", + "zhouhao961004 0 0 ... \n", + "vae 0 0 ... \n", + "\n", + " remote_submit_retries remote_status_checks \\\n", + "parameters.Creator \n", + "vdj-test1 2 3 \n", + "schristley 1 184 \n", + "scott_public 1 236 \n", + "victorialopez 1 49 \n", + "scott_ab 1 50 \n", + "... ... ... \n", + "baoduong 1 1 \n", + "zhangcy 1 6 \n", + "zhe.sang 1 1 \n", + "zhouhao961004 1 1 \n", + "vae 1 2 \n", + "\n", + " failed_status_checks last_status_check blocked_count \\\n", + "parameters.Creator \n", + "vdj-test1 1 2 2 \n", + "schristley 2 338 13 \n", + "scott_public 2 222 12 \n", + "victorialopez 1 84 3 \n", + "scott_ab 1 18 7 \n", + "... ... ... ... 
\n", + "baoduong 1 1 1 \n", + "zhangcy 1 14 13 \n", + "zhe.sang 1 1 1 \n", + "zhouhao961004 1 1 1 \n", + "vae 1 9 1 \n", + "\n", + " visible update_token parameters.Creator value.username \\\n", + "parameters.Creator \n", + "vdj-test1 1 84 1 vdj-test1 \n", + "schristley 1 456 1 schristley \n", + "scott_public 1 305 1 scott_public \n", + "victorialopez 1 120 1 victorialopez \n", + "scott_ab 1 51 1 scott_ab \n", + "... ... ... ... ... \n", + "baoduong 1 1 1 baoduong \n", + "zhangcy 1 18 1 zhangcy \n", + "zhe.sang 1 1 1 zhe.sang \n", + "zhouhao961004 1 1 1 zhouhao961004 \n", + "vae 1 9 1 vae \n", + "\n", + " value.email \n", + "parameters.Creator \n", + "vdj-test1 vdjserver@utsouthwestern.edu \n", + "schristley scott.christley@utsouthwestern.edu \n", + "scott_public scott.christley@utsouthwestern.edu \n", + "victorialopez victorialopez@isciii.es \n", + "scott_ab scott.christley@utsouthwestern.edu \n", + "... ... \n", + "baoduong baoduong@uab.edu \n", + "zhangcy caiyan505@126.com \n", + "zhe.sang zhe.sang@gmail.com \n", + "zhouhao961004 zhouhao@biken.osaka-u.ac.jp \n", + "vae zyf950619@gmail.com \n", + "\n", + "[300 rows x 69 columns]" + ] + }, + "execution_count": 15, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# from jsonArray name=projectJob combined with jobs_all json file \n", + "projJob_projFile_jobAll_emailUser = jsonarray_projectJob.merge(jsonarray_projectFile, on='value.projectUuid')\\\n", + " .merge(jobs_all_df, left_on='value.jobUuid', right_on='uuid')\\\n", + " .merge(email_username_combo, left_on='parameters.Creator', right_on='value.username')\n", + "\n", + " \n", + "\n", + "\n", + "# extract all columns\n", + "cols = projJob_projFile_jobAll_emailUser.columns.to_list()\n", + "\n", + "# define custom agg function for email and username when only 1 present, print out name, otherwise unique count\n", + "def custom_agg(series):\n", + " unique_values = series.unique()\n", + " if len(unique_values) == 1:\n", + " return 
unique_values[0]\n", + " else:\n", + " return len(unique_values)\n", + "\n", + "# define columns to use this function \n", + "user_cols = {'value.username': custom_agg,\n", + " 'value.email': custom_agg}\n", + "\n", + "# create agg_dict\n", + "agg_dict = {col: 'nunique' for col in cols if col not in user_cols}\n", + "agg_dict.update(user_cols)\n", + "\n", + "# groupby\n", + "projJob_projFile_jobAll_emailUser_groupby = projJob_projFile_jobAll_emailUser.groupby('parameters.Creator').agg(agg_dict).sort_values('value.projectUuid', ascending=False)\n", + "\n", + "# save\n", + "# projJob_projFile_jobAll_emailUser_groupby.to_csv('./csv/jsonArrayProjectJob.merge.jsonArrayProjectFile_on=projectUuid'+\n", + "# '.merge.jobsAll_on=jobUuid&uuid'+\n", + "# '.merge.emailUserCombo_on=Creator&username'+\n", + "# '.groupby=Creators'+\n", + "# '.sorted=projectUuid.csv')\n", + "projJob_projFile_jobAll_emailUser_groupby" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
value.emailvalue.projectUuidfileUuidvalue.jobUuid
parameters.Creator
vdj-test1vdjserver@utsouthwestern.edu848484
schristleyscott.christley@utsouthwestern.edu647097456
scott_publicscott.christley@utsouthwestern.edu323242305
victorialopezvictorialopez@isciii.es27218120
scott_abscott.christley@utsouthwestern.edu23417551
\n", + "
" + ], + "text/plain": [ + " value.email value.projectUuid \\\n", + "parameters.Creator \n", + "vdj-test1 vdjserver@utsouthwestern.edu 84 \n", + "schristley scott.christley@utsouthwestern.edu 64 \n", + "scott_public scott.christley@utsouthwestern.edu 32 \n", + "victorialopez victorialopez@isciii.es 27 \n", + "scott_ab scott.christley@utsouthwestern.edu 23 \n", + "\n", + " fileUuid value.jobUuid \n", + "parameters.Creator \n", + "vdj-test1 84 84 \n", + "schristley 7097 456 \n", + "scott_public 3242 305 \n", + "victorialopez 218 120 \n", + "scott_ab 4175 51 " + ] + }, + "execution_count": 16, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Pull out stats required\n", + "stats_cols = ['value.email', 'value.projectUuid', 'fileUuid', 'value.jobUuid', ]\n", + "# projJob_projFile_jobAll_emailUser_groupby[stats_cols].to_csv('./csv/reduced_stats.csv')\n", + "projJob_projFile_jobAll_emailUser_groupby[stats_cols].head()" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [], + "source": [ + "# tests used when pulling '_id's, 'associationIds's, and 'value's\n", + "\n", + "# print(set([str(e.keys()) for e in jsonarray_projectJob['_id']]))\n", + "# print(set(len(e) for e in jsonarray_projectJob['associationIds']))\n", + "# print(set([str(e.keys()) for e in jsonarray_projectJob['value']]))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## vdjserverMetadataPermissions.json" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
idlast_updatedusernameuuidtenant_id
permission
ALL63646364636463646364
READ4691246912469124691246912
READ_WRITE707319707319707319707319707319
\n", + "
" + ], + "text/plain": [ + " id last_updated username uuid tenant_id\n", + "permission \n", + "ALL 6364 6364 6364 6364 6364\n", + "READ 46912 46912 46912 46912 46912\n", + "READ_WRITE 707319 707319 707319 707319 707319" + ] + }, + "execution_count": 18, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "metadata_perms_df.groupby(['permission']).count()" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "username\n", + "schristley 242530\n", + "scott_test1 62528\n", + "itoby 54506\n", + "lgcowell 51657\n", + "scott_public 43133\n", + " ... \n", + "zktuong 1\n", + "zhonghuang 1\n", + "zhanxw 1\n", + "zerufael 1\n", + "a.sooda 1\n", + "Name: count, Length: 1058, dtype: int64" + ] + }, + "execution_count": 19, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "non_read_users = metadata_perms_df[(metadata_perms_df['permission'] == 'ALL') | (metadata_perms_df['permission'] == 'READ_WRITE')]\n", + "# non_read_users.groupby(by=['username']).count().sort_values(by=['uuid'],ascending=False)\n", + "non_read_users.value_counts('username')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## counting the number of jobs\n", + "- From vdjserverJobs_all.json the parameters.Creator var was counted, \n", + "- This count left joined with a dataframe containing email and username combinations. \n", + " - These combos were made from vdjserverJsonArrayFeb042025.json by finding valid emails by filtering out any @test or @email with varying top level domains, and checking if the address can be split on @ with a len of 2. The injection bit was also removed" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
parameters.Creatornum_jobsvalue.usernamevalue.email
0vdj2552NaNNaN
1scott_test1643scott_test1scott.christley@utsouthwestern.edu
2schristley592schristleyscott.christley@utsouthwestern.edu
3scott_public478scott_publicscott.christley@utsouthwestern.edu
4victorialopez175victorialopezvictorialopez@isciii.es
...............
303jivdj171jivdj17xuhuai.ji@stanford.edu
304pingpingzheng1pingpingzhengzhengpp@stanford.edu
305minici.claudia1minici.claudiaminici.claudia@hsr.it
306nianbinli1nianbinlilinianbin97@tmu.edu.cn
307crushseven1crushsevenZiyue.Yan@alivexbiotech.com
\n", + "

308 rows × 4 columns

\n", + "
" + ], + "text/plain": [ + " parameters.Creator num_jobs value.username \\\n", + "0 vdj 2552 NaN \n", + "1 scott_test1 643 scott_test1 \n", + "2 schristley 592 schristley \n", + "3 scott_public 478 scott_public \n", + "4 victorialopez 175 victorialopez \n", + ".. ... ... ... \n", + "303 jivdj17 1 jivdj17 \n", + "304 pingpingzheng 1 pingpingzheng \n", + "305 minici.claudia 1 minici.claudia \n", + "306 nianbinli 1 nianbinli \n", + "307 crushseven 1 crushseven \n", + "\n", + " value.email \n", + "0 NaN \n", + "1 scott.christley@utsouthwestern.edu \n", + "2 scott.christley@utsouthwestern.edu \n", + "3 scott.christley@utsouthwestern.edu \n", + "4 victorialopez@isciii.es \n", + ".. ... \n", + "303 xuhuai.ji@stanford.edu \n", + "304 zhengpp@stanford.edu \n", + "305 minici.claudia@hsr.it \n", + "306 linianbin97@tmu.edu.cn \n", + "307 Ziyue.Yan@alivexbiotech.com \n", + "\n", + "[308 rows x 4 columns]" + ] + }, + "execution_count": 20, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# cant do this because owner is usually vdj since tacc account needed. \n", + "# we will replace owner with parameters.creator\n", + "num_jobs = jobs_all_df['parameters.Creator'].value_counts().to_frame().reset_index().rename(columns={'count':'num_jobs'})\n", + "num_jobs = num_jobs.merge(email_username_combo, how='left', left_on='parameters.Creator', right_on='value.username')\n", + "# num_jobs.to_csv('./csv/num_jobs_by_parameters.Creator_w_username_equal_to_parameters.Creator.csv')\n", + "num_jobs\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# jobsAll & jobsEvents" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
id_xnametenant_id_xtenant_queueownerrolessystem_idapp_idapp_uuidstatus_x...id_ycreated_ycreated_bydescriptionip_addressstatus_ytenant_id_yjob_idtransfertaskuuid_y
0503865My Job 24-Jan-2025 8:57:09 pmvdjserver.orgaloe.jobq.vdjserver.org.submit.DefaultQueuevdjInternal/VDJ_vdj_keycloak_PRODUCTION,Internal/...ls6.tacc.utexas.edurepcalc-ls6-2.0u86306626279335587345-242ac119-0001-005FINISHED...235856342025-01-25 02:57:54vdjJob processing beginning172.17.0.5PENDINGvdjserver.org5038650.02ad89d35-7a9b-49f8-92fc-6ac58a616bc0-028
1503865My Job 24-Jan-2025 8:57:09 pmvdjserver.orgaloe.jobq.vdjserver.org.submit.DefaultQueuevdjInternal/VDJ_vdj_keycloak_PRODUCTION,Internal/...ls6.tacc.utexas.edurepcalc-ls6-2.0u86306626279335587345-242ac119-0001-005FINISHED...235856352025-01-25 02:57:54vdjIdentifying input files for staging172.17.0.5PROCESSING_INPUTSvdjserver.org5038650.0a59279d9-4e37-456f-bc14-b44ed6a150fd-028
2503865My Job 24-Jan-2025 8:57:09 pmvdjserver.orgaloe.jobq.vdjserver.org.submit.DefaultQueuevdjInternal/VDJ_vdj_keycloak_PRODUCTION,Internal/...ls6.tacc.utexas.edurepcalc-ls6-2.0u86306626279335587345-242ac119-0001-005FINISHED...235856362025-01-25 02:57:54vdjALL permission granted to schristley172.17.0.3PERMISSION_GRANTvdjserver.org5038650.0808c6bb4-8397-4462-bf5a-81c9f96b0b08-028
3503865My Job 24-Jan-2025 8:57:09 pmvdjserver.orgaloe.jobq.vdjserver.org.submit.DefaultQueuevdjInternal/VDJ_vdj_keycloak_PRODUCTION,Internal/...ls6.tacc.utexas.edurepcalc-ls6-2.0u86306626279335587345-242ac119-0001-005FINISHED...235856372025-01-25 02:57:56vdjTransferring job input data to execution system172.17.0.5STAGING_INPUTSvdjserver.org5038650.0d230d601-4e32-45d1-a4f0-3c131b3572d6-028
4503865My Job 24-Jan-2025 8:57:09 pmvdjserver.orgaloe.jobq.vdjserver.org.submit.DefaultQueuevdjInternal/VDJ_vdj_keycloak_PRODUCTION,Internal/...ls6.tacc.utexas.edurepcalc-ls6-2.0u86306626279335587345-242ac119-0001-005FINISHED...235856382025-01-25 02:57:58vdjJob input copy in progress: agave://data.vdjse...172.17.0.5STAGING_INPUTSvdjserver.org503865560378492.09ee603a8-8777-4677-bbd3-0f55b2863676-028
\n", + "

5 rows × 52 columns

\n", + "
" + ], + "text/plain": [ + " id_x name tenant_id_x \\\n", + "0 503865 My Job 24-Jan-2025 8:57:09 pm vdjserver.org \n", + "1 503865 My Job 24-Jan-2025 8:57:09 pm vdjserver.org \n", + "2 503865 My Job 24-Jan-2025 8:57:09 pm vdjserver.org \n", + "3 503865 My Job 24-Jan-2025 8:57:09 pm vdjserver.org \n", + "4 503865 My Job 24-Jan-2025 8:57:09 pm vdjserver.org \n", + "\n", + " tenant_queue owner \\\n", + "0 aloe.jobq.vdjserver.org.submit.DefaultQueue vdj \n", + "1 aloe.jobq.vdjserver.org.submit.DefaultQueue vdj \n", + "2 aloe.jobq.vdjserver.org.submit.DefaultQueue vdj \n", + "3 aloe.jobq.vdjserver.org.submit.DefaultQueue vdj \n", + "4 aloe.jobq.vdjserver.org.submit.DefaultQueue vdj \n", + "\n", + " roles system_id \\\n", + "0 Internal/VDJ_vdj_keycloak_PRODUCTION,Internal/... ls6.tacc.utexas.edu \n", + "1 Internal/VDJ_vdj_keycloak_PRODUCTION,Internal/... ls6.tacc.utexas.edu \n", + "2 Internal/VDJ_vdj_keycloak_PRODUCTION,Internal/... ls6.tacc.utexas.edu \n", + "3 Internal/VDJ_vdj_keycloak_PRODUCTION,Internal/... ls6.tacc.utexas.edu \n", + "4 Internal/VDJ_vdj_keycloak_PRODUCTION,Internal/... ls6.tacc.utexas.edu \n", + "\n", + " app_id app_uuid status_x ... \\\n", + "0 repcalc-ls6-2.0u8 6306626279335587345-242ac119-0001-005 FINISHED ... \n", + "1 repcalc-ls6-2.0u8 6306626279335587345-242ac119-0001-005 FINISHED ... \n", + "2 repcalc-ls6-2.0u8 6306626279335587345-242ac119-0001-005 FINISHED ... \n", + "3 repcalc-ls6-2.0u8 6306626279335587345-242ac119-0001-005 FINISHED ... \n", + "4 repcalc-ls6-2.0u8 6306626279335587345-242ac119-0001-005 FINISHED ... 
\n", + "\n", + " id_y created_y created_by \\\n", + "0 23585634 2025-01-25 02:57:54 vdj \n", + "1 23585635 2025-01-25 02:57:54 vdj \n", + "2 23585636 2025-01-25 02:57:54 vdj \n", + "3 23585637 2025-01-25 02:57:56 vdj \n", + "4 23585638 2025-01-25 02:57:58 vdj \n", + "\n", + " description ip_address \\\n", + "0 Job processing beginning 172.17.0.5 \n", + "1 Identifying input files for staging 172.17.0.5 \n", + "2 ALL permission granted to schristley 172.17.0.3 \n", + "3 Transferring job input data to execution system 172.17.0.5 \n", + "4 Job input copy in progress: agave://data.vdjse... 172.17.0.5 \n", + "\n", + " status_y tenant_id_y job_id transfertask \\\n", + "0 PENDING vdjserver.org 503865 0.0 \n", + "1 PROCESSING_INPUTS vdjserver.org 503865 0.0 \n", + "2 PERMISSION_GRANT vdjserver.org 503865 0.0 \n", + "3 STAGING_INPUTS vdjserver.org 503865 0.0 \n", + "4 STAGING_INPUTS vdjserver.org 503865 560378492.0 \n", + "\n", + " uuid_y \n", + "0 2ad89d35-7a9b-49f8-92fc-6ac58a616bc0-028 \n", + "1 a59279d9-4e37-456f-bc14-b44ed6a150fd-028 \n", + "2 808c6bb4-8397-4462-bf5a-81c9f96b0b08-028 \n", + "3 d230d601-4e32-45d1-a4f0-3c131b3572d6-028 \n", + "4 9ee603a8-8777-4677-bbd3-0f55b2863676-028 \n", + "\n", + "[5 rows x 52 columns]" + ] + }, + "execution_count": 21, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "jobs_all_and_events_df = jobs_all_df.merge(job_events_df, right_on='job_id', left_on='id')\n", + "jobs_all_and_events_df.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + 
"'{\"UMIMinFrequency\":\"0.6\",\"PreFilterStatisticsFlag\":true,\"MinimumQuality\":20,\"Workflow\":\"paired\",\"Creator\":\"lcc294\",\"ReversePrimer\":\"none\",\"FindUniqueFlag\":true,\"SequenceFileTypes\":\"454\",\"UMIMaxGap\":\"0.5\",\"FilterFlag\":true,\"PostFilterStatisticsFlag\":true,\"SequenceReversePairedFilesMetadata\":\"1628993133380103705-242ac11c-0001-012\",\"FindUniqueMaxNucleotides\":20,\"ForwardPrimer\":\"none\",\"FindUniqueExclude\":true,\"Barcode\":false,\"MinimumLength\":250,\"SequenceForwardPairedFilesMetadata\":\"2433979131179691545-242ac11c-0001-012\",\"UMIConsensus\":false,\"UMIMaxError\":\"0.1\"}'" + ] + }, + "execution_count": 22, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "job_event_perms = job_events_df.merge(job_permissions_df, on='job_id')\n", + "job_event_perms.merge(jobs_all_df, left_on='job_id', right_on='id')['parameters'][170000]" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "explor", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.13.2" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/conversion/tapis_v2_to_v3/data_exploration_2.ipynb b/conversion/tapis_v2_to_v3/data_exploration_2.ipynb new file mode 100644 index 0000000..0814eb3 --- /dev/null +++ b/conversion/tapis_v2_to_v3/data_exploration_2.ipynb @@ -0,0 +1,9987 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "c843c3e4-12c4-44ef-b357-fa8435844e16", + "metadata": {}, + "source": [ + "## All Imports" + ] + }, + { + "cell_type": "raw", + "id": "213eb7e1-5520-468b-a7e7-fd85d3bf9da4", + "metadata": {}, + "source": [ + "%lsmagic" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "c18e4b34-fb63-4eac-9507-74ae79802df3", + "metadata": {}, + "outputs": 
[], + "source": [ + "import pandas as pd\n", + "import numpy as np\n", + "import matplotlib.pyplot as plt\n", + "import seaborn as sns\n", + "import json" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "ef60f956-8f92-4a89-9c28-dd1a366bdacc", + "metadata": {}, + "outputs": [], + "source": [ + "job_events_df = pd.read_json('/mnt/md0/Projects/vdjserver/vdjserverJobEvents.json')\n", + "job_permissions_df = pd.read_json('/mnt/md0/Projects/vdjserver/vdjserverJobPermissions.json')\n", + "jobs_all_df = pd.read_json('/mnt/md0/Projects/vdjserver/vdjserverJobs_all.json')\n", + "jobs_all_df['parameters.Creator'] = jobs_all_df['parameters'].apply(lambda x: json.loads(x).get('Creator', np.nan) if x else np.nan)\n", + "metadata_perms_df = pd.read_json('/mnt/md0/Projects/vdjserver/vdjserverMetadataPermissions.json')\n", + "\n", + "with open('/mnt/md0/Projects/vdjserver/vdjserverJsonArrayFeb042025.json', 'r') as f:\n", + " jsonarray = json.load(f)" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "163f864c-bfc1-4f60-be28-b2c2ad43f411", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
idlast_updatedpermissionusernameuuidtenant_id
052014-01-21 17:00:11READ_WRITEtest30001389977207738-5056a550b8-0001-012vdjserver.org
162014-01-21 17:00:41READ_WRITEtest40001389977207738-5056a550b8-0001-012vdjserver.org
292014-01-21 17:21:43READ_WRITEtest110001389977207738-5056a550b8-0001-012vdjserver.org
3142014-01-29 10:28:16READ_WRITEjfonner0001389977207738-5056a550b8-0001-012vdjserver.org
4172014-01-29 14:06:38READ_WRITEadshkl;dasfhkdf0001391025968832-5056a550b8-0001-012vdjserver.org
.....................
76059014449862025-01-25 09:44:43READ_WRITEschristley3580715269144908271-242ac118-0001-012vdjserver.org
76059114449872025-01-25 09:44:44READ_WRITEschristley3569118857445708271-242ac118-0001-012vdjserver.org
76059214449882025-01-25 09:44:44READ_WRITEschristley3557565395419468271-242ac118-0001-012vdjserver.org
76059314449892025-01-25 09:44:44READ_WRITEschristley3547300423582028271-242ac118-0001-012vdjserver.org
76059414449902025-01-25 09:44:44READ_WRITEschristley3534544370712908271-242ac118-0001-012vdjserver.org
\n", + "

760595 rows × 6 columns

\n", + "
" + ], + "text/plain": [ + " id last_updated permission username \\\n", + "0 5 2014-01-21 17:00:11 READ_WRITE test3 \n", + "1 6 2014-01-21 17:00:41 READ_WRITE test4 \n", + "2 9 2014-01-21 17:21:43 READ_WRITE test11 \n", + "3 14 2014-01-29 10:28:16 READ_WRITE jfonner \n", + "4 17 2014-01-29 14:06:38 READ_WRITE adshkl;dasfhkdf \n", + "... ... ... ... ... \n", + "760590 1444986 2025-01-25 09:44:43 READ_WRITE schristley \n", + "760591 1444987 2025-01-25 09:44:44 READ_WRITE schristley \n", + "760592 1444988 2025-01-25 09:44:44 READ_WRITE schristley \n", + "760593 1444989 2025-01-25 09:44:44 READ_WRITE schristley \n", + "760594 1444990 2025-01-25 09:44:44 READ_WRITE schristley \n", + "\n", + " uuid tenant_id \n", + "0 0001389977207738-5056a550b8-0001-012 vdjserver.org \n", + "1 0001389977207738-5056a550b8-0001-012 vdjserver.org \n", + "2 0001389977207738-5056a550b8-0001-012 vdjserver.org \n", + "3 0001389977207738-5056a550b8-0001-012 vdjserver.org \n", + "4 0001391025968832-5056a550b8-0001-012 vdjserver.org \n", + "... ... ... 
\n", + "760590 3580715269144908271-242ac118-0001-012 vdjserver.org \n", + "760591 3569118857445708271-242ac118-0001-012 vdjserver.org \n", + "760592 3557565395419468271-242ac118-0001-012 vdjserver.org \n", + "760593 3547300423582028271-242ac118-0001-012 vdjserver.org \n", + "760594 3534544370712908271-242ac118-0001-012 vdjserver.org \n", + "\n", + "[760595 rows x 6 columns]" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "metadata_perms_df" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "5e2a3b63-e630-4ccb-b35b-acc6573be86c", + "metadata": {}, + "outputs": [], + "source": [ + "def json_print(item):\n", + " print(json.dumps(item, indent = 4))" + ] + }, + { + "cell_type": "markdown", + "id": "814c7971-4a25-4fd2-9075-3ad7ff160e28", + "metadata": {}, + "source": [ + "## VDJServer_mailing_list.txt " + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "4968e34e-3ef2-4de8-b79b-3130ac48310d", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " Email updated_email\n", + "0 12ysliu2 at stu.edu.cn 12ysliu2@stu.edu.cn\n", + "1 18982180702 at msn.cn 18982180702@msn.cn\n", + "2 2008110020 at alumni.sjtu.edu.cn 2008110020@alumni.sjtu.edu.cn\n", + "3 2383920158 at qq.com 2383920158@qq.com\n", + "4 2deepayan at gmail.com 2deepayan@gmail.com\n", + ".. ... 
...\n", + "571 zhanxw at gmail.com zhanxw@gmail.com\n", + "572 zhe.sang at gmail.com zhe.sang@gmail.com\n", + "573 zicheng at utexas.edu zicheng@utexas.edu\n", + "574 zluo819 at gmail.com zluo819@gmail.com\n", + "575 zyf950619 at gmail.com zyf950619@gmail.com\n", + "\n", + "[576 rows x 2 columns]\n" + ] + } + ], + "source": [ + "mailing_list = pd.read_csv(\"/mnt/md0/Projects/vdjserver/VDJServer_mailing_list.txt\", sep = ';', skiprows = 8)\n", + "mailing_list.columns = ['Email']\n", + "mailing_list = mailing_list.iloc[:-5]\n", + "mailing_list['updated_email'] = mailing_list['Email'].apply(lambda row: row.replace(\" at \", \"@\"))\n", + "\n", + "# mailing_list['updated_email'].to_csv('email_list.txt', index = False)\n", + "print(mailing_list)" + ] + }, + { + "cell_type": "markdown", + "id": "8aff3b9d-a220-4f73-8da5-cb649410c98d", + "metadata": {}, + "source": [ + "## Check What Kind Of Metadata We Have" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "90c2177e-5e24-4274-a148-6d183ef7e422", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "['adc_cache', 'adc_cache_repertoire', 'adc_cache_study', 'adc_system_repositories', 'archive_project', 'async_query', 'bioProcessing', 'bioProcessingColumns', 'cellProcessing', 'cellProcessingColumns', 'communityDataSRA', 'data_processing', 'deletedProject', 'diagnosis', 'diagnosisColumns', 'feedback', 'garbage', 'irplus_analysis', 'job', 'nucleicAcidProcessing', 'nucleicAcidProcessingColumns', 'passwordReset', 'private_project', 'processMetadata', 'profile', 'project', 'projectFile', 'projectJob', 'projectJobArchive', 'projectJobFile', 'projectLoad', 'projectPublishInProcess', 'projectUnpublishInProcess', 'publicProject', 'public_project', 'rearrangementLoad', 'repertoire', 'sample', 'sampleColumns', 'sampleGroup', 'sample_processing', 'statistics_cache', 'statistics_cache_repertoire', 'statistics_cache_study', 'subject', 'subjectColumns', 'test', 'testMetadata', 
'testmetadata', 'testmetadatamp', 'userVerification', 'vdjpipeWorkflow']\n", + "52\n" + ] + } + ], + "source": [ + "item_types = set()\n", + "for item in jsonarray:\n", + " item_type = item['name']\n", + " item_types.add(item_type)\n", + "print(sorted(item_types))\n", + "print(len(item_types))" + ] + }, + { + "cell_type": "code", + "execution_count": 283, + "id": "3a96e83a-f451-4f91-a1f7-7f64c1a34300", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{\n", + " \"_id\": {\n", + " \"$oid\": \"52d95bcbe4b0f097f3cf5d6d\"\n", + " },\n", + " \"uuid\": \"0001389976523746-5056a550b8-0001-012\",\n", + " \"owner\": \"wscarbor\",\n", + " \"tenantId\": \"vdjserver.org\",\n", + " \"schemaId\": null,\n", + " \"internalUsername\": null,\n", + " \"associationIds\": [],\n", + " \"lastUpdated\": \"2016-04-27T15:07:26.261-05:00\",\n", + " \"name\": \"profile\",\n", + " \"value\": {\n", + " \"firstName\": \"Walter\",\n", + " \"lastName\": \"Scarborough\",\n", + " \"email\": \"wscarbor@tacc.utexas.edu\",\n", + " \"city\": \"Austin\",\n", + " \"state\": \"TX\",\n", + " \"country\": \"USA\",\n", + " \"affiliation\": \"\"\n", + " },\n", + " \"created\": \"2014-01-17T10:35:23.649-06:00\"\n", + "}\n" + ] + } + ], + "source": [ + "for item in jsonarray:\n", + " item_type = item['name']\n", + " if item_type == 'profile':\n", + " print(json.dumps(item, indent = 4))\n", + " break\n", + " " + ] + }, + { + "cell_type": "markdown", + "id": "f9ce914f-4fde-41c3-8f89-99effd49de2a", + "metadata": {}, + "source": [ + "## Look At User Email Informations\n", + "- Contains email\n", + "- Firstname, Lastname\n", + "- City, State, Country" + ] + }, + { + "cell_type": "code", + "execution_count": 284, + "id": "59e08da8-2959-4e35-9f0c-c1ea512e798a", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
uuidownerfirstNamelastNameemailcitystatecountrycreatedlastUpdated
18176242932598575984145-242ac118-0001-012rgarciaRodrigoGarcía Valienter.garciavaliente@amsterdamumc.nlAmsterdamNetherlands2025-01-02T11:11:52.894-06:002025-01-02T11:11:52.894-06:00
18182755888095932968465-242ac118-0001-012rgarciavRodrigoGarcía Valienter.garciavaliente@amsterdamumc.nl2025-01-02T11:41:52.070-06:002025-01-02T11:41:52.070-06:00
18195481029658171207185-242ac118-0001-012erichardsonEveRichardsonerichardson@lji.orgSan DiegoCaliforniaUnited States2025-01-07T18:01:25.657-06:002025-01-07T18:01:25.657-06:00
18204458895817601248785-242ac118-0001-012samwolsamuel.wollenburg@utsouthwestern.edu2025-01-07T20:24:59.390-06:002025-01-07T20:24:59.390-06:00
18219076859566261923345-242ac118-0001-012chrisjames1992Chinweike ChristopherUdoyechinweikechristopher.udoye@uksh.deLübeckSchleswig-HolsteinGermany2025-01-17T07:54:02.133-06:002025-01-17T07:54:02.133-06:00
\n", + "
" + ], + "text/plain": [ + " uuid owner \\\n", + "1817 6242932598575984145-242ac118-0001-012 rgarcia \n", + "1818 2755888095932968465-242ac118-0001-012 rgarciav \n", + "1819 5481029658171207185-242ac118-0001-012 erichardson \n", + "1820 4458895817601248785-242ac118-0001-012 samwol \n", + "1821 9076859566261923345-242ac118-0001-012 chrisjames1992 \n", + "\n", + " firstName lastName \\\n", + "1817 Rodrigo García Valiente \n", + "1818 Rodrigo García Valiente \n", + "1819 Eve Richardson \n", + "1820 \n", + "1821 Chinweike Christopher Udoye \n", + "\n", + " email city state \\\n", + "1817 r.garciavaliente@amsterdamumc.nl Amsterdam \n", + "1818 r.garciavaliente@amsterdamumc.nl \n", + "1819 erichardson@lji.org San Diego California \n", + "1820 samuel.wollenburg@utsouthwestern.edu \n", + "1821 chinweikechristopher.udoye@uksh.de Lübeck Schleswig-Holstein \n", + "\n", + " country created \\\n", + "1817 Netherlands 2025-01-02T11:11:52.894-06:00 \n", + "1818 2025-01-02T11:41:52.070-06:00 \n", + "1819 United States 2025-01-07T18:01:25.657-06:00 \n", + "1820 2025-01-07T20:24:59.390-06:00 \n", + "1821 Germany 2025-01-17T07:54:02.133-06:00 \n", + "\n", + " lastUpdated \n", + "1817 2025-01-02T11:11:52.894-06:00 \n", + "1818 2025-01-02T11:41:52.070-06:00 \n", + "1819 2025-01-07T18:01:25.657-06:00 \n", + "1820 2025-01-07T20:24:59.390-06:00 \n", + "1821 2025-01-17T07:54:02.133-06:00 " + ] + }, + "execution_count": 284, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "profile_list = []\n", + "for item in jsonarray:\n", + " item_type = item['name']\n", + " if item_type == 'profile':\n", + " uuid = item.get('uuid', None)\n", + " owner = item.get('owner', None)\n", + " first_name = item.get('value', {}).get('firstName', None)\n", + " last_name = item.get('value', {}).get('lastName', None)\n", + " email = item.get('value', {}).get('email', None)\n", + " city = item.get('value', {}).get('city', None)\n", + " state = item.get('value', {}).get('state', None)\n", + 
" country = item.get('value', {}).get('country', None)\n", + " created = item.get('created', None)\n", + " last_updated = item.get('lastUpdated', None)\n", + " # Append the extracted data as a dictionary to the list\n", + " profile_list.append({\n", + " 'uuid': uuid,\n", + " 'owner': owner,\n", + " 'firstName': first_name,\n", + " 'lastName': last_name,\n", + " 'email': email,\n", + " 'city': city,\n", + " 'state': state,\n", + " 'country': country,\n", + " 'created': created,\n", + " 'lastUpdated': last_updated\n", + " })\n", + "# Create a DataFrame from the list of extracted data\n", + "df_profile = pd.DataFrame(profile_list)\n", + "# Print the DataFrame\n", + "df_profile.tail()" + ] + }, + { + "cell_type": "markdown", + "id": "f6c12579-3b84-44f1-971b-be731e983477", + "metadata": {}, + "source": [ + "## Look at UserVerification Data\n", + "- Contains Username\n", + "- Contains if the user is verified or not" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "f6eb5a9d-c6c8-4b3a-9858-4769313ac85b", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Total Number of User: (1881, 4)\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
uuidusernameisVerifiedlast_updated
18766215315958862704145-242ac118-0001-012rgarciaFalse2025-01-02T11:11:53.537-06:00
18772740039666610728465-242ac118-0001-012rgarciavFalse2025-01-02T11:41:52.439-06:00
18785464751732119367185-242ac118-0001-012erichardsonTrue2025-01-07T18:01:47.373-06:00
18794443820482392288785-242ac118-0001-012samwolTrue2025-01-23T11:35:37.964-06:00
18809055857176184483345-242ac118-0001-012chrisjames1992False2025-01-17T07:54:02.622-06:00
\n", + "
" + ], + "text/plain": [ + " uuid username isVerified \\\n", + "1876 6215315958862704145-242ac118-0001-012 rgarcia False \n", + "1877 2740039666610728465-242ac118-0001-012 rgarciav False \n", + "1878 5464751732119367185-242ac118-0001-012 erichardson True \n", + "1879 4443820482392288785-242ac118-0001-012 samwol True \n", + "1880 9055857176184483345-242ac118-0001-012 chrisjames1992 False \n", + "\n", + " last_updated \n", + "1876 2025-01-02T11:11:53.537-06:00 \n", + "1877 2025-01-02T11:41:52.439-06:00 \n", + "1878 2025-01-07T18:01:47.373-06:00 \n", + "1879 2025-01-23T11:35:37.964-06:00 \n", + "1880 2025-01-17T07:54:02.622-06:00 " + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "userVerification_list = []\n", + "for item in jsonarray:\n", + " item_type = item['name']\n", + " if item_type == 'userVerification':\n", + " uuid = item.get('uuid', None) # Use .get() to avoid KeyError if 'uuid' is missing\n", + " username = item.get('value', {}).get('username', None) # Check if 'username' exists in 'value'\n", + " is_verified = item.get('value', {}).get('isVerified', None) # Check if 'isVerified' exists in 'value'\n", + " last_updated = item.get('lastUpdated', None)\n", + " # Append the extracted data as a dictionary to the list\n", + " userVerification_list.append({\n", + " 'uuid': uuid,\n", + " 'username': username,\n", + " 'isVerified': is_verified,\n", + " 'last_updated': last_updated\n", + " })\n", + "# Create a DataFrame from the list of extracted data\n", + "df_userVerification = pd.DataFrame(userVerification_list)\n", + "# Print the DataFrame\n", + "print(f\"Total Number of User: {df_userVerification.shape}\")\n", + "df_userVerification.tail()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "789b01d5-1cfd-4965-8b7b-cda466f1f9ff", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "markdown", + "id": "4fbdbfb5-07f3-40ae-b7ef-1c225981b12d", + "metadata": {}, + 
"source": [ + "## Look at ProjectFile data\n", + " - Contains only ProjectUUID\n", + " - Contains file upload information for the project\n", + " - Contains P" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "6fe3ca43-a34e-417a-afa8-fb1612094e3c", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
uuidprojectUuidassociationIds_1associationIds_2ownertask_typefile_namemimeTypelast_updated
359435338423137409494545-242ac118-0001-0125456400192359305711-242ac118-0001-0126793987554023894545-242ac112-0001-0025456400192359305711-242ac118-0001-012vdjNone4468_S24_L001_R1_001.fastq.gzNone2025-01-13T16:40:40.230-06:00
359441335427718191574545-242ac118-0001-0125456400192359305711-242ac118-0001-0122833383462017494545-242ac112-0001-0025456400192359305711-242ac118-0001-012vdjNone4468_S24_L001_R2_001.fastq.gzNone2025-01-13T16:40:40.230-06:00
359451840700597200490991-242ac118-0001-0125456400192359305711-242ac118-0001-012366925519251050991-242ac112-0001-0025456400192359305711-242ac118-0001-012vdjNone6634_S25_L001_R1_001.fastq.gzNone2025-01-13T16:40:43.277-06:00
359465023614960920170991-242ac118-0001-0125456400192359305711-242ac118-0001-0123549539235260010991-242ac112-0001-0025456400192359305711-242ac118-0001-012vdjNone6634_S25_L001_R2_001.fastq.gzNone2025-01-13T16:40:43.281-06:00
359477830832104257678865-242ac118-0001-0125456400192359305711-242ac118-0001-0128017190735231118865-242ac112-0001-0025456400192359305711-242ac118-0001-012vdjNoneprimers.fastaNone2025-01-13T16:41:49.035-06:00
\n", + "
" + ], + "text/plain": [ + " uuid \\\n", + "35943 5338423137409494545-242ac118-0001-012 \n", + "35944 1335427718191574545-242ac118-0001-012 \n", + "35945 1840700597200490991-242ac118-0001-012 \n", + "35946 5023614960920170991-242ac118-0001-012 \n", + "35947 7830832104257678865-242ac118-0001-012 \n", + "\n", + " projectUuid \\\n", + "35943 5456400192359305711-242ac118-0001-012 \n", + "35944 5456400192359305711-242ac118-0001-012 \n", + "35945 5456400192359305711-242ac118-0001-012 \n", + "35946 5456400192359305711-242ac118-0001-012 \n", + "35947 5456400192359305711-242ac118-0001-012 \n", + "\n", + " associationIds_1 \\\n", + "35943 6793987554023894545-242ac112-0001-002 \n", + "35944 2833383462017494545-242ac112-0001-002 \n", + "35945 366925519251050991-242ac112-0001-002 \n", + "35946 3549539235260010991-242ac112-0001-002 \n", + "35947 8017190735231118865-242ac112-0001-002 \n", + "\n", + " associationIds_2 owner task_type \\\n", + "35943 5456400192359305711-242ac118-0001-012 vdj None \n", + "35944 5456400192359305711-242ac118-0001-012 vdj None \n", + "35945 5456400192359305711-242ac118-0001-012 vdj None \n", + "35946 5456400192359305711-242ac118-0001-012 vdj None \n", + "35947 5456400192359305711-242ac118-0001-012 vdj None \n", + "\n", + " file_name mimeType last_updated \n", + "35943 4468_S24_L001_R1_001.fastq.gz None 2025-01-13T16:40:40.230-06:00 \n", + "35944 4468_S24_L001_R2_001.fastq.gz None 2025-01-13T16:40:40.230-06:00 \n", + "35945 6634_S25_L001_R1_001.fastq.gz None 2025-01-13T16:40:43.277-06:00 \n", + "35946 6634_S25_L001_R2_001.fastq.gz None 2025-01-13T16:40:43.281-06:00 \n", + "35947 primers.fasta None 2025-01-13T16:41:49.035-06:00 " + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "projectFiles_list = []\n", + "for item in jsonarray:\n", + " item_type = item['name']\n", + " if item_type == 'projectFile':\n", + " uuid = item.get('uuid', None)\n", + " associationIds = item.get('associationIds', 
None)\n", + " projectUuid = item.get('value', {}).get('projectUuid', None)\n", + " owner = item.get('owner', None)\n", + " task_type= item.get('value', {}).get('type', None)\n", + " file_name = item.get('value', {}).get('name', None)\n", + " mimeType = item.get('value', {}).get('mimeType', None)\n", + " last_updated = item.get('lastUpdated', None)\n", + " if associationIds:\n", + " associationIds_1 = associationIds[0]\n", + " if len(associationIds)>1:\n", + " associationIds_2 = associationIds[1]\n", + " if len(associationIds) > 2:\n", + " print(\"Length associationIds: \", len(associationIds))\n", + " else:\n", + " associationIds_2 = None\n", + " else:\n", + " associationIds_1 = None\n", + " associationIds_2 = None\n", + " # Append the extracted data as a dictionary to the list\n", + " projectFiles_list.append({\n", + " 'uuid': uuid,\n", + " 'projectUuid': projectUuid,\n", + " 'associationIds_1': associationIds_1,\n", + " 'associationIds_2': associationIds_2,\n", + " 'owner': owner,\n", + " 'task_type': task_type,\n", + " 'file_name': file_name,\n", + " 'mimeType': mimeType,\n", + " 'last_updated': last_updated\n", + " })\n", + "\n", + "# Create a DataFrame from the list of extracted data\n", + "df_projectFiles = pd.DataFrame(projectFiles_list)\n", + "# Print the DataFrame\n", + "df_projectFiles.tail()" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "f354c691-73aa-4124-af82-e86a0b34be15", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "owner\n", + "vdj 31713\n", + "schristley 1250\n", + "wscarbor 891\n", + "wrounds 517\n", + "esalina 416\n", + " ... 
\n", + "mrojas 1\n", + "rytis 1\n", + "randocalrissian 1\n", + "hrhinn 1\n", + "xmr5148 1\n", + "Name: count, Length: 102, dtype: int64" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Keep only project files owned by vdj or schristley (filter currently disabled)\n", + "# df_projectFiles = df_projectFiles[df_projectFiles.owner.isin(['vdj', 'schristley'])]\n", + "df_projectFiles.owner.value_counts()\n" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "6d77cdcc-71ff-49dd-ba18-ab742eff954b", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
uuidprojectUuidassociationIds_1associationIds_2ownertask_typefile_namemimeTypelast_updated
187962428928387311529495-242ac119-0001-0127058216970366620136-242ac11e-0001-0124248491282261929495-242ac112-0001-0027058216970366620136-242ac11e-0001-012vdjNone18M.fastaNone2019-02-15T16:20:15.208-06:00
21542646759332934512150-242ac116-0001-0123276777473314001386-242ac116-0001-0122075952650351472150-242ac113-0001-0023276777473314001386-242ac116-0001-012vdjNone18M.fastaNone2020-01-17T10:25:24.813-06:00
\n", + "
" + ], + "text/plain": [ + " uuid \\\n", + "18796 2428928387311529495-242ac119-0001-012 \n", + "21542 646759332934512150-242ac116-0001-012 \n", + "\n", + " projectUuid \\\n", + "18796 7058216970366620136-242ac11e-0001-012 \n", + "21542 3276777473314001386-242ac116-0001-012 \n", + "\n", + " associationIds_1 \\\n", + "18796 4248491282261929495-242ac112-0001-002 \n", + "21542 2075952650351472150-242ac113-0001-002 \n", + "\n", + " associationIds_2 owner task_type file_name \\\n", + "18796 7058216970366620136-242ac11e-0001-012 vdj None 18M.fasta \n", + "21542 3276777473314001386-242ac116-0001-012 vdj None 18M.fasta \n", + "\n", + " mimeType last_updated \n", + "18796 None 2019-02-15T16:20:15.208-06:00 \n", + "21542 None 2020-01-17T10:25:24.813-06:00 " + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df_projectFiles[df_projectFiles.file_name == '18M.fasta']" + ] + }, + { + "cell_type": "markdown", + "id": "1cba69fe-e100-4ace-a9f8-3c5811125ee9", + "metadata": {}, + "source": [ + "## Look at projectJob data\n", + " - Contains ProjectUUID and JobUUID" + ] + }, + { + "cell_type": "code", + "execution_count": 162, + "id": "daa652ea-e720-410e-82de-9d3a56aba5d8", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
uuidownerprojectUuidjobUuidlastUpdated
00001400192074855-5056a550b8-0001-012vdj0001399309581559-5056a550b8-0001-0120001399315558601-5056a550b8-0001-0072014-05-15T17:14:34.855-05:00
10001400254373114-5056a550b8-0001-012vdj0001400250478554-5056a550b8-0001-0120001400254372814-5056a550b8-0001-0072014-05-16T10:32:53.114-05:00
20001400273862423-5056a550b8-0001-012vdj0001400250478554-5056a550b8-0001-0120001400273862119-5056a550b8-0001-0072014-05-16T15:57:42.423-05:00
30001400274448495-5056a550b8-0001-012vdj0001400250478554-5056a550b8-0001-0120001400274448320-5056a550b8-0001-0072014-05-16T16:07:28.494-05:00
40001400274714655-5056a550b8-0001-012vdj0001400250478554-5056a550b8-0001-0120001400274714490-5056a550b8-0001-0072014-05-16T16:11:54.655-05:00
\n", + "
" + ], + "text/plain": [ + " uuid owner \\\n", + "0 0001400192074855-5056a550b8-0001-012 vdj \n", + "1 0001400254373114-5056a550b8-0001-012 vdj \n", + "2 0001400273862423-5056a550b8-0001-012 vdj \n", + "3 0001400274448495-5056a550b8-0001-012 vdj \n", + "4 0001400274714655-5056a550b8-0001-012 vdj \n", + "\n", + " projectUuid jobUuid \\\n", + "0 0001399309581559-5056a550b8-0001-012 0001399315558601-5056a550b8-0001-007 \n", + "1 0001400250478554-5056a550b8-0001-012 0001400254372814-5056a550b8-0001-007 \n", + "2 0001400250478554-5056a550b8-0001-012 0001400273862119-5056a550b8-0001-007 \n", + "3 0001400250478554-5056a550b8-0001-012 0001400274448320-5056a550b8-0001-007 \n", + "4 0001400250478554-5056a550b8-0001-012 0001400274714490-5056a550b8-0001-007 \n", + "\n", + " lastUpdated \n", + "0 2014-05-15T17:14:34.855-05:00 \n", + "1 2014-05-16T10:32:53.114-05:00 \n", + "2 2014-05-16T15:57:42.423-05:00 \n", + "3 2014-05-16T16:07:28.494-05:00 \n", + "4 2014-05-16T16:11:54.655-05:00 " + ] + }, + "execution_count": 162, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "projectJob_list = []\n", + "for item in jsonarray:\n", + " item_type = item['name']\n", + " if item_type == 'projectJob':\n", + " # json_print(item)\n", + " uuid = item.get('uuid', None)\n", + " owner = item.get('owner', None)\n", + " projectUuid = item.get('value', {}).get('projectUuid', None)\n", + " jobUuid = item.get('value', {}).get('jobUuid', None)\n", + " last_updated = item.get('lastUpdated', None)\n", + " projectJob_list.append({\n", + " 'uuid': uuid,\n", + " 'owner': owner,\n", + " 'projectUuid': projectUuid,\n", + " 'jobUuid': jobUuid,\n", + " 'lastUpdated': last_updated\n", + " })\n", + "\n", + "# Create a DataFrame from the list of extracted data\n", + "df_projectJob = pd.DataFrame(projectJob_list)\n", + "df_projectJob.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "id": "d6b829b6-080b-4a69-9f3f-662fc2cfd36f", + "metadata": {}, + "outputs": [ + { + 
"data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
uuidownerprojectUuidjobUuidlastUpdated
3524360484783884014056-242ac11e-0001-012vdj7058216970366620136-242ac11e-0001-012217677121292014056-242ac11b-0001-0072018-04-17T10:50:14.832-05:00
35255644951932076289560-242ac11e-0001-012vdj7058216970366620136-242ac11e-0001-0125814903787979009560-242ac11b-0001-0072018-04-17T13:54:01.922-05:00
36224704510086896283160-242ac11e-0001-012vdj7058216970366620136-242ac11e-0001-0124916552622299803160-242ac11b-0001-0072018-05-30T16:16:40.464-05:00
38408974371275809231336-242ac11e-0001-012vdj7058216970366620136-242ac11e-0001-0128802787332334031336-242ac11b-0001-0072018-07-16T11:22:51.666-05:00
3841185958136163013096-242ac11e-0001-012vdj7058216970366620136-242ac11e-0001-0120029001919173096-242ac11b-0001-0072018-07-16T11:40:55.535-05:00
4225753010572288070121-242ac119-0001-012vdj7058216970366620136-242ac11e-0001-012592378795417670121-242ac11c-0001-0072019-02-27T16:40:20.145-06:00
42268331554514421092841-242ac119-0001-012vdj7058216970366620136-242ac11e-0001-0128144122141623652841-242ac11c-0001-0072019-02-27T16:31:40.346-06:00
\n", + "
" + ], + "text/plain": [ + " uuid owner \\\n", + "3524 360484783884014056-242ac11e-0001-012 vdj \n", + "3525 5644951932076289560-242ac11e-0001-012 vdj \n", + "3622 4704510086896283160-242ac11e-0001-012 vdj \n", + "3840 8974371275809231336-242ac11e-0001-012 vdj \n", + "3841 185958136163013096-242ac11e-0001-012 vdj \n", + "4225 753010572288070121-242ac119-0001-012 vdj \n", + "4226 8331554514421092841-242ac119-0001-012 vdj \n", + "\n", + " projectUuid \\\n", + "3524 7058216970366620136-242ac11e-0001-012 \n", + "3525 7058216970366620136-242ac11e-0001-012 \n", + "3622 7058216970366620136-242ac11e-0001-012 \n", + "3840 7058216970366620136-242ac11e-0001-012 \n", + "3841 7058216970366620136-242ac11e-0001-012 \n", + "4225 7058216970366620136-242ac11e-0001-012 \n", + "4226 7058216970366620136-242ac11e-0001-012 \n", + "\n", + " jobUuid lastUpdated \n", + "3524 217677121292014056-242ac11b-0001-007 2018-04-17T10:50:14.832-05:00 \n", + "3525 5814903787979009560-242ac11b-0001-007 2018-04-17T13:54:01.922-05:00 \n", + "3622 4916552622299803160-242ac11b-0001-007 2018-05-30T16:16:40.464-05:00 \n", + "3840 8802787332334031336-242ac11b-0001-007 2018-07-16T11:22:51.666-05:00 \n", + "3841 0029001919173096-242ac11b-0001-007 2018-07-16T11:40:55.535-05:00 \n", + "4225 592378795417670121-242ac11c-0001-007 2019-02-27T16:40:20.145-06:00 \n", + "4226 8144122141623652841-242ac11c-0001-007 2019-02-27T16:31:40.346-06:00 " + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df_projectJob[df_projectJob.projectUuid == '7058216970366620136-242ac11e-0001-012']" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "id": "96e0abf6-2f1a-4fd5-8388-1ff55fcefcc1", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'/projects/7058216970366620136-242ac11e-0001-012/analyses/2018-07-16-16-21-59-86-my-job-16-jul-2018-11:21:45-am'" + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + } + ], + 
"source": [ + "job_uuid = '8802787332334031336-242ac11b-0001-007'\n", + "jobs_all_df[jobs_all_df.uuid == job_uuid].iloc[0].archive_path" + ] + }, + { + "cell_type": "markdown", + "id": "05986f61-ab74-4749-aa8d-734b06242b2f", + "metadata": {}, + "source": [ + "## Look at projectJobFile data\n", + "- Contains both ProjectUUID and JobUUID\n", + "- File type\n", + "- File name\n", + "- Whether it is deleted" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "id": "3464cc27-823d-4de8-b0c6-15d7ac733342", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
ownerprojectUuidjobUuidfileTypefile_namefile_lengthisDeletedlastUpdated
474031vdj5456400192359305711-242ac118-0001-012c7cd08ad-a560-4574-a363-b9cc4c5e051d-0076UTSW33_S42_L001_R1_001.fastq.merged.unique.igb...194102False2025-01-25T09:44:27.029-06:00
474032vdj5456400192359305711-242ac118-0001-012c7cd08ad-a560-4574-a363-b9cc4c5e051d-0076UTSW33_S42_L001_R1_001.fastq.merged.unique.igb...4178241False2025-01-25T09:44:27.318-06:00
474033vdj5456400192359305711-242ac118-0001-012c7cd08ad-a560-4574-a363-b9cc4c5e051d-0076UTSW33_S42_L001_R1_001.fastq.merged.unique.igb...10699728False2025-01-25T09:44:27.561-06:00
474034vdj5456400192359305711-242ac118-0001-012c7cd08ad-a560-4574-a363-b9cc4c5e051d-0076UTSW33_S42_L001_R1_001.fastq.merged.unique.igb...1068987False2025-01-25T09:44:27.827-06:00
474035vdj5456400192359305711-242ac118-0001-012c7cd08ad-a560-4574-a363-b9cc4c5e051d-0070vdjserver_germline.airr.json3456292False2025-01-25T09:44:28.085-06:00
\n", + "
" + ], + "text/plain": [ + " owner projectUuid \\\n", + "474031 vdj 5456400192359305711-242ac118-0001-012 \n", + "474032 vdj 5456400192359305711-242ac118-0001-012 \n", + "474033 vdj 5456400192359305711-242ac118-0001-012 \n", + "474034 vdj 5456400192359305711-242ac118-0001-012 \n", + "474035 vdj 5456400192359305711-242ac118-0001-012 \n", + "\n", + " jobUuid fileType \\\n", + "474031 c7cd08ad-a560-4574-a363-b9cc4c5e051d-007 6 \n", + "474032 c7cd08ad-a560-4574-a363-b9cc4c5e051d-007 6 \n", + "474033 c7cd08ad-a560-4574-a363-b9cc4c5e051d-007 6 \n", + "474034 c7cd08ad-a560-4574-a363-b9cc4c5e051d-007 6 \n", + "474035 c7cd08ad-a560-4574-a363-b9cc4c5e051d-007 0 \n", + "\n", + " file_name file_length \\\n", + "474031 UTSW33_S42_L001_R1_001.fastq.merged.unique.igb... 194102 \n", + "474032 UTSW33_S42_L001_R1_001.fastq.merged.unique.igb... 4178241 \n", + "474033 UTSW33_S42_L001_R1_001.fastq.merged.unique.igb... 10699728 \n", + "474034 UTSW33_S42_L001_R1_001.fastq.merged.unique.igb... 1068987 \n", + "474035 vdjserver_germline.airr.json 3456292 \n", + "\n", + " isDeleted lastUpdated \n", + "474031 False 2025-01-25T09:44:27.029-06:00 \n", + "474032 False 2025-01-25T09:44:27.318-06:00 \n", + "474033 False 2025-01-25T09:44:27.561-06:00 \n", + "474034 False 2025-01-25T09:44:27.827-06:00 \n", + "474035 False 2025-01-25T09:44:28.085-06:00 " + ] + }, + "execution_count": 15, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "projectJobFile_list = []\n", + "for item in jsonarray:\n", + " item_type = item['name']\n", + " if item_type == 'projectJobFile':\n", + " owner = item.get('owner', None)\n", + " projectUuid = item.get('value', {}).get('projectUuid', None)\n", + " jobUuid = item.get('value', {}).get('jobUuid', None)\n", + " fileType = item.get('value', {}).get('fileType', None)\n", + " file_name = item.get('value', {}).get('name', None)\n", + " length = item.get('value', {}).get('length', None)\n", + " isDeleted = item.get('value', {}).get('isDeleted', 
None)\n", + " last_updated = item.get('lastUpdated', None)\n", + " projectJobFile_list.append({\n", + " 'owner': owner,\n", + " 'projectUuid': projectUuid,\n", + " 'jobUuid': jobUuid,\n", + " 'fileType': fileType,\n", + " 'file_name': file_name,\n", + " 'file_length': length,\n", + " 'isDeleted': isDeleted,\n", + " 'lastUpdated': last_updated\n", + " })\n", + "\n", + "# Create a DataFrame from the list of extracted data\n", + "df_projectJobFile = pd.DataFrame(projectJobFile_list)\n", + "df_projectJobFile.tail()\n", + "\n", + "# df_projectJobFile.projectUuid.value_counts()\n", + "# df_projectJobFile.jobUuid.value_counts()\n", + "# df_projectJob.projectUuid.value_counts()\n", + "# df_projectJob[df_projectJob.projectUuid.isin(df_projectJobFile.projectUuid)].projectUuid.value_counts()\n", + "# df_projectJobFile.isDeleted.value_counts()" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "id": "e5eb4405-d940-4022-8910-9a50af60573a", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "owner\n", + "vdj 473549\n", + "schristley 18\n", + "Name: count, dtype: int64" + ] + }, + "execution_count": 16, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Keep only projectJobFile records owned by vdj or schristley\n", + "df_projectJobFile = df_projectJobFile[df_projectJobFile.owner.isin(['vdj', 'schristley'])]\n", + "df_projectJobFile.owner.value_counts()" + ] + }, + { + "cell_type": "markdown", + "id": "08d217e1-c529-465d-9c5b-daff664aca86", + "metadata": {}, + "source": [ + "## Look at Subject Metadata" + ] + }, + { + "cell_type": "code", + "execution_count": 183, + "id": "2eba901e-f2b8-445c-8fd0-d0c1dfe787d6", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
uuidownerprojectUuidlastUpdated
06360709198094930406-242ac11c-0001-012vdj8241732730493932006-242ac11f-0001-0122016-11-22T13:31:42.578-06:00
16360709202389897702-242ac11c-0001-012vdj8241732730493932006-242ac11f-0001-0122016-11-22T13:31:41.522-06:00
26360923946459730406-242ac11c-0001-012vdj8241732730493932006-242ac11f-0001-0122016-11-22T13:31:40.441-06:00
36361482292208210406-242ac11c-0001-012vdj8241732730493932006-242ac11f-0001-0122017-01-03T13:48:30.111-06:00
41158355700316040730-242ac11c-0001-012demo2016859535379303755290-242ac114-0001-0122018-05-03T10:24:22.094-05:00
...............
54193547623744646082065-242ac118-0001-012vdj-test15139818080333393425-242ac118-0001-0122024-05-21T13:41:59.791-05:00
54204406571231448928751-242ac118-0001-012loubna_boutkhil2377764881775717905-242ac118-0001-0122024-06-01T04:20:51.010-05:00
54214487101868248928751-242ac118-0001-012loubna_boutkhil2377764881775717905-242ac118-0001-0122024-06-01T04:20:52.885-05:00
54224554747603160928751-242ac118-0001-012loubna_boutkhil2377764881775717905-242ac118-0001-0122024-06-01T04:20:54.461-05:00
54234621276646575968751-242ac118-0001-012loubna_boutkhil2377764881775717905-242ac118-0001-0122024-06-01T04:20:56.010-05:00
\n", + "

5424 rows × 4 columns

\n", + "
" + ], + "text/plain": [ + " uuid owner \\\n", + "0 6360709198094930406-242ac11c-0001-012 vdj \n", + "1 6360709202389897702-242ac11c-0001-012 vdj \n", + "2 6360923946459730406-242ac11c-0001-012 vdj \n", + "3 6361482292208210406-242ac11c-0001-012 vdj \n", + "4 1158355700316040730-242ac11c-0001-012 demo2016 \n", + "... ... ... \n", + "5419 3547623744646082065-242ac118-0001-012 vdj-test1 \n", + "5420 4406571231448928751-242ac118-0001-012 loubna_boutkhil \n", + "5421 4487101868248928751-242ac118-0001-012 loubna_boutkhil \n", + "5422 4554747603160928751-242ac118-0001-012 loubna_boutkhil \n", + "5423 4621276646575968751-242ac118-0001-012 loubna_boutkhil \n", + "\n", + " projectUuid lastUpdated \n", + "0 8241732730493932006-242ac11f-0001-012 2016-11-22T13:31:42.578-06:00 \n", + "1 8241732730493932006-242ac11f-0001-012 2016-11-22T13:31:41.522-06:00 \n", + "2 8241732730493932006-242ac11f-0001-012 2016-11-22T13:31:40.441-06:00 \n", + "3 8241732730493932006-242ac11f-0001-012 2017-01-03T13:48:30.111-06:00 \n", + "4 859535379303755290-242ac114-0001-012 2018-05-03T10:24:22.094-05:00 \n", + "... ... ... 
\n", + "5419 5139818080333393425-242ac118-0001-012 2024-05-21T13:41:59.791-05:00 \n", + "5420 2377764881775717905-242ac118-0001-012 2024-06-01T04:20:51.010-05:00 \n", + "5421 2377764881775717905-242ac118-0001-012 2024-06-01T04:20:52.885-05:00 \n", + "5422 2377764881775717905-242ac118-0001-012 2024-06-01T04:20:54.461-05:00 \n", + "5423 2377764881775717905-242ac118-0001-012 2024-06-01T04:20:56.010-05:00 \n", + "\n", + "[5424 rows x 4 columns]" + ] + }, + "execution_count": 183, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "subject_list = []\n", + "for item in jsonarray:\n", + " item_type = item['name']\n", + " if item_type == 'subject':\n", + " # json_print(item)\n", + " uuid = item.get('uuid', None)\n", + " owner = item.get('owner', None)\n", + " associationIds = item.get('associationIds', None)\n", + " last_updated = item.get('lastUpdated', None)\n", + " if associationIds:\n", + " associationIds_1 = associationIds[0] \n", + " else:\n", + " associationIds_1 = None\n", + " subject_list.append({\n", + " 'uuid': uuid,\n", + " 'owner': owner,\n", + " 'projectUuid': associationIds_1,\n", + " 'lastUpdated': last_updated\n", + " })\n", + "\n", + "# Create a DataFrame from the list of extracted data\n", + "df_subjects = pd.DataFrame(subject_list)\n", + "df_subjects" + ] + }, + { + "cell_type": "code", + "execution_count": 187, + "id": "da31b42c-05f9-468c-812f-4919a85c2ddf", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "382" + ] + }, + "execution_count": 187, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df_subjects['projectUuid'].nunique()" + ] + }, + { + "cell_type": "code", + "execution_count": 189, + "id": "6e85c0fe-8235-4f25-9a75-505db5b3834c", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "owner projectUuid \n", + "vdj 1346418785674727916-242ac117-0001-012 1165\n", + " 7405514755678596630-242ac113-0001-012 786\n", + " 4465604624794643990-242ac116-0001-012 
350\n", + " 8006422057040941546-242ac116-0001-012 299\n", + " 1443107289580563990-242ac116-0001-012 170\n", + " ... \n", + "vdj-test1 7808900147506376210-242ac117-0001-012 1\n", + " 7844248211544805871-242ac118-0001-012 1\n", + "za708 3747020520399891990-242ac116-0001-012 1\n", + " 3868686842054635030-242ac116-0001-012 1\n", + " 8428968221950939626-242ac116-0001-012 1\n", + "Name: count, Length: 387, dtype: int64" + ] + }, + "execution_count": 189, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df_subjects[['owner', 'projectUuid']].value_counts()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "18ec3752-a531-416a-b793-b87933832185", + "metadata": {}, + "outputs": [], + "source": [ + "meta" + ] + }, + { + "cell_type": "markdown", + "id": "4b5fcb01-1897-40f2-aa10-e4aa83ce7bf0", + "metadata": {}, + "source": [ + "## Look at Private Project data" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "id": "c494b780-3688-4b9c-91f3-be04256069b2", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{\n", + " \"_id\": {\n", + " \"$oid\": \"5e976b3352faff00010d8956\"\n", + " },\n", + " \"uuid\": \"4727197546809323030-242ac116-0001-012\",\n", + " \"schemaId\": null,\n", + " \"internalUsername\": null,\n", + " \"associationIds\": [],\n", + " \"lastUpdated\": \"2022-12-18T01:14:56.568-06:00\",\n", + " \"name\": \"private_project\",\n", + " \"value\": {\n", + " \"study_id\": null,\n", + " \"study_title\": \"My project\",\n", + " \"study_type\": null,\n", + " \"study_description\": \"A test project\",\n", + " \"inclusion_exclusion_criteria\": null,\n", + " \"grants\": null,\n", + " \"collected_by\": null,\n", + " \"lab_name\": \"My lab\",\n", + " \"lab_address\": \"UT Southwestern Medical Center\",\n", + " \"submitted_by\": null,\n", + " \"pub_ids\": null,\n", + " \"keywords_study\": null,\n", + " \"owner\": \"schristley\"\n", + " },\n", + " \"created\": 
\"2020-04-15T15:14:43.150-05:00\",\n", + " \"owner\": \"vdj\",\n", + " \"tenantId\": \"vdjserver.org\"\n", + "}\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
\n", + "
" + ], + "text/plain": [ + "Empty DataFrame\n", + "Columns: []\n", + "Index: []" + ] + }, + "execution_count": 17, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Collect one summary row per `private_project` metadata record.\n", + "# NOTE: a leftover debugging `break` used to exit the loop on the first match,\n", + "# before anything was appended, which left df_private_project empty.\n", + "private_project_list = []\n", + "sample_printed = False\n", + "for item in jsonarray:\n", + "    if item['name'] != 'private_project':\n", + "        continue\n", + "    # Show a single raw record as a schema reference, then keep collecting.\n", + "    if not sample_printed:\n", + "        json_print(item)\n", + "        sample_printed = True\n", + "    value = item.get('value', {})\n", + "    private_project_list.append({\n", + "        'uuid': item.get('uuid', None),\n", + "        # The record-level 'owner' is not used here; the per-project owner is read from value.owner.\n", + "        'job_owner': value.get('owner', None),\n", + "        'study_title': value.get('study_title', None),\n", + "        'lastUpdated': item.get('lastUpdated', None)\n", + "    })\n", + "\n", + "# Create a DataFrame from the list of extracted data\n", + "df_private_project = pd.DataFrame(private_project_list)\n", + "df_private_project.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "id": "de177b95-a028-4ca1-9870-56cb2c8ff915", + "metadata": {}, + "outputs": [], + "source": [ + "# df_private_project.to_csv('private_projects.csv', index = False)" + ] + }, + { + "cell_type": "markdown", + "id": "3675c13c-9cb2-42e0-b114-8b57759adbd7", + "metadata": {}, + "source": [ + "## Look at Public Project Data" + ] + }, + { + "cell_type": "code", + "execution_count": 160, + "id": "af15fcd7-547e-427d-addd-b01a61bfa02a", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
uuidstudy_idassociationIdsstudy_titlelastUpdated
04505707319090933270-242ac113-0001-0124505707319090933270-242ac113-0001-012[]Outcome and Immune Correlates of a Phase II Tr...2022-12-18T01:14:48.159-06:00
12034535426280329706-242ac113-0001-012PRJNA300878[]Individual heritable differences result in uni...2022-12-18T01:14:48.306-06:00
25350423756993719830-242ac113-0001-0121371444213709729305-242ac11c-0001-012[]T cell receptor repertoires after adoptive tra...2022-12-18T01:14:48.028-06:00
31570295022599213546-242ac113-0001-0123276777473314001386-242ac116-0001-012[]Biophysicochemical Motifs in T cell Receptor S...2022-12-18T01:14:47.900-06:00
454655627105407466-242ac113-0001-012PRJNA248475[]B cells populating the multiple sclerosis brai...2022-12-18T01:14:48.453-06:00
\n", + "
" + ], + "text/plain": [ + " uuid \\\n", + "0 4505707319090933270-242ac113-0001-012 \n", + "1 2034535426280329706-242ac113-0001-012 \n", + "2 5350423756993719830-242ac113-0001-012 \n", + "3 1570295022599213546-242ac113-0001-012 \n", + "4 54655627105407466-242ac113-0001-012 \n", + "\n", + " study_id associationIds \\\n", + "0 4505707319090933270-242ac113-0001-012 [] \n", + "1 PRJNA300878 [] \n", + "2 1371444213709729305-242ac11c-0001-012 [] \n", + "3 3276777473314001386-242ac116-0001-012 [] \n", + "4 PRJNA248475 [] \n", + "\n", + " study_title \\\n", + "0 Outcome and Immune Correlates of a Phase II Tr... \n", + "1 Individual heritable differences result in uni... \n", + "2 T cell receptor repertoires after adoptive tra... \n", + "3 Biophysicochemical Motifs in T cell Receptor S... \n", + "4 B cells populating the multiple sclerosis brai... \n", + "\n", + " lastUpdated \n", + "0 2022-12-18T01:14:48.159-06:00 \n", + "1 2022-12-18T01:14:48.306-06:00 \n", + "2 2022-12-18T01:14:48.028-06:00 \n", + "3 2022-12-18T01:14:47.900-06:00 \n", + "4 2022-12-18T01:14:48.453-06:00 " + ] + }, + "execution_count": 160, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "public_project_list = []\n", + "for item in jsonarray:\n", + " item_type = item['name']\n", + " if item_type == 'public_project':\n", + " # json_print(item)\n", + " # break\n", + " uuid = item.get('uuid', None)\n", + " study_id = item.get('value', {}).get('study_id', None)\n", + " study_title = item.get('value', {}).get('study_title', None)\n", + " associationIds = item.get('associationIds', None)\n", + " last_updated = item.get('lastUpdated', None)\n", + " public_project_list.append({\n", + " 'uuid': uuid,\n", + " 'study_id': study_id,\n", + " 'associationIds': associationIds,\n", + " 'study_title': study_title,\n", + " 'lastUpdated': last_updated\n", + " })\n", + "\n", + "# Create a DataFrame from the list of extracted data\n", + "df_public_project = pd.DataFrame(public_project_list)\n", + 
"df_public_project.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "id": "71cdba7e-cfdd-4c4a-9bd9-19f390592d12", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{'_id': {'$oid': '6119d12ad6018000016d697a'}, 'uuid': '1002552565004824085-242ac117-0001-012', 'schemaId': None, 'internalUsername': None, 'associationIds': [], 'lastUpdated': '2022-12-18T01:14:49.952-06:00', 'name': 'public_project', 'value': {'study_id': 'PRJNA624801', 'study_title': 'A Potently Neutralizing Antibody Protects Mice against SARS-CoV-2 Infection', 'study_type': {'id': 'NCIT:C93130', 'label': 'Animal Study'}, 'study_description': 'Severe acute respiratory syndrome coronavirus 2 (SARS-CoV-2) is responsible for millions of infections and hundreds of thousands of deaths globally. There are no widely available licensed therapeutics against SARS-CoV-2, highlighting an urgent need for effective interventions. The virus enters host cells through binding of a receptor-binding domain within its trimeric spike glycoprotein to human angiotensin-converting enzyme 2. In this article, we describe the generation and characterization of a panel of murine mAbs directed against the receptor-binding domain. One mAb, 2B04, neutralized wild-type SARS-CoV-2 in vitro with remarkable potency (half-maximal inhibitory concentration of <2 ng/ml). In a murine model of SARS-CoV-2 infection, 2B04 protected challenged animals from weight loss, reduced lung viral load, and blocked systemic dissemination. Thus, 2B04 is a promising candidate for an effective antiviral that can be used to prevent SARS-CoV-2 infection.', 'inclusion_exclusion_criteria': ' ', 'lab_name': 'Ali H. Ellebedy', 'lab_address': 'Department of Pathology and Immunology, St. Louis, MO, 63110; USA', 'submitted_by': 'Scott Christley, scott.christley@utsouthwestern.edu, UT Southwestern Medical Center', 'collected_by': 'Wafaa B. Alsoussi, Department of Pathology and Immunology, St. 
Louis, MO, 63110; USA', 'grants': 'Work in Ellebedy laboratory was supported by NIAID R21 AI139813, U01 AI141990, and NIAID Centers of Excellence for Influenza Research and Surveillance (CEIRS) contract HHSN272201400008C. Work in the Diamond laboratory was partially supported by was supported by NIH contracts and grants 75N93019C00062 and R01 AI127828 and the Defense Advanced Research Project Agency HR001117S0019. Work in the Fremont laboratory was partially supported by NIAID contracts HHSN272201700060C and 75N93019C00062. Work in the Kleinstein laboratory was partially supported by NIH R01AI104739. Work in the Krammer laboratory was partially supported by the NIAID CEIRS contract HHSN272201400008C and Collaborative Influenza Vaccine Innovation Centers contract 75N93019C00051. The Genome Technology Access Center in the Department of Genetics at Washington University School of Medicine is partially supported by NCI Cancer Center Support Grant #P30 CA91842 to the Siteman Cancer Center and by ICTS/CTSA Grant# UL1 TR000448 from the NCRR. JST was supported by NIAID 5T32CA009547. 
JBC was supported by a Helen Hay Whitney postdoctoral fellowship.', 'pub_ids': 'PMID: 32591393', 'keywords_study': ['contains_ig', 'contains_paired_chain'], 'showArchivedJobs': False, 'vdjserver_keywords': ['is_10x_genomics', 'contains_single_cell'], 'owner': 'scott_public', 'adc_publish_date': '2021-08-26T22:48:15.506Z', 'adc_update_date': '2022-12-16T20:41:47.563Z'}, 'created': '2021-08-15T21:44:58.300-05:00', 'owner': 'vdj', 'tenantId': 'vdjserver.org'}\n" + ] + } + ], + "source": [ + "for item in jsonarray:\n", + " item_type = item['name']\n", + " if '1002552565004824085-242ac117-0001-012' == item.get('uuid', None):\n", + " print(item)" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "id": "730814f0-28e3-4c30-97c4-9dff4cc3b655", + "metadata": {}, + "outputs": [], + "source": [ + "# col_list = ['uuid', 'owner', 'associationIds', 'created', 'lastUpdated', 'name', 'value']\n", + "# obj_list = {}\n", + "# project_uuid = '1874419312501190165-242ac117-0001-012'\n", + "# data_dir = 'Metadata_public_project/' \n", + "\n", + "# # Open a file in write mode\n", + "# with open(f'{data_dir}{project_uuid}_metadata.json', 'w') as file:\n", + "# for item in jsonarray:\n", + "# item_type = item['name']\n", + "# # json_print(item)\n", + "# if project_uuid in item.get('uuid', None) and item_type == 'public_project':\n", + "# obj = {}\n", + "# for col_name in col_list:\n", + "# obj[col_name] = item.get(col_name, None)\n", + "# # print(obj)\n", + "# json.dump(obj, file)\n", + "# file.write('\\n') # Add a newline after each JSON object\n", + " \n", + "# if project_uuid in item.get('associationIds', None):\n", + "# # json_print(item)\n", + "# obj = {}\n", + "# for col_name in col_list:\n", + "# obj[col_name] = item.get(col_name, None)\n", + "# # print(obj)\n", + "# json.dump(obj, file)\n", + "# file.write('\\n') # Add a newline after each JSON object\n", + " \n", + " \n", + " \n", + " " + ] + }, + { + "cell_type": "markdown", + "id": 
"46d499c8-0cd4-4017-9db2-567d4f83528d", + "metadata": {}, + "source": [ + "## Look for ProjectFiles using ProjectUUID" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "id": "678a29de-b89e-4cb6-bbca-1b73892b2e15", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
uuidprojectUuidassociationIds_1associationIds_2ownertask_typefile_namemimeTypelast_updated
00001395955349445-5056a550b8-0001-012NoneNonevdjauthNoneNone2014-03-27T16:22:29.444-05:00
10001396029083309-5056a550b8-0001-0120001395346788177-5056a550b8-0001-0120001395939852441-5056a550b8-0001-002Nonevdjauthuploadedgitprep-latest.zipapplication/zip2014-03-28T12:51:23.309-05:00
20001396029805022-5056a550b8-0001-0120001395346788177-5056a550b8-0001-0120001395956517022-5056a550b8-0001-002NonevdjauthuploadedInduction-28.zipapplication/zip2014-03-28T13:03:25.022-05:00
30001396030144907-5056a550b8-0001-0120001395346788177-5056a550b8-0001-0120001396030144691-5056a550b8-0001-002Nonevdjauthuploadedtest10.txttext/plain2014-03-28T13:09:04.907-05:00
40001396039988083-5056a550b8-0001-0120001395346788177-5056a550b8-0001-0120001396039987794-5056a550b8-0001-002Nonevdjauthuploadedtest11.txttext/plain2014-03-28T15:53:08.083-05:00
\n", + "
" + ], + "text/plain": [ + " uuid projectUuid \\\n", + "0 0001395955349445-5056a550b8-0001-012 \n", + "1 0001396029083309-5056a550b8-0001-012 0001395346788177-5056a550b8-0001-012 \n", + "2 0001396029805022-5056a550b8-0001-012 0001395346788177-5056a550b8-0001-012 \n", + "3 0001396030144907-5056a550b8-0001-012 0001395346788177-5056a550b8-0001-012 \n", + "4 0001396039988083-5056a550b8-0001-012 0001395346788177-5056a550b8-0001-012 \n", + "\n", + " associationIds_1 associationIds_2 owner task_type \\\n", + "0 None None vdjauth \n", + "1 0001395939852441-5056a550b8-0001-002 None vdjauth uploaded \n", + "2 0001395956517022-5056a550b8-0001-002 None vdjauth uploaded \n", + "3 0001396030144691-5056a550b8-0001-002 None vdjauth uploaded \n", + "4 0001396039987794-5056a550b8-0001-002 None vdjauth uploaded \n", + "\n", + " file_name mimeType last_updated \n", + "0 None None 2014-03-27T16:22:29.444-05:00 \n", + "1 gitprep-latest.zip application/zip 2014-03-28T12:51:23.309-05:00 \n", + "2 Induction-28.zip application/zip 2014-03-28T13:03:25.022-05:00 \n", + "3 test10.txt text/plain 2014-03-28T13:09:04.907-05:00 \n", + "4 test11.txt text/plain 2014-03-28T15:53:08.083-05:00 " + ] + }, + "execution_count": 23, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df_projectFiles.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "id": "e63abc4a-d711-42f9-8caf-1a572cc82738", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "1002552565004824085-242ac117-0001-012\n", + "1874419312501190165-242ac117-0001-012\n", + "7800490195324703211-242ac117-0001-012\n", + "8808030969725784556-242ac117-0001-012\n", + "9007962728031055380-242ac117-0001-012\n", + "5558760323211783700-242ac117-0001-012\n", + "5193259980618657300-242ac117-0001-012\n", + "4764775561909899756-242ac118-0001-012\n", + "8\n" + ] + } + ], + "source": [ + "data_df_list = []\n", + "for project_uuid in df_public_project.uuid:\n", + " data = 
df_projectFiles[df_projectFiles.associationIds_2 == project_uuid]\n", + " if not data.empty:\n", + " data_df_list.append(data)\n", + " print(project_uuid)\n", + " # # print(data)\n", + " # break\n", + "print(len(data_df_list))\n", + "# data_df_list" + ] + }, + { + "cell_type": "markdown", + "id": "a2c6ffff-a1f1-4ee6-b514-9d28e641753d", + "metadata": {}, + "source": [ + "## Find Total Number of Verified Users" + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "id": "5fff6b73-d9b2-4f0f-a237-c1b59aeb6c59", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Total Number of user is : 1881\n", + "Total Number of verified user is : 1409\n" + ] + } + ], + "source": [ + "# Replace missing usernames (None/NaN) in the 'username' column with the placeholder 'not_available'\n", + "df_userVerification['username'] = df_userVerification['username'].fillna('not_available')\n", + "filtered_df_userVerification = df_userVerification[(~df_userVerification['username'].str.contains('test', case=False)) & (df_userVerification['isVerified'] == True)]\n", + "verified_usernames = filtered_df_userVerification.username.tolist()\n", + "print(f'Total Number of user is : {df_userVerification.username.count()}')\n", + "\n", + "print(f'Total Number of verified user is : {filtered_df_userVerification.username.count()}')\n", + "# print(f'The usernames are: {sorted(verified_usernames)}')" + ] + }, + { + "cell_type": "markdown", + "id": "771e8e4e-bac1-40a5-8dfb-5fc99e8e357d", + "metadata": {}, + "source": [ + "## Check if These Verified Users Have Any Projects" + ] + }, + { + "cell_type": "code", + "execution_count": 26, + "id": "ccb47021-4cbc-4410-94a4-88e0ac50db55", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Total Number of verified user that has project : 999\n", + "Total Number of unique project : 450579\n" + ] + } + ], + "source": [ + "verified_users_with_project =
metadata_perms_df[metadata_perms_df.username.isin(verified_usernames)]\n", + "verified_users_with_project.username.value_counts()\n", + "print(f'Total Number of verified user that has project : {verified_users_with_project.username.nunique()}')\n", + "print(f'Total Number of unique project : {verified_users_with_project.uuid.nunique()}')" + ] + }, + { + "cell_type": "markdown", + "id": "45876693-1652-4d19-917e-dd7b49aec5df", + "metadata": {}, + "source": [ + "## Check if These Verified Users has Uploaded any Data" + ] + }, + { + "cell_type": "code", + "execution_count": 27, + "id": "2c6573d7-6bf1-41ae-b3c7-63bdd6fc4407", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Total Number of projects with file uploads: 1247\n", + "Total Number of unique users with file uploads: 572\n" + ] + } + ], + "source": [ + "# projectFile rows whose projectUuid appears among the uuids in verified_users_with_project\n", + "file_upload_with_verified_users = df_projectFiles[df_projectFiles.projectUuid.isin(verified_users_with_project.uuid)]\n", + "file_upload_with_verified_users.owner.value_counts()\n", + "project_uuid_with_file_uploads = file_upload_with_verified_users.projectUuid.unique().tolist()\n", + "print(f'Total Number of projects with file uploads: {file_upload_with_verified_users.projectUuid.nunique()}')\n", + "## Check how many users have uploaded any files\n", + "verified_user_with_file_upload = verified_users_with_project[verified_users_with_project.uuid.isin(project_uuid_with_file_uploads)]\n", + "# Attribute access keeps the f-string valid before Python 3.12, where reusing the enclosing quote inside {...} is a SyntaxError\n", + "print(f'Total Number of unique users with file uploads: {verified_user_with_file_upload.username.nunique()}')" + ] + }, + { + "cell_type": "markdown", + "id": "2d561bf5-3e89-4fca-bff1-57b44740dcca", + "metadata": {}, + "source": [ + "## Check Users with projectFiles if They Ran any Jobs" + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "id": "30d8ac4c-78a4-4368-848f-2e637768592f", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Total Number of Projects that
Ran Jobs: 787\n", + "Total Number of users that Ran Jobs: 347\n" + ] + } + ], + "source": [ + "# projectJobFile rows whose projectUuid appears among the uuids in verified_users_with_project\n", + "job_files_with_verified_users = df_projectJobFile[df_projectJobFile.projectUuid.isin(verified_users_with_project.uuid)]\n", + "# Permission rows of verified users restricted to the projects selected above\n", + "users_with_project_job_files = verified_users_with_project[verified_users_with_project.uuid.isin(job_files_with_verified_users.projectUuid)]\n", + "\n", + "# Attribute access keeps these f-strings valid before Python 3.12, where reusing the enclosing quote inside {...} is a SyntaxError\n", + "print(f'Total Number of Projects that Ran Jobs: {job_files_with_verified_users.projectUuid.nunique()}')\n", + "print(f'Total Number of users that Ran Jobs: {users_with_project_job_files.username.nunique()}')\n", + "# job_files_with_verified_users\n", + "# users_with_project_job_files" + ] + }, + { + "cell_type": "markdown", + "id": "97c61552-30ec-47fe-a7a2-d622376064fc", + "metadata": {}, + "source": [ + "## Number of Unique Users Each Year" + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "id": "ddfc69f6-92fa-439b-9492-2f423a9965b5", + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "# Convert 'last_updated' to datetime format\n", + "df = verified_users_with_project.copy()\n", + "df['last_updated'] = pd.to_datetime(df['last_updated'])\n", + "\n", + "# Extract the year from 'last_updated'\n", + "df['year'] = df['last_updated'].dt.year\n", + "# Count the number of unique users per year\n", + "unique_users_by_year = df.groupby('year')['username'].nunique()\n", + "\n", + "# Plot the number of unique users per year\n", + "plt.figure(figsize=(8, 5))\n", + "unique_users_by_year.plot(kind='bar', color='coral')\n", + "plt.title('Number of Unique Users For Each Year')\n", + "plt.xlabel('Year')\n", + "plt.ylabel('Number of Unique Users')\n", + "plt.xticks(rotation=45)\n", + "plt.grid(True)\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "id": "83948f62-811d-4826-a543-d3e6a40354f5", + "metadata": {}, + "source": [ + "## Find Top N Users That Ran Any Job" + ] + }, + { + "cell_type": "code", + "execution_count": 30, + "id": "d1132870-8c6f-4b7b-9fe1-b4a72ec89cca", + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "\n", + "df = users_with_project_job_files.copy()\n", + "# number of users to show\n", + "n = 30\n", + "# Group by 'username' and count the number of unique 'uuid' values (projects) each user has\n", + "user_unique_projects = df.groupby('username')['uuid'].nunique()\n", + "# Sort users by the number of unique projects (descending order) and keep the first n\n", + "top_n_users = user_unique_projects.sort_values(ascending=False).head(n)\n", + "# Plot the top n users with their number of unique projects\n", + "plt.figure(figsize=(6, 10))\n", + "top_n_users.plot(kind='barh', color='royalblue')\n", + "plt.title(f'Top {n} users that ran any job')\n", + "plt.ylabel('Username')\n", + "plt.xlabel('Number of Unique Projects')\n", + "plt.xticks(rotation=0, ha='right')\n", + "plt.grid(True)\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "id": "c868b263-bab2-4edd-aba9-34ca7118e37d", + "metadata": {}, + "source": [ + "## Histogram of Number of Projects Associated with Each User" + ] + }, + { + "cell_type": "code", + "execution_count": 31, + "id": "42bbb4a4-6f1a-46fe-af3a-eb63723c2e23", + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "# Number of unique projects (uuid) per username in users_with_project_job_files\n", + "user_unique_projects = users_with_project_job_files.groupby('username')['uuid'].nunique()\n", + "# Histogram over all users (no top-n cut); bins='fd' requests Freedman-Diaconis bin widths\n", + "plt.figure(figsize=(12, 6))\n", + "user_unique_projects.plot(kind='hist', bins='fd', color='royalblue')\n", + "plt.title('Histogram users that ran any job')\n", + "plt.ylabel('Number of User (in log scale)')\n", + "plt.xlabel('Number of Unique Projects')\n", + "plt.xticks(rotation=0, ha='right')\n", + "plt.yscale('log')\n", + "plt.grid(True)\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "id": "ce5c3dd2-299f-49fa-a3c8-361d114ab096", + "metadata": {}, + "source": [ + "## Number of Users Associated With Each Project" + ] + }, + { + "cell_type": "code", + "execution_count": 32, + "id": "b07503e8-ed9a-48b7-9092-c03cab26818b", + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "# Count unique users per project (uuid)\n", + "users_per_project = users_with_project_job_files.groupby('uuid')['username'].nunique()\n", + "max_users = int(users_per_project.max())\n", + "\n", + "# Half-integer bin edges centre each bar on its integer user count, so bars line up with the x ticks\n", + "plt.figure(figsize=(8, 5))\n", + "bins = np.arange(1, max_users + 2) - 0.5\n", + "counts, edges, patches = plt.hist(users_per_project, bins=bins, color='coral', edgecolor='black')\n", + "\n", + "# Plot formatting\n", + "plt.title('Distribution of User Count per Project')\n", + "plt.xlabel('Number of Unique Users')\n", + "plt.ylabel('Number of Projects (in log scale)')\n", + "plt.xticks(range(1, max_users + 1))\n", + "plt.yscale('log')\n", + "\n", + "# Label each non-empty bar with its count, centred above the bar\n", + "for count, left, right in zip(counts, edges[:-1], edges[1:]):\n", + "    if count > 0:\n", + "        plt.text((left + right) / 2, count, str(int(count)), ha='center', va='bottom', fontsize=9)\n", + "\n", + "# Show the plot\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "id": "dcb13923-3eef-4802-a421-3b9607937e8d", + "metadata": {}, + "source": [ + "## Number of Jobs Run Each Year" + ] + }, + { + "cell_type": "code", + "execution_count": 33, + "id": "1d4d1e87-21cd-4242-aaf7-d24c41d4fdd9", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Number of unique JobUUID: 5087\n" + ] + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "\n", + "# Keep the df_projectJob rows whose projectUuid appears in job_files_with_verified_users\n", + "df = df_projectJob.copy()\n", + "df = df[df.projectUuid.isin(job_files_with_verified_users.projectUuid)]\n", + "print(f'Number of unique JobUUID: {df.jobUuid.nunique()}')\n", + "# Parse 'lastUpdated' as a UTC datetime into 'last_updated'\n", + "\n", + "df['last_updated'] = pd.to_datetime(df['lastUpdated'], utc = True)\n", + "# Extract the year from 'last_updated'\n", + "df['year'] = df['last_updated'].dt.year\n", + "# Count the number of unique jobs per year\n", + "unique_jobs_by_year = df.groupby('year')['jobUuid'].nunique()\n", + "\n", + "# Plot the number of unique jobs per year\n", + "plt.figure(figsize=(8, 5))\n", + "ax = unique_jobs_by_year.plot(kind='bar', color='coral')\n", + "for i, value in enumerate(unique_jobs_by_year.values):\n", + " ax.text(i, value + 0.1, str(value), ha='center', va='bottom', fontsize=12)\n", + "plt.title('Number of Unique Jobs Ran Each Year')\n", + "plt.xlabel('Year')\n", + "plt.ylabel('Number of Jobs')\n", + "plt.xticks(rotation=45)\n", + "\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": 34, + "id": "f3964c95-689c-4d45-9ad5-21f939494c3a", + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "# NOTE(review): this cell is identical to the cell under 'Number of Projects Created Each Year' below; consider removing one\n", + "df = verified_users_with_project.copy()\n", + "df['last_updated'] = pd.to_datetime(df['last_updated'])\n", + "df['year'] = df['last_updated'].dt.year\n", + "unique_projectes_created_each_year = df.groupby('year')['uuid'].nunique()\n", + "unique_projectes_created_each_year\n", + "\n", + "# Plot the number of unique projects per year\n", + "plt.figure(figsize=(8, 5))\n", + "ax = unique_projectes_created_each_year.plot(kind='bar', color='coral')\n", + "for i, value in enumerate(unique_projectes_created_each_year.values):\n", + " ax.text(i, value + 0.1, str(value), ha='center', va='bottom', fontsize=12)\n", + "plt.title('Number of Unique Projects Each Year')\n", + "plt.xlabel('Year')\n", + "plt.ylabel('Number of Projects')\n", + "plt.xticks(rotation=45)\n", + "\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "id": "45d23e2a-501f-45c7-bb80-b2177bc52a65", + "metadata": {}, + "source": [ + "## Number of Projects Created Each Year" + ] + }, + { + "cell_type": "code", + "execution_count": 35, + "id": "2363c9b9-dffd-4cac-a72d-7f5c94264b8a", + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "# NOTE(review): 'year' is derived from 'last_updated', which may differ from the project creation date - confirm\n", + "df = verified_users_with_project.copy()\n", + "df['last_updated'] = pd.to_datetime(df['last_updated'])\n", + "df['year'] = df['last_updated'].dt.year\n", + "unique_projectes_created_each_year = df.groupby('year')['uuid'].nunique()\n", + "unique_projectes_created_each_year\n", + "\n", + "# Plot the number of unique projects per year\n", + "plt.figure(figsize=(8, 5))\n", + "ax = unique_projectes_created_each_year.plot(kind='bar', color='coral')\n", + "for i, value in enumerate(unique_projectes_created_each_year.values):\n", + " ax.text(i, value + 0.1, str(value), ha='center', va='bottom', fontsize=12)\n", + "plt.title('Number of Unique Projects Each Year')\n", + "plt.xlabel('Year')\n", + "plt.ylabel('Number of Projects')\n", + "plt.xticks(rotation=45)\n", + "\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "id": "50f16b45-14c2-4e39-826d-f570b6403fd9", + "metadata": {}, + "source": [ + "## Number of Projects Created Each Year That Ran any Job" + ] + }, + { + "cell_type": "code", + "execution_count": 36, + "id": "7836abe8-9138-4c4b-852b-1084cce9e143", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Number of unique Projetcs: 784\n" + ] + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "# Keep the df_projectJob rows whose projectUuid appears in job_files_with_verified_users\n", + "df = df_projectJob.copy()\n", + "df = df[df.projectUuid.isin(job_files_with_verified_users.projectUuid)]\n", + "print(f'Number of unique Projetcs: {df.projectUuid.nunique()}')\n", + "# Parse 'lastUpdated' as a UTC datetime into 'last_updated'\n", + "\n", + "df['last_updated'] = pd.to_datetime(df['lastUpdated'], utc = True)\n", + "# Extract the year from 'last_updated'\n", + "df['year'] = df['last_updated'].dt.year\n", + "# Count the number of unique projects per year\n", + "# NOTE(review): unique_jobs_by_year holds project counts here and overwrites the per-year job counts from the earlier cell\n", + "unique_jobs_by_year = df.groupby('year')['projectUuid'].nunique()\n", + "\n", + "# Plot the number of unique projects per year\n", + "plt.figure(figsize=(8, 5))\n", + "ax = unique_jobs_by_year.plot(kind='bar', color='coral')\n", + "for i, value in enumerate(unique_jobs_by_year.values):\n", + " ax.text(i, value + 0.1, str(value), ha='center', va='bottom', fontsize=12)\n", + "plt.title('Number of Unique Projects Each Year That ran Any Job')\n", + "plt.xlabel('Year')\n", + "plt.ylabel('Number of Projects')\n", + "plt.xticks(rotation=45)\n", + "\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "id": "381e5034-f297-4793-ba9f-900b853468b7", + "metadata": {}, + "source": [ + "## Number of Job Files Associated With Each Project" + ] + }, + { + "cell_type": "code", + "execution_count": 37, + "id": "2ee322fa-59f2-4515-a862-f0f17fe0aefc", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Total number of project: 752\n", + "621.0\n" + ] + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "# Number of rows in job_files_with_verified_users for each projectUuid\n", + "projectUuid_counts = job_files_with_verified_users.projectUuid.value_counts()\n", + "# NOTE(review): this prints the number of projects with more than 2 rows, not the total number of projects\n", + "print(f'Total number of project: {(projectUuid_counts > 2).sum()}')\n", + "plt.figure(figsize=(10, 6))\n", + "# NOTE(review): bin edges start at 10 and step by 500, so projects with a count below 10 or past the last edge are not drawn\n", + "counts, edges, patches = plt.hist(projectUuid_counts, bins=range(10, max(projectUuid_counts)+1, 500), color='coral', edgecolor='black')\n", + "# Number of projects that fall inside the histogram range\n", + "print(counts.sum())\n", + "# Add count labels on top of each bar\n", + "for patch in patches:\n", + "    height = patch.get_height()\n", + "    if height > 0:\n", + "        plt.text(patch.get_x() + patch.get_width() / 2, height,\n", + "                 f'{int(height)}', ha='center', va='bottom', fontsize=10)\n", + "\n", + "plt.title('Distribution of File Counts For Each Project')\n", + "plt.xlabel('File Count')\n", + "plt.ylabel('Number of Project (in log)')\n", + "plt.yscale('log')\n", + "plt.xscale('log')\n", + "\n", + "plt.show()\n" + ] + }, + { + "cell_type": "markdown", + "id": "30b620e0-b666-42ba-a29a-5d428e9ed0c1", + "metadata": {}, + "source": [ + "# Number of Users per Country" + ] + }, + { + "cell_type": "code", + "execution_count": 38, + "id": "20c11360-89b6-4f3b-a0c5-7206c2558235", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Total number of unique countries: 118\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/s234499/miniforge3/envs/general_env/lib/python3.12/site-packages/IPython/core/pylabtools.py:170: UserWarning: Glyph 20013 (\\N{CJK UNIFIED IDEOGRAPH-4E2D}) missing from font(s) DejaVu Sans.\n", + " fig.canvas.print_figure(bytes_io, **kw)\n", + "/home/s234499/miniforge3/envs/general_env/lib/python3.12/site-packages/IPython/core/pylabtools.py:170: UserWarning: Glyph 22269 (\\N{CJK UNIFIED IDEOGRAPH-56FD}) missing from font(s) DejaVu Sans.\n", + " fig.canvas.print_figure(bytes_io, **kw)\n" + ] + }, + { +
"data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "# Number of countries to show in the bar plot\n", + "n = 50\n", + "# Drop rows whose country contains any character other than word characters or whitespace; missing values pass the mask (na=False)\n", + "df_filtered = df_profile[~df_profile['country'].str.contains(r'[^\\w\\s]', na=False)]\n", + "country_counts = df_filtered['country'].value_counts()\n", + "print(f'Total number of unique countries: {len(country_counts)}')\n", + "# value_counts() sorts by count in descending order, so head(n) keeps the n most frequent countries\n", + "top_countries = country_counts.head(n)\n", + "\n", + "# Plot the bar plot\n", + "plt.figure(figsize=(16, 6))\n", + "ax = sns.barplot(x=top_countries.index, y=top_countries.values)\n", + "for i, value in enumerate(top_countries.values):\n", + "    ax.text(i, value + 0.1, str(value), ha='center', va='bottom', fontsize=12)\n", + "# Add labels and title\n", + "plt.xlabel('Country')\n", + "plt.ylabel('Number of Users')\n", + "plt.title(f'Top {n} Countries with the Most Users ')\n", + "\n", + "# Display the plot\n", + "plt.xticks(rotation=90)\n", + "\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "id": "d7597721-a85d-41c2-84b8-42cfc2a369ac", + "metadata": {}, + "source": [ + "## Look at Metadata Permission File (UUID is projectUUID)" + ] + }, + { + "cell_type": "code", + "execution_count": 39, + "id": "f22a11eb-7c76-41ff-9ff2-8bcf71338930", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
idlast_updatedpermissionusernameuuidtenant_id
052014-01-21 17:00:11READ_WRITEtest30001389977207738-5056a550b8-0001-012vdjserver.org
162014-01-21 17:00:41READ_WRITEtest40001389977207738-5056a550b8-0001-012vdjserver.org
292014-01-21 17:21:43READ_WRITEtest110001389977207738-5056a550b8-0001-012vdjserver.org
3142014-01-29 10:28:16READ_WRITEjfonner0001389977207738-5056a550b8-0001-012vdjserver.org
4172014-01-29 14:06:38READ_WRITEadshkl;dasfhkdf0001391025968832-5056a550b8-0001-012vdjserver.org
.....................
76059014449862025-01-25 09:44:43READ_WRITEschristley3580715269144908271-242ac118-0001-012vdjserver.org
76059114449872025-01-25 09:44:44READ_WRITEschristley3569118857445708271-242ac118-0001-012vdjserver.org
76059214449882025-01-25 09:44:44READ_WRITEschristley3557565395419468271-242ac118-0001-012vdjserver.org
76059314449892025-01-25 09:44:44READ_WRITEschristley3547300423582028271-242ac118-0001-012vdjserver.org
76059414449902025-01-25 09:44:44READ_WRITEschristley3534544370712908271-242ac118-0001-012vdjserver.org
\n", + "

760595 rows × 6 columns

\n", + "
" + ], + "text/plain": [ + " id last_updated permission username \\\n", + "0 5 2014-01-21 17:00:11 READ_WRITE test3 \n", + "1 6 2014-01-21 17:00:41 READ_WRITE test4 \n", + "2 9 2014-01-21 17:21:43 READ_WRITE test11 \n", + "3 14 2014-01-29 10:28:16 READ_WRITE jfonner \n", + "4 17 2014-01-29 14:06:38 READ_WRITE adshkl;dasfhkdf \n", + "... ... ... ... ... \n", + "760590 1444986 2025-01-25 09:44:43 READ_WRITE schristley \n", + "760591 1444987 2025-01-25 09:44:44 READ_WRITE schristley \n", + "760592 1444988 2025-01-25 09:44:44 READ_WRITE schristley \n", + "760593 1444989 2025-01-25 09:44:44 READ_WRITE schristley \n", + "760594 1444990 2025-01-25 09:44:44 READ_WRITE schristley \n", + "\n", + " uuid tenant_id \n", + "0 0001389977207738-5056a550b8-0001-012 vdjserver.org \n", + "1 0001389977207738-5056a550b8-0001-012 vdjserver.org \n", + "2 0001389977207738-5056a550b8-0001-012 vdjserver.org \n", + "3 0001389977207738-5056a550b8-0001-012 vdjserver.org \n", + "4 0001391025968832-5056a550b8-0001-012 vdjserver.org \n", + "... ... ... \n", + "760590 3580715269144908271-242ac118-0001-012 vdjserver.org \n", + "760591 3569118857445708271-242ac118-0001-012 vdjserver.org \n", + "760592 3557565395419468271-242ac118-0001-012 vdjserver.org \n", + "760593 3547300423582028271-242ac118-0001-012 vdjserver.org \n", + "760594 3534544370712908271-242ac118-0001-012 vdjserver.org \n", + "\n", + "[760595 rows x 6 columns]" + ] + }, + "execution_count": 39, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "metadata_perms_df\n", + "##\n", + "# metadata_perms_df[(metadata_perms_df['permission'] == 'READ_WRITE')].value_counts('username')" + ] + }, + { + "cell_type": "markdown", + "id": "a4295699-7074-4456-bcaa-df800fa69d60", + "metadata": {}, + "source": [ + "## Look at All the Jobs at VDJ Server" + ] + }, + { + "cell_type": "code", + "execution_count": 40, + "id": "0f5f5884-9d94-4422-83a9-1179ce4119b5", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
system_idownerapp_idstatuslast_updateduuidarchive_pathremote_outcomeupdate_tokenparameters.Creator
0ls6.tacc.utexas.eduvdjrepcalc-ls6-2.0u8FINISHED2025-01-25 15:43:51.678c7cd08ad-a560-4574-a363-b9cc4c5e051d-007/projects/5456400192359305711-242ac118-0001-01...FINISHEDeb27e311-4a37-4aeb-b649-056704dd2711schristley
1ls6.tacc.utexas.eduvdjigblast-ls6-1.20u6FINISHED2025-01-24 04:20:37.8919188bf80-e868-4e05-a6b4-308c044108d7-007/projects/5456400192359305711-242ac118-0001-01...FINISHED5e2528fd-25d6-4473-9287-6a67a8de8391schristley
2ls6.tacc.utexas.eduvdjigblast-ls6-1.20u6FAILED2025-01-22 15:04:46.891773a5cb7-b369-4517-a221-83d57e3899e5-007/projects/5199144433477554666-242ac116-0001-01...FAILED_SKIP_ARCHIVE78b89c14-3dec-4aa8-acf8-d2592064e3a4scott_public
3ls6.tacc.utexas.eduvdjvdj_pipe-ls6-0.1.7u2FINISHED2025-01-14 22:31:02.980c0ab5f4a-97b0-4dc3-93e8-0908c95cb3a4-007/projects/5456400192359305711-242ac118-0001-01...FINISHED1e2f122d-5e5b-4f14-931f-ca55803115ffschristley
4ls6.tacc.utexas.eduvdjvdj_pipe-ls6-0.1.7u2FINISHED2025-01-09 04:21:12.476ad02cb34-250e-48cb-a06e-973e431b62ee-007/projects/6589143665654501871-242ac118-0001-01...FINISHED1069949d-1d9a-453f-80b8-7372019aba31schristley
.................................
15776my-lonestarjfonnermy-vdj_pipe-0.0.4FINISHED2014-03-31 16:38:39.0000001396301879424-5056a550b8-0001-007/scratch/01114/jfonner/vdj/analyses/foo123-201...FINISHED3b188d18-7955-49b6-bc21-10a557ced542NaN
15777my-lonestarjfonnermy-vdj_pipe-0.0.4FINISHED2014-03-31 15:44:00.0000001396298592090-5056a550b8-0001-007/scratch/01114/jfonner/vdj/analyses/vdj_test20...FINISHEDc9dd99e9-2ef2-4fd7-b211-26b56162b21eNaN
15778my-lonestarjfonnermy-vdj_pipe-0.0.4FINISHED2014-03-31 15:35:18.0000001396298085562-5056a550b8-0001-007/scratch/01114/jfonner/vdj/analyses/vdj_test20...FINISHED282196b2-9972-4615-944d-777e1ee7826cNaN
15779my-lonestarjfonnermy-vdj_pipe-0.0.4FAILED2014-03-31 15:28:36.0000001396297676287-5056a550b8-0001-007/scratch/01114/jfonner/vdj/analyses/vdj_test20...FAILEDdc81e8a3-9869-47cc-8bee-3d254bb805d1NaN
15780my-lonestarjfonnermy-vdj_pipe-0.0.4FAILED2014-03-31 14:50:18.0000001396295290656-5056a550b8-0001-007/scratch/01114/jfonner/vdj/analyses/vdj_test20...FAILED08f920a6-e4c1-4029-9ac2-e1de96e7d23aNaN
\n", + "

15781 rows × 10 columns

\n", + "
" + ], + "text/plain": [ + " system_id owner app_id status \\\n", + "0 ls6.tacc.utexas.edu vdj repcalc-ls6-2.0u8 FINISHED \n", + "1 ls6.tacc.utexas.edu vdj igblast-ls6-1.20u6 FINISHED \n", + "2 ls6.tacc.utexas.edu vdj igblast-ls6-1.20u6 FAILED \n", + "3 ls6.tacc.utexas.edu vdj vdj_pipe-ls6-0.1.7u2 FINISHED \n", + "4 ls6.tacc.utexas.edu vdj vdj_pipe-ls6-0.1.7u2 FINISHED \n", + "... ... ... ... ... \n", + "15776 my-lonestar jfonner my-vdj_pipe-0.0.4 FINISHED \n", + "15777 my-lonestar jfonner my-vdj_pipe-0.0.4 FINISHED \n", + "15778 my-lonestar jfonner my-vdj_pipe-0.0.4 FINISHED \n", + "15779 my-lonestar jfonner my-vdj_pipe-0.0.4 FAILED \n", + "15780 my-lonestar jfonner my-vdj_pipe-0.0.4 FAILED \n", + "\n", + " last_updated uuid \\\n", + "0 2025-01-25 15:43:51.678 c7cd08ad-a560-4574-a363-b9cc4c5e051d-007 \n", + "1 2025-01-24 04:20:37.891 9188bf80-e868-4e05-a6b4-308c044108d7-007 \n", + "2 2025-01-22 15:04:46.891 773a5cb7-b369-4517-a221-83d57e3899e5-007 \n", + "3 2025-01-14 22:31:02.980 c0ab5f4a-97b0-4dc3-93e8-0908c95cb3a4-007 \n", + "4 2025-01-09 04:21:12.476 ad02cb34-250e-48cb-a06e-973e431b62ee-007 \n", + "... ... ... \n", + "15776 2014-03-31 16:38:39.000 0001396301879424-5056a550b8-0001-007 \n", + "15777 2014-03-31 15:44:00.000 0001396298592090-5056a550b8-0001-007 \n", + "15778 2014-03-31 15:35:18.000 0001396298085562-5056a550b8-0001-007 \n", + "15779 2014-03-31 15:28:36.000 0001396297676287-5056a550b8-0001-007 \n", + "15780 2014-03-31 14:50:18.000 0001396295290656-5056a550b8-0001-007 \n", + "\n", + " archive_path remote_outcome \\\n", + "0 /projects/5456400192359305711-242ac118-0001-01... FINISHED \n", + "1 /projects/5456400192359305711-242ac118-0001-01... FINISHED \n", + "2 /projects/5199144433477554666-242ac116-0001-01... FAILED_SKIP_ARCHIVE \n", + "3 /projects/5456400192359305711-242ac118-0001-01... FINISHED \n", + "4 /projects/6589143665654501871-242ac118-0001-01... FINISHED \n", + "... ... ... \n", + "15776 /scratch/01114/jfonner/vdj/analyses/foo123-201... 
FINISHED \n", + "15777 /scratch/01114/jfonner/vdj/analyses/vdj_test20... FINISHED \n", + "15778 /scratch/01114/jfonner/vdj/analyses/vdj_test20... FINISHED \n", + "15779 /scratch/01114/jfonner/vdj/analyses/vdj_test20... FAILED \n", + "15780 /scratch/01114/jfonner/vdj/analyses/vdj_test20... FAILED \n", + "\n", + " update_token parameters.Creator \n", + "0 eb27e311-4a37-4aeb-b649-056704dd2711 schristley \n", + "1 5e2528fd-25d6-4473-9287-6a67a8de8391 schristley \n", + "2 78b89c14-3dec-4aa8-acf8-d2592064e3a4 scott_public \n", + "3 1e2f122d-5e5b-4f14-931f-ca55803115ff schristley \n", + "4 1069949d-1d9a-453f-80b8-7372019aba31 schristley \n", + "... ... ... \n", + "15776 3b188d18-7955-49b6-bc21-10a557ced542 NaN \n", + "15777 c9dd99e9-2ef2-4fd7-b211-26b56162b21e NaN \n", + "15778 282196b2-9972-4615-944d-777e1ee7826c NaN \n", + "15779 dc81e8a3-9869-47cc-8bee-3d254bb805d1 NaN \n", + "15780 08f920a6-e4c1-4029-9ac2-e1de96e7d23a NaN \n", + "\n", + "[15781 rows x 10 columns]" + ] + }, + "execution_count": 40, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "jobs_all_df.head()\n", + "## Filter AllJobs columns\n", + "#### Keeping only important ones\n", + "keep_columns = ['system_id', 'owner', 'app_id', 'status', 'last_updated', 'uuid', 'archive_path', 'remote_outcome', 'update_token', 'parameters.Creator']\n", + "jobs_all_df = jobs_all_df[keep_columns]\n", + "jobs_all_df" + ] + }, + { + "cell_type": "code", + "execution_count": 41, + "id": "2713d671-05a1-47fc-9304-4cab63f9ace7", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
system_idownerapp_idstatuslast_updateduuidarchive_pathremote_outcomeupdate_tokenparameters.Creator
9999ls5.tacc.utexas.eduvdjrepcalc-ls5-1.0FINISHED2019-02-27 18:42:14.0008144122141623652841-242ac11c-0001-007/projects/7058216970366620136-242ac11e-0001-01...FINISHEDfd7354e7-729c-4b42-97b6-2a9dd08bf0a0schristley
\n", + "
" + ], + "text/plain": [ + " system_id owner app_id status \\\n", + "9999 ls5.tacc.utexas.edu vdj repcalc-ls5-1.0 FINISHED \n", + "\n", + " last_updated uuid \\\n", + "9999 2019-02-27 18:42:14.000 8144122141623652841-242ac11c-0001-007 \n", + "\n", + " archive_path remote_outcome \\\n", + "9999 /projects/7058216970366620136-242ac11e-0001-01... FINISHED \n", + "\n", + " update_token parameters.Creator \n", + "9999 fd7354e7-729c-4b42-97b6-2a9dd08bf0a0 schristley " + ] + }, + "execution_count": 41, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "jobs_all_df[jobs_all_df.uuid == '8144122141623652841-242ac11c-0001-007']" + ] + }, + { + "cell_type": "markdown", + "id": "38d03089-6aec-4ede-a618-839c7fa7cf38", + "metadata": {}, + "source": [ + "## Look at JobEvents" + ] + }, + { + "cell_type": "code", + "execution_count": 42, + "id": "c7aabc2b-c77e-4622-9a31-8beee6bae0e5", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
idcreatedcreated_bydescriptionip_addressstatustenant_idjob_idtransfertaskuuid
054762014-03-31 14:48:10jfonnerJob accepted and queued for submission.129.114.60.167PENDINGvdjserver.org142NaNNone
154772014-03-31 14:48:20jfonnerNo inputs for the given job. Skipping staging129.114.60.167STAGEDvdjserver.org142NaNNone
254782014-03-31 14:48:30jfonnerAttempt [1] Preparing job for execution and st...129.114.60.167SUBMITTINGvdjserver.org142NaNNone
354792014-03-31 14:50:18jfonnerFailed to submit job 0001396295290656-5056a550...129.114.60.167FAILEDvdjserver.org142NaNNone
454802014-03-31 15:27:56jfonnerJob accepted and queued for submission.129.114.60.167PENDINGvdjserver.org143NaNNone
.................................
341856235858022025-01-25 15:38:25vdjJob running on execution system172.17.0.5RUNNINGvdjserver.org5038650.064c47eaa-102e-4106-bae7-4af2b2a3e3a0-028
341857235858032025-01-25 15:43:33vdjJob completed execution172.17.0.5CLEANING_UPvdjserver.org5038650.0b62a2f3c-b5f6-4ba7-b764-e49091263b76-028
341858235858042025-01-25 15:43:33vdjTransferring job output to archive system172.17.0.5ARCHIVINGvdjserver.org5038650.009c31ec9-7736-4d8a-a4dd-ead09386eea1-028
341859235858052025-01-25 15:43:37vdjJob archiving in progress: agave://ls6.tacc.ut...172.17.0.5ARCHIVINGvdjserver.org503865560378724.0d0d6f978-8813-4829-abe0-ce78c5dd326e-028
341860235858062025-01-25 15:43:52vdjJob completed successfully172.17.0.5FINISHEDvdjserver.org5038650.0f93fc121-4027-4e5f-a654-32b7df137a94-028
\n", + "

341861 rows × 10 columns

\n", + "
" + ], + "text/plain": [ + " id created created_by \\\n", + "0 5476 2014-03-31 14:48:10 jfonner \n", + "1 5477 2014-03-31 14:48:20 jfonner \n", + "2 5478 2014-03-31 14:48:30 jfonner \n", + "3 5479 2014-03-31 14:50:18 jfonner \n", + "4 5480 2014-03-31 15:27:56 jfonner \n", + "... ... ... ... \n", + "341856 23585802 2025-01-25 15:38:25 vdj \n", + "341857 23585803 2025-01-25 15:43:33 vdj \n", + "341858 23585804 2025-01-25 15:43:33 vdj \n", + "341859 23585805 2025-01-25 15:43:37 vdj \n", + "341860 23585806 2025-01-25 15:43:52 vdj \n", + "\n", + " description ip_address \\\n", + "0 Job accepted and queued for submission. 129.114.60.167 \n", + "1 No inputs for the given job. Skipping staging 129.114.60.167 \n", + "2 Attempt [1] Preparing job for execution and st... 129.114.60.167 \n", + "3 Failed to submit job 0001396295290656-5056a550... 129.114.60.167 \n", + "4 Job accepted and queued for submission. 129.114.60.167 \n", + "... ... ... \n", + "341856 Job running on execution system 172.17.0.5 \n", + "341857 Job completed execution 172.17.0.5 \n", + "341858 Transferring job output to archive system 172.17.0.5 \n", + "341859 Job archiving in progress: agave://ls6.tacc.ut... 172.17.0.5 \n", + "341860 Job completed successfully 172.17.0.5 \n", + "\n", + " status tenant_id job_id transfertask \\\n", + "0 PENDING vdjserver.org 142 NaN \n", + "1 STAGED vdjserver.org 142 NaN \n", + "2 SUBMITTING vdjserver.org 142 NaN \n", + "3 FAILED vdjserver.org 142 NaN \n", + "4 PENDING vdjserver.org 143 NaN \n", + "... ... ... ... ... \n", + "341856 RUNNING vdjserver.org 503865 0.0 \n", + "341857 CLEANING_UP vdjserver.org 503865 0.0 \n", + "341858 ARCHIVING vdjserver.org 503865 0.0 \n", + "341859 ARCHIVING vdjserver.org 503865 560378724.0 \n", + "341860 FINISHED vdjserver.org 503865 0.0 \n", + "\n", + " uuid \n", + "0 None \n", + "1 None \n", + "2 None \n", + "3 None \n", + "4 None \n", + "... ... 
\n", + "341856 64c47eaa-102e-4106-bae7-4af2b2a3e3a0-028 \n", + "341857 b62a2f3c-b5f6-4ba7-b764-e49091263b76-028 \n", + "341858 09c31ec9-7736-4d8a-a4dd-ead09386eea1-028 \n", + "341859 d0d6f978-8813-4829-abe0-ce78c5dd326e-028 \n", + "341860 f93fc121-4027-4e5f-a654-32b7df137a94-028 \n", + "\n", + "[341861 rows x 10 columns]" + ] + }, + "execution_count": 42, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "job_events_df" + ] + }, + { + "cell_type": "markdown", + "id": "fc85305a-7c00-4491-a7f2-c22eb210b52d", + "metadata": {}, + "source": [ + "## Filter Metadata File by Removing Users With Only READ Access and Usernames That Contain 'test'" + ] + }, + { + "cell_type": "code", + "execution_count": 43, + "id": "f0cdc3bd-0291-471b-9ac4-3744a8ebabe1", + "metadata": {}, + "outputs": [], + "source": [ + "# Keep every permission row whose permission is not plain 'READ' (e.g. READ_WRITE or ALL)\n", + "filtered_metadata_perms_df = metadata_perms_df[metadata_perms_df.permission != 'READ']\n", + "# Filter out usernames containing 'test' (case-insensitive); na=False treats a missing username as no match so the mask stays boolean\n", + "filtered_metadata_perms_df = filtered_metadata_perms_df[~filtered_metadata_perms_df['username'].str.contains('test', case=False, na=False)]" + ] + }, + { + "cell_type": "markdown", + "id": "cf1864e5-72cb-4ebe-a429-165c5c0c59e8", + "metadata": {}, + "source": [ + "## Plot Number of Unique Users for Each Year" + ] + }, + { + "cell_type": "code", + "execution_count": 44, + "id": "b1e5203a-54ce-451b-90e5-cf8f7939faf8", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "year\n", + "2014 741\n", + "2015 5495\n", + "2016 10172\n", + "2017 51812\n", + "2018 65288\n", + "2019 61059\n", + "2020 52641\n", + "2021 54077\n", + "2022 31584\n", + "2023 87717\n", + "2024 34663\n", + "2025 3887\n", + "Name: uuid, dtype: int64\n" + ] + }, + { + "data": { + "image/png":
"iVBORw0KGgoAAAANSUhEUgAAArcAAAHpCAYAAACY8RRtAAAAOnRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjEwLjEsIGh0dHBzOi8vbWF0cGxvdGxpYi5vcmcvc2/+5QAAAAlwSFlzAAAPYQAAD2EBqD+naQAAXoBJREFUeJzt3XlYVHX///HXACOL4QIpiLmQW265Z2luKZiluVRWmEtZeqflnkZ1J7Zo2qJlu+XSwrfuFtvuUrFMM1vct8zSXNIkTQkUDEb4/P7oZn5OgM5BYIbT83FdXDqf+Zxz3q85A7w5c+aMwxhjBAAAANhAgK8LAAAAAEoKzS0AAABsg+YWAAAAtkFzCwAAANuguQUAAIBt0NwCAADANmhuAQAAYBs0twAAALANmlsAAADYBs0tUIoWLlwoh8OhkJAQ7du3r8D9Xbt2VbNmzXxQmfTFF1/I4XDonXfe8cn2rdq7d6+uvvpqRUREyOFwaNy4cUXOdTgcuvPOOwu975133pHD4dAXX3xRrBocDocWLlxoedmyVFr5faVr165yOByFfm3btq1Utz1s2LAit+1wOEptu3Xr1lXv3r0tL/f222/L4XBo7ty5hd4/YsQIBQcHa8uWLedaIuC3gnxdAPBPkJ2drfvvv1+vvfaar0spt8aPH69vv/1W8+fPV3R0tGrUqFHmNdSoUUNff/216tWrV+bb/qe78MIL9cYbbxQYL4t9ERoaqs8//7zUt1MSrr/+eiUkJOiee+5Rr169VL9+ffd9y5Yt07x58zRjxgxdfPHFPqwSKF00t0AZuPLKK5WcnKxJkyapRYsWvi6nTJ08eVIhISHnfJRr27ZtuuSSS9SvX7+SKawYgoODdemll/ps++VVVlaWwsLCzmkdoaGhpfbYnzx5UqGhoUXeHxAQUK72+zPPPKMvvvhCw4YN06pVqxQQEKCMjAzddtttuuyyy3T33XeXSR0lsd+B4uC0BKAMTJ48WZGRkZoyZcoZ553pZW+Hw6GkpCT37aSkJDkcDm3ZskXXX3+9KleurIiICE2YMEGnTp3Szp07deWVVyo8PFx169bVrFmzCt3mn3/+qQkTJig6OlqhoaHq0qWLNm7cWGDeunXrdM011ygiIkIhISFq1aqV/vOf/3jMyT8NY9myZbr11ltVrVo1hYWFKTs7u8jM+/fv180336zq1asrODhYjRs31hNPPKG8vDxJ///0iV27dunTTz91vxy8d+/eMz6WVuSfHrJ27Vp16tRJYWFhuvDCC/Xoo4+665CK3j///e9/1bJlSwUHBys2NlaPP/64e/+cbVmp4L6VpJ9++kkJCQkej8uzzz5bYplPt3HjRvXu3du9rZiYGF199dU6cOCAe44xRs8995xatmyp0NBQVa1aVdddd51+/vlnj3XlP5arVq1Shw4dFBYWpltvvVWS9Pnnn6tr166KjIxUaGioateurWuvvVZZWVnnnOHPP/9UYmKiYmNjVaFCBdWsWVOjR4/WH3/84TEv/+X+9957T61atVJISIimTZtWItufOHGiWrZs6f5evOyyy/TBBx8UmJuXl6e5c+e6H8sqVaro0ksv1Ycfflhg7pIlS9S6dWuFhobqoosu0vz5889aS9WqVfXKK6/oq6++0uzZsyX99crH0aNHtWjRIgUGBiojI0OTJk3yeLzGjRunzMxMj3U9++yz6ty5s6pXr66KFSuqefPmmjVrllwul8e8M+13oKxx5BYoA+Hh4br//vs1duxYff7557riiitKbN0DBw7UzTffrJEjRyolJcX9i2f58uUaNWqUJk2apOTkZE2ZMkX169fXgAEDPJa/99571bp1a7388stKT09XUlKSunbtqo0bN+rCCy+UJK1YsUJXXnml2rdvrxdeeEGVK1fWm2++qRtuuEFZWVkaNmyYxzpvvfVWXX311XrttdeUmZkpp9NZaO1HjhxRhw4dlJOTo4ceekh169bVxx9/rEmTJmn37t167rn
n1Lp1a3399dfq37+/6tWrp8cff1ySSvy0hNTUVA0aNEgTJ07U1KlTtXjxYiUmJiomJkZDhgwpcrnPPvtMffv21WWXXaY333xTubm5mjVrln777bdi1/L999+rQ4cOql27tp544glFR0dr6dKlGjNmjH7//XdNnTq12Ov+u8zMTMXFxSk2NlbPPvusoqKilJqaqhUrVuj48ePueSNHjtTChQs1ZswYzZw5U8eOHdODDz6oDh06aPPmzYqKinLPPXTokG6++WZNnjxZ06dPV0BAgPuc6U6dOmn+/PmqUqWKDh48qCVLlignJ8erI3ynTp3yuB0QEKCAgAAZY9SvXz999tlnSkxMVKdOnbRlyxZNnTpVX3/9tb7++msFBwe7l9uwYYN27Nih+++/X7GxsapYsaLlbZ++femvU4+OHTumSZMmqWbNmsrJydHy5cs1YMAALViwwOM5NGzYML3++usaPny4HnzwQVWoUEEbNmwo8Afb5s2bNXHiRN1zzz2KiorSyy+/rOHDh6t+/frq3LnzGeu98sorNXLkSN1///0KCAjQ/Pnz9cwzz6hBgwbKyspSly5ddODAAd177726+OKLtX37dj3wwAPaunWrli9f7v7DbPfu3UpISHA3wZs3b9YjjzyiH374oUCjXdh+B3zCACg1CxYsMJLM2rVrTXZ2trnwwgtN27ZtTV5enjHGmC5dupimTZu65+/Zs8dIMgsWLCiwLklm6tSp7ttTp041kswTTzzhMa9ly5ZGknnvvffcYy6Xy1SrVs0MGDDAPbZixQojybRu3dpdjzHG7N271zidTnPbbbe5xy666CLTqlUr43K5PLbVu3dvU6NGDZObm+uRd8iQIV49Pvfcc4+RZL799luP8TvuuMM4HA6zc+dO91idOnXM1Vdf7dV6JZnRo0cXet/bb79tJJkVK1a4x7p06VJoHU2aNDE9e/Z03y5s/7Rv397ExMSYkydPuscyMjJMRESEOf1HrJV927NnT3PBBReY9PR0j3l33nmnCQkJMceOHTtTfEv5161bZySZ999/v8j1ff3114U+13755RcTGhpqJk+e7B7Lfyw/++wzj7nvvPOOkWQ2bdp0xtoLk7/Ov38NGjTIGGPMkiVLjCQza9Ysj+XeeustI8m89NJL7rE6deqYwMBAj+fWmQwdOrTQbUsy3bt3L3K5U6dOGZfLZYYPH25atWrlHl+1apWRZO67774zbrdOnTomJCTE7Nu3zz128uRJExERYUaOHOlV7cePHzcXXnihkWR69Ojh/j6fMWOGCQgIMGvXrvWYn7+PPvnkk0LXl5uba1wul3n11VdNYGCgx/OwqP0O+AJ/VgFlpEKFCnr44Ye1bt26Ai/nn4u/v6O6cePGcjgc6tWrl3ssKChI9evXL/SKDQkJCR4vn9epU0cdOnTQihUrJEm7du3SDz/8oEGDBkn66whW/tdVV12lQ4cOaefOnR7rvPbaa72q/fPPP1eTJk10ySWXeIwPGzZMxpgyfRNPdHR0gTouvvjiQh+zfJmZmVq7dq0GDBigkJAQ93h4eLj69OlTrDr+/PNPffbZZ+rfv7/CwsIKPN5//vmnvvnmm2KtuzD169dX1apVNWXKFL3wwgv6/vvvC8z5+OOP5XA4dPPNN3vUEx0drRYtWhS48kLVqlULvDrRsmVLVahQQSNGjNCiRYsKnM5wNvXq1dPatWs9vh566CFJcj9P/v4KwvXXX6+KFSvqs88+8xi/+OKL1bBhQ6+3HRoaWmDba9eu1XPPPecx7+2331bHjh113nnnKSgoSE6nU6+88op27NjhnvPpp59KkkaPHn3W7bZs2VK1a9d23w4JCVHDhg3P+Jw83XnnnafJkydLkqZNm+b+Pv/444/VrFkztWzZ0mN/9uzZs8CVNDZu3KhrrrlGkZGRCgwMlNPp1JAhQ5Sbm6sff/zRY3uF7XfAF2hugTJ04403qnXr1rrvvvsKnLNWXBERER63K1SooLCwMI9
mK3/8zz//LLB8dHR0oWNHjx6VJPfL65MmTZLT6fT4GjVqlCTp999/91je21MGjh49WujcmJgY9/3FERgYqNzc3ELvy395+e+nSkRGRhaYGxwcrJMnTxa5nbS0NOXl5RX5GBbH0aNHderUKc2dO7fA433VVVdJKvh4/52V/JUrV9bKlSvVsmVL3XvvvWratKliYmI0depU93P0t99+kzFGUVFRBWr65ptvvNr/9erV0/Lly1W9enWNHj1a9erVU7169fTUU0959biEhISobdu2Hl+xsbHuxywoKEjVqlXzWMbhcHg8l89U35kEBAQU2Hbbtm09GuT33ntPAwcOVM2aNfX666/r66+/1tq1a3Xrrbd6fN8dOXJEgYGBXj0/ivOcLGy+9Nf3f77ffvtNW7ZsKbAvw8PDZYxx78/9+/erU6dOOnjwoJ566il9+eWXWrt2rfvc77/X4YsrmACF4ZxboAw5HA7NnDlTcXFxeumllwrcn9+Q/v0NWMVt8ryRmppa6Fj+L9bzzz9fkpSYmFjgfN18jRo18rjt7ZURIiMjdejQoQLjv/76q8e2rYqKitLBgwcLvS9//PRzRIuratWqcjgcRT6Gp/N231atWlWBgYEaPHhwkUf38pu6oljN37x5c7355psyxmjLli1auHChHnzwQYWGhuqee+7R+eefL4fDoS+//NLj3NV8fx8rav936tRJnTp1Um5urtatW6e5c+dq3LhxioqK0o033njGTGcSGRmpU6dO6ciRIx4NrjFGqampateunVf1nYvXX39dsbGxeuuttzzW//f9Xa1aNeXm5io1NdVnzeD555+v0NDQIt+clv999/777yszM1Pvvfee6tSp475/06ZNhS5Xmtf9BazgyC1Qxnr06KG4uDg9+OCDOnHihMd9UVFRCgkJKXCB9cLecV1S/u///k/GGPftffv2ac2aNerataukvxrXBg0aaPPmzYUevWrbtq3Cw8OLte3u3bvr+++/14YNGzzGX331VTkcDnXr1q1Y6+3Ro4dWrFihI0eOeIwbY/T222+rbt26Htf/LK6KFSvqkksu0XvvvedxdO748eP66KOPPOZ6u2/DwsLUrVs3bdy4URdffHGhj3dhR/ROV9z8DodDLVq00OzZs1WlShX3fundu7eMMTp48GCh9TRv3vzsD9ZpAgMD1b59e/cRwL/vf6u6d+8u6a8G83TvvvuuMjMz3feXJofDoQoVKng0eKmpqQX2b/7pQs8//3yp11SU3r17a/fu3YqMjCx0f9atW1fS/29WT//jxRijefPm+aJswGscuQV8YObMmWrTpo0OHz6spk2busfzz2ucP3++6tWrpxYtWui7775TcnJyqdVy+PBh9e/fX7fffrvS09M1depUhYSEKDEx0T3nxRdfVK9evdSzZ08NGzZMNWvW1LFjx7Rjxw5t2LBBb7/9drG2PX78eL366qu6+uqr9eCDD6pOnTr673//q+eee0533HGHpfMiT/fAAw/oo48+Uvv27XXPPfeoQYMGSk1N1bx587R27doSPef5oYce0pVXXqm4uDhNnDhRubm5mjlzpipWrKhjx46551nZt0899ZQuv/xyderUSXfccYfq1q2r48ePa9euXfroo4/Oei6ylfwff/yxnnvuOfXr108XXnihjDF677339McffyguLk6S1LFjR40YMUK33HKL1q1bp86dO6tixYo6dOiQVq9erebNm+uOO+44Y00vvPCCPv/8c1199dWqXbu2/vzzT/eRwx49enj9eBcmLi5OPXv21JQpU5SRkaGOHTu6r5bQqlUrDR48+JzWn5eXV+R5zq1atVJwcLD78mKjRo3Sddddp19++UUPPfSQatSooZ9++sk9v1OnTho8eLAefvhh/fbbb+rdu7eCg4O1ceNGhYWF6a677jqnWr0xbtw4vfvuu+rcubPGjx+viy++WHl5edq/f7+WLVumiRMnqn379oqLi1OFChV00003afLkyfrzzz/
1/PPPKy0trdRrBM6Jj97IBvwjnH61hL9LSEgwkjyulmCMMenp6ea2224zUVFRpmLFiqZPnz5m7969RV4t4ciRIx7LDx061FSsWLHA9v5+ZYb8qyW89tprZsyYMaZatWomODjYdOrUyaxbt67A8ps3bzYDBw401atXN06n00RHR5srrrjCvPDCC17lLcq+fftMQkKCiYyMNE6n0zRq1Mg89thj7isw5LNytQRjjPnpp5/MzTffbGrUqGGCgoJMlSpVTHx8fKHv5v77Y5Nv6NChpk6dOu7bRV3x4MMPPzQXX3yxqVChgqldu7Z59NFH3fvndN7u2/xt3XrrraZmzZrG6XSaatWqmQ4dOpiHH364RPP/8MMP5qabbjL16tUzoaGhpnLlyuaSSy4xCxcuLLDO+fPnm/bt25uKFSua0NBQU69ePTNkyBCP50tRj+XXX39t+vfvb+rUqWOCg4NNZGSk6dKli/nwww/PmqWodZ7u5MmTZsqUKaZOnTrG6XSaGjVqmDvuuMOkpaV5zLP6PDrT1RIkmZ9++sk999FHHzV169Y1wcHBpnHjxmbevHmFPg9yc3PN7NmzTbNmzUyFChVM5cqVzWWXXWY++uijs9bZpUsX06VLF6/rL+p78sSJE+b+++83jRo1ctfQvHlzM378eJOamuqe99FHH5kWLVqYkJAQU7NmTXP33XebTz/9tNArjpxtHwFlxWHMaa9HAgBKRFJSkqZNmyZ+xAJA2eKcWwAAANgGzS0AAABsg9MSAAAAYBscuQUAAIBt+LS5XbVqlfr06aOYmBg5HA69//77Bebs2LFD11xzjSpXrqzw8HBdeuml2r9/v/v+7Oxs3XXXXTr//PNVsWJFXXPNNTpw4EAZpgAAAIC/8Glzm5mZqRYtWuiZZ54p9P7du3fr8ssv10UXXaQvvvhCmzdv1r///W+PjxUdN26cFi9erDfffFOrV6/WiRMn1Lt37yI/ehIAAAD25Tfn3DocDi1evFj9+vVzj914441yOp167bXXCl0mPT1d1apV02uvvaYbbrhB0l8f21mrVi198skn6tmzp1fbzsvL06+//qrw8HA+PhAAAMAPGWN0/PhxxcTEKCCg6OOzfvsJZXl5efrvf/+ryZMnq2fPntq4caNiY2OVmJjoboDXr18vl8ul+Ph493IxMTFq1qyZ1qxZU2Rzm52d7fF53wcPHlSTJk1KNQ8AAADO3S+//KILLrigyPv9trk9fPiwTpw4oUcffVQPP/ywZs6cqSVLlmjAgAFasWKFunTpotTUVFWoUEFVq1b1WDYqKkqpqalFrnvGjBmaNm1agfGXX35ZYWFhJZ4FAAAA5yYrK0u33XabwsPDzzjPb5vbvLw8SVLfvn01fvx4SVLLli21Zs0avfDCC+rSpUuRyxpjznh6QWJioiZMmOC+nZGRoVq1aqlfv36qVKlSCSUoyOVyKSUlRXFxcXI6naW2nbJgpywSefyZnbJI5PFndsoikcef2SmLVHZ5MjIydNttt531FFK/bW7PP/98BQUFFThdoHHjxlq9erUkKTo6Wjk5OUpLS/M4env48GF16NChyHUHBwcrODi4wLjT6SyTJ1lZbacs2CmLRB5/ZqcsEnn8mZ2ySOTxZ3bKIpV+Hm/X7bfXua1QoYLatWunnTt3eoz/+OOPqlOnjiSpTZs2cjqdSklJcd9/6NAhbdu27YzNLQAAAOzJp0duT5w4oV27drlv79mzR5s2bVJERIRq166tu+++WzfccIM6d+6sbt26acmSJfroo4/0xRdfSJIqV66s4cOHa+LEiYqMjFRERIQmTZqk5s2bq0ePHj5KBQAAAF/xaXO7bt06devWzX07/zzYoUOHauHCherfv79eeOEFzZgxQ2PGjFGjRo307rvv6vLLL3cvM3v2bAUFBWngwIE6efKkunfvroULFyowMLDM8wAAAMC3fNrcdu3aVWe7zO6tt96qW2+9tcj7Q0JCNHfuXM2dO7ekywMAAEA
547fn3AIAAABW0dwCAADANmhuAQAAYBs0twAAALANmlsAAADYBs0tAAAAbIPmFgAAALZBcwsAAADboLkFAACAbdDcAgAAwDZ8+vG7ALyU1N/a/ACn1GKQNGOQlOeysJ3F1rYDAMXFzzWUEo7cAgAAwDZobgEAAGAbNLcAAACwDZpbAAAA2AbNLQAAAGyD5hYAAAC2QXMLAAAA26C5BQAAgG3Q3AIAAMA2aG4BAABgGzS3AAAAsA2aWwAAANgGzS0AAABsg+YWAAAAtkFzCwAAANuguQUAAIBt0NwCAADANmhuAQAAYBs0twAAALANmlsAAADYBs0tAAAAbIPmFgAAALZBcwsAAADboLkFAACAbdDcAgAAwDZobgEAAGAbPm1uV61apT59+igmJkYOh0Pvv/9+kXNHjhwph8OhOXPmeIxnZ2frrrvu0vnnn6+KFSvqmmuu0YEDB0q3cAAAAPglnza3mZmZatGihZ555pkzznv//ff17bffKiYmpsB948aN0+LFi/Xmm29q9erVOnHihHr37q3c3NzSKhsAAAB+KsiXG+/Vq5d69ep1xjkHDx7UnXfeqaVLl+rqq6/2uC89PV2vvPKKXnvtNfXo0UOS9Prrr6tWrVpavny5evbsWWq1AwAAwP/4tLk9m7y8PA0ePFh33323mjZtWuD+9evXy+VyKT4+3j0WExOjZs2aac2aNUU2t9nZ2crOznbfzsjIkCS5XC65XK4STvH/5a+7NLdRVuyURSoHeQKclqa7AoI8/vV+Qf/L7/f7xiLy+C87ZZHKQR5+rvnvvrGorPJ4u36HMcaUaiVecjgcWrx4sfr16+cemzFjhlasWKGlS5fK4XCobt26GjdunMaNGydJSk5O1i233OLRqEpSfHy8YmNj9eKLLxa6raSkJE2bNq3AeHJyssLCwkosEwAAAEpGVlaWEhISlJ6erkqVKhU5z2+P3K5fv15PPfWUNmzYIIfDYWlZY8wZl0lMTNSECRPctzMyMlSrVi3Fx8ef8cE6Vy6XSykpKYqLi5PTae0vVn9jpyxSOcgzY5Cl6a6AIKU0v0FxW9+SM++U9wsmvmGxsNLn9/vGIvL4LztlkcpBHn6u+e++sais8uS/0n42ftvcfvnllzp8+LBq167tHsvNzdXEiRM1Z84c7d27V9HR0crJyVFaWpqqVq3qnnf48GF16NChyHUHBwcrODi4wLjT6SyTJ1lZbacs2CmL5Md58or3Uo8z75ScVpb1x+z/47f7ppjI47/slEXy4zz8XPPffVNMpZ3H23X77XVuBw8erC1btmjTpk3ur5iYGN19991aunSpJKlNmzZyOp1KSUlxL3fo0CFt27btjM0tAAAA7MmnR25PnDihXbt2uW/v2bNHmzZtUkREhGrXrq3IyEiP+U6nU9HR0WrUqJEkqXLlyho+fLgmTpyoyMhIRUREaNKkSWrevLn76gkAAAD45/Bpc7tu3Tp169bNfTv/PNihQ4dq4cKFXq1j9uzZCgoK0sCBA3Xy5El1795dCxcuVGBgYGmUDAAAAD/m0+a2a9eusnKxhr179xYYCwkJ0dy5czV37twSrAwAAADlkd+ecwsAAABYRXMLAAAA26C5BQAAgG347XVuAQDAaZL6W5sf4JRaDPrrwxKsXBc2abG17QB+hiO3AAAAsA2aWwAAANgGzS0AAABsg+YWAAAAtkFzCwAAANuguQUAAIBt0NwCAADANmhuAQAAYBs0twAAALANmlsAAADYBs0tAAAAbIPmFgAAALZBcwsAAADboLkFAACAbdDcAgAAwDZobgEAAGAbQb4uAH4iqb+1+QFOqcUgacYgKc9lYTuLrW0HAADAAo7cAgAAwDY4cgsAsCdekQL+kThyCwAAANuguQUAAIBt0NwCAADANmhuAQAAYBs0twAAALANmlsAAADYBs0tAAAAbIPmFgAAALZBcwsAAADboLkFAACAbdDcAgAAwDZobgEAAGAbNLcAAACwDZpbAAA
A2IZPm9tVq1apT58+iomJkcPh0Pvvv+++z+VyacqUKWrevLkqVqyomJgYDRkyRL/++qvHOrKzs3XXXXfp/PPPV8WKFXXNNdfowIEDZZwEAAAA/sCnzW1mZqZatGihZ555psB9WVlZ2rBhg/79739rw4YNeu+99/Tjjz/qmmuu8Zg3btw4LV68WG+++aZWr16tEydOqHfv3srNzS2rGAAAAPATQb7ceK9evdSrV69C76tcubJSUlI8xubOnatLLrlE+/fvV+3atZWenq5XXnlFr732mnr06CFJev3111WrVi0tX75cPXv2LPUMAAAA8B8+bW6tSk9Pl8PhUJUqVSRJ69evl8vlUnx8vHtOTEyMmjVrpjVr1hTZ3GZnZys7O9t9OyMjQ9Jfp0K4XK5Sqz9/3aW5jWILcFqa7goI8vjX+wX9MLv8fN9I/+j94/f7xiLylCG7fd+Qx+Nf7xf0v+emX3/fFENZ5fF2/Q5jjCnVSrzkcDi0ePFi9evXr9D7//zzT11++eW66KKL9Prrr0uSkpOTdcstt3g0qpIUHx+v2NhYvfjii4WuKykpSdOmTSswnpycrLCwsHMLAgAAgBKXlZWlhIQEpaenq1KlSkXOKxdHbl0ul2688Ubl5eXpueeeO+t8Y4wcDkeR9ycmJmrChAnu2xkZGapVq5bi4+PP+GCdK5fLpZSUFMXFxcnptPYXa6mbMcjSdFdAkFKa36C4rW/JmXfK+wUT37BYWNnw630j/aP3j9/vG4vIU4bs9n1DHv/OY4Fff98UQ1nlyX+l/Wz8vrl1uVwaOHCg9uzZo88//9yj+YyOjlZOTo7S0tJUtWpV9/jhw4fVoUOHItcZHBys4ODgAuNOp7NMnmRltR1L8or3UoIz75ScVpb1t9x/45f7RmL/yI/3TTGRpwzY7fuGPJL8OE8x+OX3zTko7Tzertuvm9v8xvann37SihUrFBkZ6XF/mzZt5HQ6lZKSooEDB0qSDh06pG3btmnWrFm+KBnA2ST1tzY/wCm1GPTXUR4rv9CSFlvbDgDAFnza3J44cUK7du1y396zZ482bdqkiIgIxcTE6LrrrtOGDRv08ccfKzc3V6mpqZKkiIgIVahQQZUrV9bw4cM1ceJERUZGKiIiQpMmTVLz5s3dV08AAADAP4dPm9t169apW7du7tv558EOHTpUSUlJ+vDDDyVJLVu29FhuxYoV6tq1qyRp9uzZCgoK0sCBA3Xy5El1795dCxcuVGBgYJlkAAAAgP/waXPbtWtXneliDd5cyCEkJERz587V3LlzS7I0AAAAlEM+/YQyAAAAoCTR3AIAAMA2aG4BAABgGzS3AAAAsA2aWwAAANiG5eb25MmTysrKct/et2+f5syZo2XLlpVoYQAAAIBVlpvbvn376tVXX5Uk/fHHH2rfvr2eeOIJ9e3bV88//3yJFwgAAAB4y3Jzu2HDBnXq1EmS9M477ygqKkr79u3Tq6++qqeffrrECwQAAAC8Zbm5zcrKUnh4uCRp2bJlGjBggAICAnTppZdq3759JV4gAAAA4C3LzW39+vX1/vvv65dfftHSpUsVHx8vSTp8+LAqVapU4gUCAAAA3rLc3D7wwAOaNGmS6tatq/bt2+uyyy6T9NdR3FatWpV4gQAAAIC3gqwucN111+nyyy/XoUOH1KJFC/d49+7d1b9//xItDgAAALDCUnN76tQphYSEaNOmTQWO0l5yySUlWhgAAABglaXTEoKCglSnTh3l5uaWVj0AAABAsVk+5/b+++9XYmKijh07Vhr1AAAAAMVm+Zzbp59+Wrt27VJMTIzq1KmjihUrety/YcOGEisOAAAAsMJyc9uvX79SKAMAAAA4d5ab26lTp5ZGHQAAAMA5s3zOrST98ccfevnllz3Ovd2wYYMOHjxYosUBAAAAVlg+crtlyxb16NFDlStX1t69e3X77bcrIiJCixcv1r59+/Tqq6+WRp0AAADAWVk+cjthwgQNGzZMP/30k0J
CQtzjvXr10qpVq0q0OAAAAMAKy83t2rVrNXLkyALjNWvWVGpqaokUBQAAABSH5eY2JCREGRkZBcZ37typatWqlUhRAAAAQHFYbm779u2rBx98UC6XS5LkcDi0f/9+3XPPPbr22mtLvEAAAADAW5ab28cff1xHjhxR9erVdfLkSXXp0kX169dXeHi4HnnkkdKoEQAAAPCK5aslVKpUSatXr9bnn3+uDRs2KC8vT61bt1aPHj1Koz4AAADAa5ab23xXXHGFrrjiCkl/XfcWAAAA8DXLpyXMnDlTb731lvv2wIEDFRkZqZo1a2rz5s0lWhwAAABgheXm9sUXX1StWrUkSSkpKUpJSdGnn36qXr166e677y7xAgEAAABvWT4t4dChQ+7m9uOPP9bAgQMVHx+vunXrqn379iVeIAAAAOAty0duq1atql9++UWStGTJEvcbyYwxys3NLdnqAAAAAAssH7kdMGCAEhIS1KBBAx09elS9evWSJG3atEn169cv8QIBAAAAb1lubmfPnq26devql19+0axZs3TeeedJ+ut0hVGjRpV4gQAAAIC3LDe3TqdTkyZNKjA+bty4kqgHAAAAKDavm9tXX3210PHKlSurUaNGuuiii0qsKAAAgHIjqb+1+QFOqcUgacYgKc9lYTuLrW3nH8rr5nbs2LGFjp84cUJ5eXm66qqrlJycrPDw8BIrDgAAALDC66slpKWlFfqVnZ2tb775Rvv379e0adNKs1YAAADgjCxfCqzACgIC1K5dOz3xxBP66KOPLC27atUq9enTRzExMXI4HHr//fc97jfGKCkpSTExMQoNDVXXrl21fft2jznZ2dm66667dP7556tixYq65pprdODAgXONBQAAgHLonJvbfPXr17fcVGZmZqpFixZ65plnCr1/1qxZevLJJ/XMM89o7dq1io6OVlxcnI4fP+6eM27cOC1evFhvvvmmVq9erRMnTqh3795ccxcAAOAfyPLVEoqye/duXXDBBZaW6dWrl/s6uX9njNGcOXN03333acCAAZKkRYsWKSoqSsnJyRo5cqTS09P1yiuv6LXXXnN/mMTrr7+uWrVqafny5erZs+e5hQIAAEC5cs7NrTFGGzdu1MSJE9WnT5+SqEmStGfPHqWmpio+Pt49FhwcrC5dumjNmjUaOXKk1q9fL5fL5TEnJiZGzZo105o1a4psbrOzs5Wdne2+nZGRIUlyuVxyuSy8a9Gi/HWX5jaKLcBpaborIMjjX+8X9MPs8vN9I9lr/9gpSzH4/XPNIr/OY7fnGnk8/vV+QX6ulbay+jng7fodxhjjzcSqVavK4XAUGD9x4oRyc3N15ZVX6q233nJ/qINVDodDixcvVr9+/SRJa9asUceOHXXw4EHFxMS4540YMUL79u3T0qVLlZycrFtuucWjUZWk+Ph4xcbG6sUXXyx0W0lJSYW++S05OVlhYWHFqh8AAAClJysrSwkJCUpPT1elSpWKnOf1nwxz5swpdLxSpUq66KKL1LhxY8tFeuPvDbUxptAm28qcxMRETZgwwX07IyNDtWrVUnx8/BkfrHPlcrmUkpKiuLg4OZ3W/sordTMGWZruCghSSvMbFLf1LTnzTnm/YOIbFgsrG369byR77R87ZSkGv3+uWeTXeez2XCOP/+axU5ZiKKufA/mvtJ+N183t0KFDi11McURHR0uSUlNTVaNGDff44cOHFRUV5Z6Tk5OjtLQ0Va1a1WNOhw4dilx3cHCwgoODC4w7nc4y+eFcVtuxxMpFpE/jzDslp5Vl/S333/jlvpHstX/slOUc+O1zrZj8Mo/dnmvkkeSneeyU5RyU9s8Bb9ddYldLKGmxsbGKjo5WSkqKeywnJ0crV650N65t2rSR0+n0mHPo0CFt27btjM0tAAAA7KnErpZQHCdOnNCuXbvct/fs2aNNmzYpIiJCtWvX1rhx4zR9+nQ1aNBADRo00PTp0xUWFqaEhARJf3307/DhwzVx4kRFRkY
qIiJCkyZNUvPmzd1XTwAAAMA/h0+b23Xr1qlbt27u2/nnwQ4dOlQLFy7U5MmTdfLkSY0aNUppaWlq3769li1b5vERv7Nnz1ZQUJAGDhyokydPqnv37lq4cKECAwPLPA8AAAB8y6fNbdeuXXWmizU4HA4lJSUpKSmpyDkhISGaO3eu5s6dWwoVAgAAoDwp9jm3u3bt0tKlS3Xy5ElJOmOTCgAAAJQFy83t0aNH1aNHDzVs2FBXXXWVDh06JEm67bbbNHHixBIvEAAAAPCW5eZ2/PjxCgoK0v79+z0+8OCGG27QkiVLSrQ4AAAAwArL59wuW7ZMS5cu1QUXXOAx3qBBA+3bt6/ECgMAAACssnzkNjMzs9CPqP39998L/WAEAAAAoKxYPnLbuXNnvfrqq3rooYck/XVFg7y8PD322GMel/UCfCqpv7X5AU6pxaC/PkLRyqfFJC22th0AAFCqLDe3jz32mLp27ap169YpJydHkydP1vbt23Xs2DF99dVXpVEjAAAA4BXLzW2TJk20ZcsWPf/88woMDFRmZqYGDBig0aNHq0aNGqVRIwCgrPCqB4Byrlgf4hAdHa1p06aVdC0AAADAObHc3K5ateqM93fu3LnYxQAAAADnwnJz27Vr1wJjDofD/f/c3NxzKggAAAAoLsuXAktLS/P4Onz4sJYsWaJ27dpp2bJlpVEjAAAA4BXLR24rV65cYCwuLk7BwcEaP3681q9fXyKFAQAAAFZZPnJblGrVqmnnzp0ltToAAADAMstHbrds2eJx2xijQ4cO6dFHH1WLFi1KrDAAAADAKsvNbcuWLeVwOGSM8Ri/9NJLNX/+/BIrDAAAALDKcnO7Z88ej9sBAQGqVq2aQkJCSqwoAAAAoDgsN7d16tQpjToAAACAc2a5uX366ae9njtmzBirqwcAAACKzXJzO3v2bB05ckRZWVmqUqWKJOmPP/5QWFiYqlWr5p7ncDhobgEAAFCmLF8K7JFHHlHLli21Y8cOHTt2TMeOHdOOHTvUunVrPfzww9qzZ4/27Nmjn3/+uTTqBQAAAIpkubn997//rblz56pRo0busUaNGmn27Nm6//77S7Q4AAAAwArLze2hQ4fkcrkKjOfm5uq3334rkaIAAACA4rDc3Hbv3l2333671q1b577W7bp16zRy5Ej16NGjxAsEAAAAvGW5uZ0/f75q1qypSy65RCEhIQoODlb79u1Vo0YNvfzyy6VRIwAAAOAVy1dLqFatmj755BP9+OOP+uGHH2SMUePGjdWwYcPSqA8AAADwmuXmNl/Dhg1paAEAAOBXvGpuJ0yYoIceekgVK1bUhAkTzjj3ySefLJHCAAAAAKu8am43btzovkLCxo0bi5zncDhKpioAAACgGLxqblesWFHo/wEAAAB/YvlqCQAAAIC/svyGsszMTD366KP67LPPdPjwYeXl5Xncz8fuAgAAwFcsN7e33XabVq5cqcGDB6tGjRqcZwsAAAC/Ybm5/fTTT/Xf//5XHTt2LI16AAAAgGKzfM5t1apVFRERURq1AAAAAOfEcnP70EMP6YEHHlBWVlZp1AMAAAAUm+XTEp544gnt3r1bUVFRqlu3rpxOp8f9GzZsKLHiAAAAACssN7f9+vUrhTIKd+rUKSUlJemNN95QamqqatSooWHDhun+++9XQMBfB52NMZo2bZpeeuklpaWlqX379nr22WfVtGnTMqsTAAAA/sFyczt16tTSqKNQM2fO1AsvvKBFixapadOmWrdunW655RZVrlxZY8eOlSTNmjVLTz75pBYuXKiGDRvq4YcfVlxcnHbu3Knw8PAyqxUAAAC+Z7m5LUtff/21+vbtq6uvvlqSVLduXf3f//2f1q1bJ+mvo7Zz5szRfffdpwEDBkiSFi1apKioKCUnJ2vkyJE+qx3AP0RSf2vzA5xSi0HSjEFSnsvCdhZb2w4A/EN53dwGBAQUek3bSpUqqVGjRpo8ebK7wSwpl19+uV544QX9+OO
PatiwoTZv3qzVq1drzpw5kqQ9e/YoNTVV8fHx7mWCg4PVpUsXrVmzpsjmNjs7W9nZ2e7bGRkZkiSXyyWXy8IvG4vy112a2yi2AOfZ55zGFRDk8a/3C5ZRdvJ4/Ov9gmWQx05ZJPL4cx47ZZHI48957JSlGMqqv/F2/Q5jjPFm4gcffFDo+B9//KHvvvtOCxYs0KJFi3T99dd7X+VZGGN07733aubMmQoMDFRubq4eeeQRJSYmSpLWrFmjjh076uDBg4qJiXEvN2LECO3bt09Lly4tdL1JSUmaNm1agfHk5GSFhYWVWP0AAAAoGVlZWUpISFB6eroqVapU5Dyv/2To27dvkfcNHTpUTZo00eOPP16ize1bb72l119/XcnJyWratKk2bdqkcePGKSYmRkOHDnXP+/sRZWPMGT85LTExURMmTHDfzsjIUK1atRQfH3/GB+tcuVwupaSkKC4ursBVJnxuxiBL010BQUppfoPitr4lZ94p7xdMfMNiYcVEHv/NY6csEnn8OY+dskjk8ec8dspSDGXV3+S/0n42JXbObXx8vO6///6SWp0k6e6779Y999yjG2+8UZLUvHlz7du3TzNmzNDQoUMVHR0tSe4rKeQ7fPiwoqKiilxvcHCwgoODC4w7nc4yaTrLajuWWDn37zTOvFNyWlm2rHKTR5Kf5rFTFok8/+OXeeyURSLP//hlHjtlOQel3d94u27LH+JQlJMnTyokJKSkVifpr8PP+Zf8yhcYGKi8vDxJUmxsrKKjo5WSkuK+PycnRytXrlSHDh1KtBYAAAD4vxI7cjtv3jy1atWqpFYnSerTp48eeeQR1a5dW02bNtXGjRv15JNP6tZbb5X01+kI48aN0/Tp09WgQQM1aNBA06dPV1hYmBISEkq0FgAAAPg/r5vb089RPV16errWrVun3bt368svvyyxwiRp7ty5+ve//61Ro0bp8OHDiomJ0ciRI/XAAw+450yePFknT57UqFGj3B/isGzZMq5xCwAA8A/kdXO7cePGQscrVaqkK6+8UqNGjVKdOnVKrDBJCg8P15w5c9yX/iqMw+FQUlKSkpKSSnTbAAAAKH+8bm5XrFhRmnUAAAAA56zE3lAGAAAA+BrNLQAAAGyjxK6W8I/EZ8oDAAD4FY7cAgAAwDa8am5bt26ttLQ0SdKDDz6orKysUi0KAAAAKA6vmtsdO3YoMzNTkjRt2jSdOHGiVIsCAAAAisOrc25btmypW265RZdffrmMMXr88cd13nnnFTr39A9YAAAAAMqSV83twoULNXXqVH388cdyOBz69NNPFRRUcFGHw0FzCwAAAJ/xqrlt1KiR3nzzTUlSQECAPvvsM1WvXr1UCwMAAACssnwpsLy8vNKoAwAAADhnxbrO7e7duzVnzhzt2LFDDodDjRs31tixY1WvXr2Srg8AAADwmuXr3C5dulRNmjTRd999p4svvljNmjXTt99+q6ZNmyolJaU0agQAAAC8YvnI7T333KPx48fr0UcfLTA+ZcoUxcXFlVhxAAAAgBWWj9zu2LFDw4cPLzB+66236vvvvy+RogAAAIDisNzcVqtWTZs2bSowvmnTJq6gAAAAAJ+yfFrC7bffrhEjRujnn39Whw4d5HA4tHr1as2cOVMTJ04sjRoBAAAAr1hubv/9738rPDxcTzzxhBITEyVJMTExSkpK0pgxY0q8QAAAAMBblptbh8Oh8ePHa/z48Tp+/LgkKTw8vMQLAwAAAKwq1nVu89HUAgAAwJ9YfkMZAAAA4K9obgEAAGAbNLcAAACwDUvNrcvlUrdu3fTjjz+WVj0AAABAsVlqbp1Op7Zt2yaHw1Fa9QAAAADFZvm0hCFDhuiVV14pjVoAAACAc2L5UmA5OTl6+eWXlZKSorZt26pixYoe9z/55JMlVhwAAABgheXmdtu2bWrdurUkFTj3ltMVAAAA4EuWm9sVK1aURh0AAADAOSv2pcB27dqlpUuX6uTJk5IkY0yJFQU
AAAAUh+Xm9ujRo+revbsaNmyoq666SocOHZIk3XbbbZo4cWKJFwgAAAB4y3JzO378eDmdTu3fv19hYWHu8RtuuEFLliwp0eIAAAAAKyyfc7ts2TItXbpUF1xwgcd4gwYNtG/fvhIrDAAAALDK8pHbzMxMjyO2+X7//XcFBweXSFEAAABAcVhubjt37qxXX33VfdvhcCgvL0+PPfaYunXrVqLFAQAAAFZYPi3hscceU9euXbVu3Trl5ORo8uTJ2r59u44dO6avvvqqNGoEAAAAvGL5yG2TJk20ZcsWXXLJJYqLi1NmZqYGDBigjRs3ql69eqVRIwAAAOCVYl3nNjo6WtOmTdPHH3+sTz75RA8//LBq1KhR0rVJkg4ePKibb75ZkZGRCgsLU8uWLbV+/Xr3/cYYJSUlKSYmRqGhoeratau2b99eKrUAAADAv1k+LUGS0tLS9Morr2jHjh1yOBxq3LixbrnlFkVERJRocWlpaerYsaO6deumTz/9VNWrV9fu3btVpUoV95xZs2bpySef1MKFC9WwYUM9/PDDiouL086dOxUeHl6i9QAAAMC/WT5yu3LlSsXGxurpp59WWlqajh07pqefflqxsbFauXJliRY3c+ZM1apVSwsWLNAll1yiunXrqnv37u7TH4wxmjNnju677z4NGDBAzZo106JFi5SVlaXk5OQSrQUAAAD+z/KR29GjR2vgwIF6/vnnFRgYKEnKzc3VqFGjNHr0aG3btq3Eivvwww/Vs2dPXX/99Vq5cqVq1qypUaNG6fbbb5ck7dmzR6mpqYqPj3cvExwcrC5dumjNmjUaOXJkoevNzs5Wdna2+3ZGRoYkyeVyyeVyeV9ggNNSHldAkMe/3i9ooabislMWiTz+nMdOWSTy+HMeO2WRyOPPeeyUpRjyeydLPdQ5bOdsHMYYY2XFoaGh2rRpkxo1auQxvnPnTrVs2VInT560srozCgkJkSRNmDBB119/vb777juNGzdOL774ooYMGaI1a9aoY8eOOnjwoGJiYtzLjRgxQvv27dPSpUsLXW9SUpKmTZtWYDw5ObnQa/gCAADAt7KyspSQkKD09HRVqlSpyHmWj9y2bt1aO3bsKNDc7tixQy1btrRc6Jnk5eWpbdu2mj59uiSpVatW2r59u55//nkNGTLEPc/hcHgsZ4wpMHa6xMRETZgwwX07IyNDtWrVUnx8/BkfrAJmDPJ+rv76Cy2l+Q2K2/qWnHmnvF8w8Q1L2ykWO2WRyOPPeeyURSKPP+exUxaJPP6cx05ZisHlciklJUVxcXFyOq0dxbYi/5X2s/Gqud2yZYv7/2PGjNHYsWO1a9cuXXrppZKkb775Rs8++6weffTRYpRatBo1aqhJkyYeY40bN9a7774r6a+rNkhSamqqx9UaDh8+rKioqCLXGxwcXOinqTmdTms7Ja94h9+deafktLJsKT5R3OyURSLP//hlHjtlkcjzP36Zx05ZJPL8j1/msVOWc2C5jyrG+r3hVXPbsmVLORwOnX4Gw+TJkwvMS0hI0A033OBliWfXsWNH7dy502Psxx9/VJ06dSRJsbGxio6OVkpKilq1aiVJysnJ0cqVKzVz5swSqwMAAADlg1fN7Z49e0q7jkKNHz9eHTp00PTp0zVw4EB99913eumll/TSSy9J+ut0hHHjxmn69Olq0KCBGjRooOnTpyssLEwJCQk+qRkAAAC+41Vzm3+ktKy1a9dOixcvVmJioh588EHFxsZqzpw5GjTo/5/bMnnyZJ08eVKjRo1SWlqa2rdvr2XLlnGNWwAAgH+gYn2Iw8GDB/XVV1/p8OHDysvL87hvzJgxJVJYvt69e6t3795F3u9wOJSUlKSkpKQS3S4AAADKH8vN7YIFC/Svf/1LFSpUUGRkpMdVCRwOR4k3twAAAIC3LDe3DzzwgB544AElJiYqIMDyB5wBAAAApcZyd5qVlaUbb7yRxhYAAAB+x3KHOnz4cL399tulUQsAAABwTiy
fljBjxgz17t1bS5YsUfPmzQtcUPfJJ58sseIAAAAAKyw3t9OnT9fSpUvdH7/79zeUAQAAAL5iubl98sknNX/+fA0bNqwUygEAAACKz/I5t8HBwerYsWNp1AIAAACcE8vN7dixYzV37tzSqAUAAAA4J5ZPS/juu+/0+eef6+OPP1bTpk0LvKHsvffeK7HiAAAAACssN7dVqlTRgAEDSqMWAAAA4JwU6+N3AQAAAH/Ex4wBAADANiwfuY2NjT3j9Wx//vnncyoIAAAAKC7Lze24ceM8brtcLm3cuFFLlizR3XffXVJ1AQAAAJZZbm7Hjh1b6Pizzz6rdevWnXNBAAAAQHGV2Dm3vXr10rvvvltSqwMAAAAsK7Hm9p133lFERERJrQ4AAACwzPJpCa1atfJ4Q5kxRqmpqTpy5Iiee+65Ei0OAAAAsMJyc9uvXz+P2wEBAapWrZq6du2qiy66qKTqAgAAACyz3NxOnTq1NOoAAAAAzhkf4gAAAADb8PrIbUBAwBk/vEGSHA6HTp06dc5FAQAAAMXhdXO7ePHiIu9bs2aN5s6dK2NMiRQFAAAAFIfXzW3fvn0LjP3www9KTEzURx99pEGDBumhhx4q0eIAAAAAK4p1zu2vv/6q22+/XRdffLFOnTqlTZs2adGiRapdu3ZJ1wcAAAB4zVJzm56erilTpqh+/fravn27PvvsM3300Udq1qxZadUHAAAAeM3r0xJmzZqlmTNnKjo6Wv/3f/9X6GkKAAAAgC953dzec889Cg0NVf369bVo0SItWrSo0HnvvfdeiRUHAAAAWOF1cztkyJCzXgoMAAAA8CWvm9uFCxeWYhkAAADAueMTygAAAGAbNLcAAACwDZpbAAAA2AbNLQAAAGyD5hYAAAC2QXMLAAAA2yhXze2MGTPkcDg0btw495gxRklJSYqJiVFoaKi6du2q7du3+65IAAAA+Ey5aW7Xrl2rl156SRdffLHH+KxZs/Tkk0/qmWee0dq1axUdHa24uDgdP37cR5UCAADAV8pFc3vixAkNGjRI8+bNU9WqVd3jxhjNmTNH9913nwYMGKBmzZpp0aJFysrKUnJysg8rBgAAgC94/QllvjR69GhdffXV6tGjhx5++GH3+J49e5Samqr4+Hj3WHBwsLp06aI1a9Zo5MiRha4vOztb2dnZ7tsZGRmSJJfLJZfL5X1hAU5LOVwBQR7/er+ghZqKy05ZJPL4cx47ZZHI48957JRFIo8/57FTlmLI750s9VDnsJ2zcRhjTKlWco7efPNNPfLII1q7dq1CQkLUtWtXtWzZUnPmzNGaNWvUsWNHHTx4UDExMe5lRowYoX379mnp0qWFrjMpKUnTpk0rMJ6cnKywsLBSywIAAIDiycrKUkJCgtLT01WpUqUi5/n1kdtffvlFY8eO1bJlyxQSElLkPIfD4XHbGFNg7HSJiYmaMGGC+3ZGRoZq1aql+Pj4Mz5YBcwY5P1c/fUXWkrzGxS39S058055v2DiG5a2Uyx2yiKRx5/z2CmLRB5/zmOnLBJ5/DmPnbIUg8vlUkpKiuLi4uR0WjuKbUX+K+1n49fN7fr163X48GG1adPGPZabm6tVq1bpmWee0c6dOyVJqampqlGjhnvO4cOHFRUVVeR6g4ODFRwcXGDc6XRa2yl5xTv87sw7JaeVZUvxieJmpywSef7HL/PYKYtEnv/xyzx2yiKR53/8Mo+dspwDy31UMdbvDb9+Q1n37t21detWbdq0yf3Vtm1bDRo0SJs2bdKFF16o6OhopaSkuJfJycnRypUr1aFDBx9WDgAAAF/w6yO34eHhatasmcdYxYoVFRkZ6R4fN26cpk+frgYNGqhBgwaaPn26wsLClJCQ4IuSAQAA4EN+3dx6Y/LkyTp58qRGjRqltLQ0tW/fXsuWLVN4eLivSwMAAEAZK3fN7RdffOFx2+FwKCkpSUlJST6pBwAAAP7Dr8+5BQAAAKyguQUAAIBt0NwCAADANmhuAQA
AYBs0twAAALANmlsAAADYBs0tAAAAbIPmFgAAALZBcwsAAADboLkFAACAbdDcAgAAwDZobgEAAGAbNLcAAACwDZpbAAAA2AbNLQAAAGyD5hYAAAC2QXMLAAAA26C5BQAAgG3Q3AIAAMA2aG4BAABgGzS3AAAAsA2aWwAAANgGzS0AAABsg+YWAAAAtkFzCwAAANuguQUAAIBt0NwCAADANmhuAQAAYBs0twAAALANmlsAAADYBs0tAAAAbIPmFgAAALZBcwsAAADboLkFAACAbdDcAgAAwDZobgEAAGAbNLcAAACwDb9ubmfMmKF27dopPDxc1atXV79+/bRz506POcYYJSUlKSYmRqGhoeratau2b9/uo4oBAADgS37d3K5cuVKjR4/WN998o5SUFJ06dUrx8fHKzMx0z5k1a5aefPJJPfPMM1q7dq2io6MVFxen48eP+7ByAAAA+EKQrws4kyVLlnjcXrBggapXr67169erc+fOMsZozpw5uu+++zRgwABJ0qJFixQVFaXk5GSNHDnSF2UDAADAR/y6uf279PR0SVJERIQkac+ePUpNTVV8fLx7TnBwsLp06aI1a9YU2dxmZ2crOzvbfTsjI0OS5HK55HK5vC8owGmpfldAkMe/3i9ooabislMWiTz+nMdOWSTy+HMeO2WRyOPPeeyUpRjyeydLPdQ5bOdsHMYYU6qVlBBjjPr27au0tDR9+eWXkqQ1a9aoY8eOOnjwoGJiYtxzR4wYoX379mnp0qWFrispKUnTpk0rMJ6cnKywsLDSCQAAAIBiy8rKUkJCgtLT01WpUqUi55WbI7d33nmntmzZotWrVxe4z+FweNw2xhQYO11iYqImTJjgvp2RkaFatWopPj7+jA9WATMGeT9Xf/2FltL8BsVtfUvOvFPeL5j4hqXtFIudskjk8ec8dsoikcef89gpi0Qef85jpyzF4HK5lJKSori4ODmd1o5iW5H/SvvZlIvm9q677tKHH36oVatW6YILLnCPR0dHS5JSU1NVo0YN9/jhw4cVFRVV5PqCg4MVHBxcYNzpdFrbKXnFO/zuzDslp5VlS/GJ4manLBJ5/scv89gpi0Se//HLPHbKIpHnf/wyj52ynAPLfVQx1u8Nv75agjFGd955p9577z19/vnnio2N9bg/NjZW0dHRSklJcY/l5ORo5cqV6tChQ1mXCwAAAB/z6yO3o0ePVnJysj744AOFh4crNTVVklS5cmWFhobK4XBo3Lhxmj59uho0aKAGDRpo+vTpCgsLU0JCgo+rBwAAQFnz6+b2+eeflyR17drVY3zBggUaNmyYJGny5Mk6efKkRo0apbS0NLVv317Lli1TeHh4GVcLAAAAX/Pr5tabCzk4HA4lJSUpKSmp9AsCAACAX/Prc24BAAAAK2huAQAAYBs0twAAALANmlsAAADYBs0tAAAAbIPmFgAAALZBcwsAAADboLkFAACAbdDcAgAAwDZobgEAAGAbNLcAAACwDZpbAAAA2AbNLQAAAGyD5hYAAAC2QXMLAAAA26C5BQAAgG3Q3AIAAMA2aG4BAABgGzS3AAAAsA2aWwAAANgGzS0AAABsg+YWAAAAtkFzCwAAANuguQUAAIBt0NwCAADANmhuAQAAYBs0twAAALANmlsAAADYBs0tAAAAbIPmFgAAALZBcwsAAADboLkFAACAbdDcAgAAwDZobgEAAGAbQb4uAAAAAH4kqb+1+QFOqcUgacYgKc9lYTuLrW3H23JKZa0AAACAD9DcAgAAwDZs09w+99xzio2NVUhIiNq0aaMvv/zS1yUBAACgjNmiuX3rrbc0btw43Xfffdq4caM6deqkXr16af/+/b4uDQAAAGXIFs3tk08+qeHDh+u2225T48aNNWfOHNWqVUvPP/+8r0sDAABAGSr3V0vIycnR+vXrdc8993iMx8fHa82aNYUuk52drezsbPft9PR0SdKxY8fkcll4l1+OtVpdAUZZWVk6mmPkzLO
w4NGj1jZUHHbKIpHHn/PYKYtEHn/OY6csEnn8OY+dskh+m+f48eOSJGPMmSeacu7gwYNGkvnqq688xh955BHTsGHDQpeZOnWqkcQXX3zxxRdffPHFVzn7+uWXX87YG5b7I7f5HA6Hx21jTIGxfImJiZowYYL7dl5eno4dO6bIyMgilykJGRkZqlWrln755RdVqlSp1LZTFuyURSKPP7NTFok8/sxOWSTy+DM7ZZHKLo8xRsePH1dMTMwZ55X75vb8889XYGCgUlNTPcYPHz6sqKioQpcJDg5WcHCwx1iVKlVKq8QCKlWqZIsns2SvLBJ5/Jmdskjk8Wd2yiKRx5/ZKYtUNnkqV6581jnl/g1lFSpUUJs2bZSSkuIxnpKSog4dOvioKgAAAPhCuT9yK0kTJkzQ4MGD1bZtW1122WV66aWXtH//fv3rX//ydWkAAAAoQ7Zobm+44QYdPXpUDz74oA4dOqRmzZrpk08+UZ06dXxdmofg4GBNnTq1wCkR5ZGdskjk8Wd2yiKRx5/ZKYtEHn9mpyyS/+VxGHO26ykAAAAA5UO5P+cWAAAAyEdzCwAAANuguQUAAIBt0NwCAADANmhuAQAAYBs0tygVubm5vi6hxLhcLkl/feyfHaSlpenkyZO+LgNFsMvzLJ/d8tgJ+wZlwRf9AM2tH/jll1/0448/+rqMEvP999/rkUceUWZmpq9LOWc//PCDRowYoX379snhcPi6nHO2fft2NWnSRJ988omvSykRR44c0ZYtW7RlyxZfl3LOsrKyJEnHjx/3cSUlIzMzU7m5ubbJk88ODWH+H+x//vmnJCkvL8+X5ZyzEydO6MSJEzp8+LCk8p/HTj2Br/oBmlsfO3DggOrWrat+/frphx9+8HU552zz5s1q1qyZnE6nKlasKKn8/jLYunWrLr/8coWFhSk9Pd3X5ZyzTZs26fLLL1dGRoZeeOEFHTt2zNclnZOtW7eqa9euGjRokFq2bKmkpCRfl1Rs27Zt07XXXqsrrrhCXbt21csvv6wjR474uqxi27Ztm6655hpddtll6tChg1566SX99ttvvi6r2H788Ud99NFHkiSHw1Fuf6ZJf/3BfscddyguLk5Dhw7Vd999p4CAgHKb6fvvv3d/77Rp00bLli1TQED5bW3s1BP4sh8ov88Am3A4HGratKlycnJ09dVXa8eOHb4uqdi2bNmiDh06aPLkyUpMTHSP578kUZ5+eKalpWnIkCFKSEjQs88+q4svvlg5OTlKTU31dWnFsnnzZnXo0EF33nmn5s+fr61bt+rQoUOSyudRjl27dikuLk79+/fX22+/rfnz5+vBBx/UgQMHfF2aZT/++KO6deumpk2bavDgwerXr59GjBihSZMmae3atb4uz7Kff/5ZnTt3VrNmzTRkyBD169dPY8aM0eTJk8tlnp9++knt2rVT37599dprr0kqvw3utm3b1LFjRzmdTjVq1Ei5ubkaOnSo9uzZUy5fmcrP06RJE91xxx3q1auXhg8frj/++ENS+fqdk88uPYHP+wEDnzl16pQ5dOiQ6dGjh9mxY4fp0aOHqV+/vtm9e7cxxph169b5uELv/fTTT+a8884zw4YNc4/NnDnTDBs2zFx//fXmv//9rw+rs+6nn34yl1xyiTl27JjJy8sz119/venYsaMJCwszY8aMMV999ZWvS/Tahg0bjMPhMPfdd597rHnz5ubaa6/1YVXn5r777jO9e/d23z5+/Li56qqrzPr1681XX31lfvvtNx9WZ83YsWNNQkKCx9igQYNMhQoVzJAhQ8yOHTt8VFnxPPHEE6Zjx44eY0uXLjUNGzY0CQkJZsuWLT6qzLqjR4+aAQMGmGuuucbcddddJjw83CxYsMB9f15enu+Ks+jQoUOmXbt25u6773aPrV+/3jRv3tx8/PHHxpjylWffvn2madOmJjEx0T22fPly069fP3P06FFz8OBBH1ZXPHbpCfyhH+DIrQ8FBgYqOjpalSt
X1pEjR/Tmm28qKipKV199tfr166ekpCRlZGT4ukyv7NmzR9nZ2YqJidH27dvVuXNnLVmyRMeOHZPL5VLv3r31+OOPSyoff01nZmbq2LFjOn78uPr27asTJ05ozJgxeuqpp7RixQrNnj1bO3fu9HWZZ5Wbm6t33nlHd999tx5++GH3X8233XabfvzxR23evFlS+dgnpzt48KACAgLc5w4+/fTTWrp0qf71r3/pyiuv1MiRI/Xdd9/5uMqzM8Zo165dioyMlPT/z7tt1KiRevXqpQ8++EDJycnuueVBZmamcnJylJeXp9zcXOXm5io+Pl7PPPOMvvjiCy1cuFBS+ciTnp6uKlWq6F//+pemTJmiUaNGacyYMe4M5ekI7g8//KDzzjtPCQkJ7ppbt26typUra9OmTb4trhhSU1PVtGlT3X777e6xL774QitXrlSXLl3UvHlzPfDAA+XqvR926Qn8oh8o9fYZRcr/K7l///4mKSnJPR4dHW0cDod59913fVVasbz99tumZs2aJjo62vTr18/8+uuvJjc31xhjzNNPP20CAgLMd9995+MqvbN7924TFRVlnnvuOTNkyBDzww8/uO/76quvTFRUlJk3b54PK/TeiRMn3P/Pf87t2bPHREREmKlTp/qoqnPzyiuvmICAAHPzzTe7j3K+//77Jj093WzcuNE0bNjQPPDAA74u0ytTpkwxsbGx5tdffzXGGHPgwAFTqVIls2rVKvPKK6+YihUrmn379vm4Su+9/fbbJjAw0Kxdu9YYY4zL5XI/7/7zn/+YgIAA8/XXX/uyREt+/vln9//3799vJk+eXOAIrsvlMidPnvRBdd77+eefzX/+8x/3bZfLZYwxJj4+vtCfA/k/u/3ZgQMH3P+fN2+eCQ4ONgsXLjTr1q0zb7zxhnE4HOa9997zYYXW2Kkn8HU/QHNbxgp72efZZ591P5EHDx5soqOjTcuWLU2TJk3M1q1by7pES/Ly8jwyvfvuu6Zz585mzZo1HvN+//13U6NGDfPCCy+UdYle+/u+SUxMNA6Hw4SGhpqNGzd6zBkwYIAZMmRIWZdoSVG/nPLHZ8yYYWJjY833339flmUV29+fa6+88opJSkoyAwYMMKNGjTLG/P9sw4YNM926dXP/Avc3p+dYu3at6dmzpznvvPPMNddcY8LCwszIkSONMcbs2rXL1KhRw6xfv95XpVrmcrnMwIEDTcOGDd2nVGRnZxtjjMnJyTFNmjQxzzzzjC9LtOTv30cHDhwo0ODedddd5umnn/b7hjD/eXd6nQMHDjT33nuv+3ZSUpL55ptvyry24sjP4XK5zLx58wqcLta6dWszbtw4X5RWLKdOnTLGlO+e4HS+7AeCSud4MP7ut99+U1RUlPtlrNNP3o+JidGHH36o66+/Xl9++aWWL1+u2NhYtW/fXsOGDdOaNWtUoUIFH1Zf0N/z5GcaMGCAWrRooZiYGElyj584cUJRUVGKjY31ceUF/T2L9NfLjaNGjdJvv/2mBQsWaPXq1WrWrJmCgv76ljHGqF69er4su0j5efLfAf33N4rkv5P4sssu09NPP62tW7eqcePGysvL88t3GZ++f/Ly8tx5br31Vve/1apVkyR3huzsbDVt2tTv8vw9S0BAgNq2batnn31WH3zwgY4fP67rr79eN998syQpIyNDVapUUVhYmI8rL9zevXv1wQcfKC0tTfXr19fNN9+soKAgjRo1StOnT9fNN9+s119/XRdddJGkv76vQkNDFRoa6uPKC1dYnr9/H9WsWVNjxoyRJE2YMEELFizQl19+qfXr1/vV8+30LPXq1dPgwYM9nnenyz9d6d///rceeeQR9enTxxcln1FR+yY3N1dBQUG67bbbPOanpaWpSpUqatWqlY8qPrPC8gQGBkoqfz1BYVkk+bYfKLW2GW7ff/+9cTgcpk+fPu6x04/cbN682dStW9c0btzY4wjNH3/8Yfbs2VOWpXqlqDxnejPCvffea5o
1a+Z+6dVfFJbl9KMa27dvNzfffLNxOBxm4sSJ5vHHHzcTJ040kZGRfvlGn7M91/5uyJAhpl69eiYrK6ssyrPsbPvHmL+OQIeGhpo1a9aY9evXmwceeMCcf/75fndEurAsZzuyPHHiRNO6dWtz9OjR0i7Psi1btpiaNWuaHj16mHbt2png4GAzbdo09/1Lly41vXr1MlWrVjWvvPKKefvtt80999xjIiIi3G+Q8SeF5XnwwQeLnP/zzz+biy66yERERJjNmzeXYaVnV1iWhx56yGNO/vfRVVddZR555BHz9NNPm+DgYL98lcCbffP3n3P333+/adCggdm7d29ZluqVs+UpTz2B1e8bY8qmH6C5LWWHDh0yHTt2NF26dHGfe5Lv9F/SCxYs8LtfxoU5U57CmqgvvvjC/Otf/zJVq1Z1v7TvL86UJf/lIWOMycrKMk8//bS59NJLTZs2bcyVV15pNm3a5IuSz8jKvsl/7v3nP/8x7dq1M6mpqWVaqzfOlic/0/79+81NN91kHA6Hady4sWnevHm5eq4V9lL28uXLzR133GEqVarkd1mMMWbv3r2mXr16ZvLkySYvL89kZGSYF1980TRp0sT89NNP7nm7du0ykydPNjExMaZJkyamXbt2ZsOGDT6svHBnynP6Obf5cnNzzaRJk0xQUJDfXf3BapaEhAQTGBhowsPD/fI9EVbzfPnll2b06NGmatWq5fq5Vh56grNl+fvvnbLsB2huS9n7779vbrzxRrNq1Srz+eefm+rVq3v8Yss/F628OFue039R//bbb2b27NmmQ4cOfvcLwJizZ/n7UbVjx46ZU6dOebxBy59Y2Tf5/vzzT3Po0KGyLNNrZ8tz+h8gxhizatUqs2nTJr+8DJjVffPtt9+awYMHm23btpV1qWeVm5trZs6caa688kqTnp7uHl+3bp2pVq1aob+Qf/nlF5OWlmbS0tLKsFLvFDfPTTfd5Hd/eBQny9ixY014eLhfnstpNc/hw4fN888/b3r06GGLPP7MapbffvvNzJkzp8z6AZrbUpaWlmY+/fRT9+38X2x9+/Z1j/n7mxBO502e0/9aO378uF/+QjPG+31TXvaP1X3j77zJc+rUqXKRydt9c3qWP//8syxLtOSzzz4zM2bMcN/Oy8szOTk55sILLyz0GtD+/j1kNY8xxm+vjmA1y8aNG80vv/xSliVaYjVPenq6+eOPP8qyREuK81zzV1azHDt2rMz6AZrbMpaXl2dWrFhR4BfbCy+8UOAdheVBUXmee+65cnW5H2P+OfuGPL53piz5vxT8uWnPyclx///0OuvVq2eWL1/uvp2SkuL3ja0x1vL46xU48nmbZdmyZWVaV3FZyWO355q/57Gyb8r65xnNbQnbt2+f+fjjj828efPMr7/+ajIzM40xnkcucnNz3b/Y+vfvb0aPHm0cDodfvsnCTnnslMUY8vhzHjtlMeb/53nppZfMr7/+6j41J//UEJfLZU6cOGHq1Knjbs7vu+8+43A4/PKTouyU51yynH6dWH9hp31jjL3ylKcsNLclaPPmzSYqKsq0atXKVKlSxdSqVctMmjTJfZL43/8KS0lJMQ6Hw0RERPjlx+rZKY+dshhDHn/OY6csxpw9T15ennG5XCYzM9PUqVPHbNy40UyfPt2cd9557g9y8Cd2ymOnLMaQx5/zlLcsNLclJC0tzbRp08bcfffd5tixY8YYY6ZNm2Y6depkrrnmGvc7iE+/iPbtt99uKlasaLZv3+6zuotipzx2ymIMefw5j52yGON9nnytW7c27dq1MxUqVPC7X87G2CuPnbIYQx5/zlMes9DclpB9+/aZOnXqmKVLl3qML1q0yHTu3NkkJCR4XNPtiy++MBdffLHfPYnz2SmPnbIYQx5/zmOnLMZ4lyf/ahvHjh0zlStX9svLY+WzUx47ZTGGPP6cpzxm8Z+PUynnAgMDFRoaql9//VWSdOrUKUnSkCFDNGjQIG3btk0pKSn
u+W3atNHy5cvVtm1bn9R7NnbKY6csEnn8OY+dskje5Vm2bJkkqWrVqnr22We1detWNW/e3Gc1n4md8tgpi0Qef85TLrP4rK22oT59+piWLVu6L3Vx+rtqr7vuOnPZZZcZY/z7XdCns1MeO2Uxhjz+zE5ZjPE+jzH+f8kvY+yVx05ZjCGPPytvWThyW0yZmZk6fvy4MjIy3GPz589Xenq6Bg4cqJycHAUFBbnv69mzp4wxysnJcX9GuT+xUx47ZZHI48957JRFKn6e7OxsSVJAgH/9SrFTHjtlkcjjz3nskMX3FZRD33//vQYMGKAuXbqocePGeuONN5SXl6fzzz9fycnJ+uGHHxQfH6+dO3fqzz//lCR99913Cg8PlzHGx9UXZKc8dsoikUfy3zx2yiKdWx5/ZKc8dsoikUfy3zy2yVLWh4rLu+3bt5vIyEgzfvx4k5ycbCZMmGCcTqfHZ1hv3brVNG/e3NSrV8+0bdvW9OnTx4SHh5tNmzb5sPLC2SmPnbIYQx5/zmOnLMaQx5/z2CmLMeTx5zx2yuIwxg8PIfipY8eO6aabbtJFF12kp556yj1+xRVXqHnz5nrqqadkjHG/3Pjss8/qwIEDCg0N1Q033KBGjRr5qvRC2SmPnbJI5PHnPHbKIpHHn/PYKYtEHn/OY6cskhR09inI53K59Mcff+i6666TJOXl5SkgIEAXXnihjh49KklyOBzKzc1VYGCgRo8e7ctyz8pOeeyURSKPP+exUxaJPP6cx05ZJPL4cx47ZZE459aSqKgovf766+rUqZMkKTc3V5JUs2ZNjxOoAwMDdfz4cfdtfz04bqc8dsoikSefP+axUxaJPPn8MY+dskjkyeePeeyURaK5taxBgwaS/vqrxul0SvrrSfDbb7+558yYMUPz5s1zXwvOH98Vnc9OeeyURSKP5L957JRFIo/kv3nslEUij+S/eeyUhdMSiikgIMB9/onD4VBgYKAk6YEHHtDDDz+sjRs3elwqw9/ZKY+dskjk8Wd2yiKRx5/ZKYtEHn9mhywcuT0H+YfjAwMDVatWLT3++OOaNWuW1q1bpxYtWvi4OuvslMdOWSTy+DM7ZZHI48/slEUijz8r71n8u/X2c/nnoTidTs2bN0+VKlXS6tWr1bp1ax9XVjx2ymOnLBJ5/Jmdskjk8Wd2yiKRx5+V+ywlfW2xf6K1a9cah8Nhtm/f7utSSoSd8tgpizHk8Wd2ymIMefyZnbIYQx5/Vl6zcJ3bEpKZmamKFSv6uowSY6c8dsoikcef2SmLRB5/ZqcsEnn8WXnMQnMLAAAA2+ANZQAAALANmlsAAADYBs0tAAAAbIPmFgAAALZBcwsAAADboLkFAACAbdDcAgAAwDZobgHAjxlj1KNHD/Xs2bPAfc8995wqV66s/fv3+6AyAPBPNLcA4MccDocWLFigb7/9Vi+++KJ7fM+ePZoyZYqeeuop1a5du0S36XK5SnR9AFCWaG4BwM/VqlVLTz31lCZNmqQ9e/bIGKPhw4ere/fuuuSSS3TVVVfpvPPOU1RUlAYPHqzff//dveySJUt0+eWXq0qVKoqMjFTv3r21e/du9/179+6Vw+HQf/7zH3Xt2lUhISF6/fXXfRETAEoEH78LAOVEv3799Mcff+jaa6/VQw89pLVr16pt27a6/fbbNWTIEJ08eVJTpkzRqVOn9Pnnn0uS3n33XTkcDjVv3lyZmZl64IEHtHfvXm3atEkBAQHau3evYmNjVbduXT3xxBNq1aqVgoODFRMT4+O0AFA8NLcAUE4cPnxYzZo109GjR/XOO+9o48aN+vbbb7V06VL3nAMHDqhWrVrauXOnGjZsWGAdR44cUfXq1bV161Y1a9bM3dzOmTNHY8eOLcs4AFAqOC0BAMqJ6tWra8SIEWrcuLH69++v9evXa8WKFTrvvPPcXxdddJEkuU892L17txISEnThhReqUqVKio2NlaQCb0Jr27Zt2YYBgFIS5OsCAADeCwo
KUlDQXz+68/Ly1KdPH82cObPAvBo1akiS+vTpo1q1amnevHmKiYlRXl6emjVrppycHI/5FStWLP3iAaAM0NwCQDnVunVrvfvuu6pbt6674T3d0aNHtWPHDr344ovq1KmTJGn16tVlXSYAlClOSwCAcmr06NE6duyYbrrpJn333Xf6+eeftWzZMt16663Kzc1V1apVFRkZqZdeekm7du3S559/rgkTJvi6bAAoVTS3AFBOxcTE6KuvvlJubq569uypZs2aaezYsapcubICAgIUEBCgN998U+vXr1ezZs00fvx4PfbYY74uGwBKFVdLAAAAgG1w5BYAAAC2QXMLAAAA26C5BQAAgG3Q3AIAAMA2aG4BAABgGzS3AAAAsA2aWwAAANgGzS0AAABsg+YWAAAAtkFzCwAAANuguQUAAIBt/D+8qNIQptZNcQAAAABJRU5ErkJggg==", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "# Convert 'last_updated' to datetime format\n", + "df = filtered_metadata_perms_df\n", + "df['last_updated'] = pd.to_datetime(df['last_updated'])\n", + "\n", + " # Extract the year from 'last_updated'\n", + "df['year'] = df['last_updated'].dt.year\n", + "\n", + "print(df.groupby('year')['uuid'].nunique())\n", + "# Count the number of unique users per year\n", + "unique_users_by_year = df.groupby('year')['username'].nunique()\n", + "\n", + "# Plot the number of unique users per year\n", + "plt.figure(figsize=(8, 5))\n", + "unique_users_by_year.plot(kind='bar', color='coral')\n", + "plt.title('Number of Unique Users For Each Year')\n", + "plt.xlabel('Year')\n", + "plt.ylabel('Number of Unique Users')\n", + "plt.xticks(rotation=45)\n", + "plt.grid(True)\n", + "plt.show()\n" + ] + }, + { + "cell_type": "markdown", + "id": "02cea1a5-d43d-4eec-a370-86854414a511", + "metadata": {}, + "source": [ + "## Plot Top n Users with Unique Projects" + ] + }, + { + "cell_type": "code", + "execution_count": 45, + "id": "9f2affbe-e6df-452a-8b0a-bef83881f5a0", + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "# number of unique users\n", + "n = 20\n", + "# Group by 'username' and count the number of unique 'id' (projects) each user has\n", + "user_unique_projects = df.groupby('username')['uuid'].nunique()\n", + "# Sort users by the number of unique projects (descending order)\n", + "top_n_users = user_unique_projects.sort_values(ascending=False).head(n)\n", + "# Plot the top 15 users with their unique number of projects\n", + "plt.figure(figsize=(10, 6))\n", + "top_n_users.plot(kind='bar', color='royalblue')\n", + "plt.title(f'Top {n} Users with Unique Projects')\n", + "plt.xlabel('Username')\n", + "plt.ylabel('Number of Unique Projects')\n", + "plt.xticks(rotation=45, ha='right')\n", + "plt.grid(True)\n", + "plt.show()\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "id": "ddf99fea-77bd-4f7a-a443-dd4582fca0d7", + "metadata": {}, + "source": [ + " ## Plot the Top n Users by Number of Projects in a Year" + ] + }, + { + "cell_type": "code", + "execution_count": 46, + "id": "87da77f4-c687-4b0d-a285-9174c7017514", + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "\n", + "# number of unique users\n", + "n = 20# Filter the dataframe for a specific year, e.g., 2014\n", + "year_of_interest = 2018\n", + "# Group by 'username' and 'year' and count the number of records for each user in the specified year\n", + "user_project_counts = df[df['year'] == year_of_interest].groupby('username').size()\n", + "\n", + "# Sort the users by the number of projects (in descending order)\n", + "top_15_users = user_project_counts.sort_values(ascending=False).head(15)\n", + "\n", + "# Plot the top 10 users based on the number of projects (or records)\n", + "plt.figure(figsize=(10, 6))\n", + "top_15_users.plot(kind='bar')\n", + "plt.title(f'Top 10 Users by Number of Projects in {year_of_interest}')\n", + "plt.xlabel('Username')\n", + "plt.ylabel('Number of Projects')\n", + "plt.xticks(rotation=45, ha='right')\n", + "plt.grid(True)\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "id": "e80f7930-0f5d-42f9-af6e-f84470817ce0", + "metadata": {}, + "source": [ + "## For each Project How many Jobs are there?" + ] + }, + { + "cell_type": "code", + "execution_count": 47, + "id": "fa6b967f-34b6-44b9-9acb-de7f85a82025", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
uuidownerprojectUuidjobUuidlastUpdated
00001400192074855-5056a550b8-0001-012vdj0001399309581559-5056a550b8-0001-0120001399315558601-5056a550b8-0001-0072014-05-15T17:14:34.855-05:00
10001400254373114-5056a550b8-0001-012vdj0001400250478554-5056a550b8-0001-0120001400254372814-5056a550b8-0001-0072014-05-16T10:32:53.114-05:00
20001400273862423-5056a550b8-0001-012vdj0001400250478554-5056a550b8-0001-0120001400273862119-5056a550b8-0001-0072014-05-16T15:57:42.423-05:00
30001400274448495-5056a550b8-0001-012vdj0001400250478554-5056a550b8-0001-0120001400274448320-5056a550b8-0001-0072014-05-16T16:07:28.494-05:00
40001400274714655-5056a550b8-0001-012vdj0001400250478554-5056a550b8-0001-0120001400274714490-5056a550b8-0001-0072014-05-16T16:11:54.655-05:00
..................
63505097479121213854191-242ac118-0001-012vdj6589143665654501871-242ac118-0001-012ad02cb34-250e-48cb-a06e-973e431b62ee-0072025-01-08T12:13:35.460-06:00
63511948444895656078865-242ac118-0001-012vdj5456400192359305711-242ac118-0001-012c0ab5f4a-97b0-4dc3-93e8-0908c95cb3a4-0072025-01-13T16:44:05.995-06:00
63521819643224410746385-242ac118-0001-012vdj5199144433477554666-242ac116-0001-012773a5cb7-b369-4517-a221-83d57e3899e5-0072025-01-20T03:06:57.762-06:00
63532845695380777266705-242ac118-0001-012vdj5456400192359305711-242ac118-0001-0129188bf80-e868-4e05-a6b4-308c044108d7-0072025-01-23T15:05:59.570-06:00
63543203620026767118831-242ac118-0001-012vdj5456400192359305711-242ac118-0001-012c7cd08ad-a560-4574-a363-b9cc4c5e051d-0072025-01-24T20:57:54.599-06:00
\n", + "

6355 rows × 5 columns

\n", + "
" + ], + "text/plain": [ + " uuid owner \\\n", + "0 0001400192074855-5056a550b8-0001-012 vdj \n", + "1 0001400254373114-5056a550b8-0001-012 vdj \n", + "2 0001400273862423-5056a550b8-0001-012 vdj \n", + "3 0001400274448495-5056a550b8-0001-012 vdj \n", + "4 0001400274714655-5056a550b8-0001-012 vdj \n", + "... ... ... \n", + "6350 5097479121213854191-242ac118-0001-012 vdj \n", + "6351 1948444895656078865-242ac118-0001-012 vdj \n", + "6352 1819643224410746385-242ac118-0001-012 vdj \n", + "6353 2845695380777266705-242ac118-0001-012 vdj \n", + "6354 3203620026767118831-242ac118-0001-012 vdj \n", + "\n", + " projectUuid \\\n", + "0 0001399309581559-5056a550b8-0001-012 \n", + "1 0001400250478554-5056a550b8-0001-012 \n", + "2 0001400250478554-5056a550b8-0001-012 \n", + "3 0001400250478554-5056a550b8-0001-012 \n", + "4 0001400250478554-5056a550b8-0001-012 \n", + "... ... \n", + "6350 6589143665654501871-242ac118-0001-012 \n", + "6351 5456400192359305711-242ac118-0001-012 \n", + "6352 5199144433477554666-242ac116-0001-012 \n", + "6353 5456400192359305711-242ac118-0001-012 \n", + "6354 5456400192359305711-242ac118-0001-012 \n", + "\n", + " jobUuid lastUpdated \n", + "0 0001399315558601-5056a550b8-0001-007 2014-05-15T17:14:34.855-05:00 \n", + "1 0001400254372814-5056a550b8-0001-007 2014-05-16T10:32:53.114-05:00 \n", + "2 0001400273862119-5056a550b8-0001-007 2014-05-16T15:57:42.423-05:00 \n", + "3 0001400274448320-5056a550b8-0001-007 2014-05-16T16:07:28.494-05:00 \n", + "4 0001400274714490-5056a550b8-0001-007 2014-05-16T16:11:54.655-05:00 \n", + "... ... ... 
\n", + "6350 ad02cb34-250e-48cb-a06e-973e431b62ee-007 2025-01-08T12:13:35.460-06:00 \n", + "6351 c0ab5f4a-97b0-4dc3-93e8-0908c95cb3a4-007 2025-01-13T16:44:05.995-06:00 \n", + "6352 773a5cb7-b369-4517-a221-83d57e3899e5-007 2025-01-20T03:06:57.762-06:00 \n", + "6353 9188bf80-e868-4e05-a6b4-308c044108d7-007 2025-01-23T15:05:59.570-06:00 \n", + "6354 c7cd08ad-a560-4574-a363-b9cc4c5e051d-007 2025-01-24T20:57:54.599-06:00 \n", + "\n", + "[6355 rows x 5 columns]" + ] + }, + "execution_count": 47, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df_projectJob" + ] + }, + { + "cell_type": "code", + "execution_count": 48, + "id": "bf9407ef-de0b-4d73-9713-236d9a820376", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Total number of ProjectJob: \t\t\t\t6355\n", + "Total number of Unique projectUuid: \t\t\t1086\n", + "Total number of Unique projectUuid after filtration: \t890\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
uuidownerprojectUuidjobUuidlastUpdated
360001402415655966-5056a550b8-0001-012vdj0001402413135675-5056a550b8-0001-0120001402415655772-5056a550b8-0001-0072017-01-12T17:35:19.626-06:00
370001402584065947-5056a550b8-0001-012vdj0001402413135675-5056a550b8-0001-0120001402584065562-5056a550b8-0001-0072017-01-12T17:35:16.665-06:00
640001404239409839-5056a550b8-0001-012vdj0001402413135675-5056a550b8-0001-0120001404239409637-5056a550b8-0001-0072017-01-12T17:35:13.705-06:00
1570001410472799189-5056a550b8-0001-012vdj0001410472310261-5056a550b8-0001-0120001410472799004-5056a550b8-0001-0072017-01-12T17:36:48.130-06:00
2140001415039388759-5056a550b8-0001-012vdj0001415029221897-5056a550b8-0001-0120001415039388481-5056a550b8-0001-0072017-01-12T17:37:17.842-06:00
..................
63505097479121213854191-242ac118-0001-012vdj6589143665654501871-242ac118-0001-012ad02cb34-250e-48cb-a06e-973e431b62ee-0072025-01-08T12:13:35.460-06:00
63511948444895656078865-242ac118-0001-012vdj5456400192359305711-242ac118-0001-012c0ab5f4a-97b0-4dc3-93e8-0908c95cb3a4-0072025-01-13T16:44:05.995-06:00
63521819643224410746385-242ac118-0001-012vdj5199144433477554666-242ac116-0001-012773a5cb7-b369-4517-a221-83d57e3899e5-0072025-01-20T03:06:57.762-06:00
63532845695380777266705-242ac118-0001-012vdj5456400192359305711-242ac118-0001-0129188bf80-e868-4e05-a6b4-308c044108d7-0072025-01-23T15:05:59.570-06:00
63543203620026767118831-242ac118-0001-012vdj5456400192359305711-242ac118-0001-012c7cd08ad-a560-4574-a363-b9cc4c5e051d-0072025-01-24T20:57:54.599-06:00
\n", + "

5505 rows × 5 columns

\n", + "
" + ], + "text/plain": [ + " uuid owner \\\n", + "36 0001402415655966-5056a550b8-0001-012 vdj \n", + "37 0001402584065947-5056a550b8-0001-012 vdj \n", + "64 0001404239409839-5056a550b8-0001-012 vdj \n", + "157 0001410472799189-5056a550b8-0001-012 vdj \n", + "214 0001415039388759-5056a550b8-0001-012 vdj \n", + "... ... ... \n", + "6350 5097479121213854191-242ac118-0001-012 vdj \n", + "6351 1948444895656078865-242ac118-0001-012 vdj \n", + "6352 1819643224410746385-242ac118-0001-012 vdj \n", + "6353 2845695380777266705-242ac118-0001-012 vdj \n", + "6354 3203620026767118831-242ac118-0001-012 vdj \n", + "\n", + " projectUuid \\\n", + "36 0001402413135675-5056a550b8-0001-012 \n", + "37 0001402413135675-5056a550b8-0001-012 \n", + "64 0001402413135675-5056a550b8-0001-012 \n", + "157 0001410472310261-5056a550b8-0001-012 \n", + "214 0001415029221897-5056a550b8-0001-012 \n", + "... ... \n", + "6350 6589143665654501871-242ac118-0001-012 \n", + "6351 5456400192359305711-242ac118-0001-012 \n", + "6352 5199144433477554666-242ac116-0001-012 \n", + "6353 5456400192359305711-242ac118-0001-012 \n", + "6354 5456400192359305711-242ac118-0001-012 \n", + "\n", + " jobUuid lastUpdated \n", + "36 0001402415655772-5056a550b8-0001-007 2017-01-12T17:35:19.626-06:00 \n", + "37 0001402584065562-5056a550b8-0001-007 2017-01-12T17:35:16.665-06:00 \n", + "64 0001404239409637-5056a550b8-0001-007 2017-01-12T17:35:13.705-06:00 \n", + "157 0001410472799004-5056a550b8-0001-007 2017-01-12T17:36:48.130-06:00 \n", + "214 0001415039388481-5056a550b8-0001-007 2017-01-12T17:37:17.842-06:00 \n", + "... ... ... 
\n", + "6350 ad02cb34-250e-48cb-a06e-973e431b62ee-007 2025-01-08T12:13:35.460-06:00 \n", + "6351 c0ab5f4a-97b0-4dc3-93e8-0908c95cb3a4-007 2025-01-13T16:44:05.995-06:00 \n", + "6352 773a5cb7-b369-4517-a221-83d57e3899e5-007 2025-01-20T03:06:57.762-06:00 \n", + "6353 9188bf80-e868-4e05-a6b4-308c044108d7-007 2025-01-23T15:05:59.570-06:00 \n", + "6354 c7cd08ad-a560-4574-a363-b9cc4c5e051d-007 2025-01-24T20:57:54.599-06:00 \n", + "\n", + "[5505 rows x 5 columns]" + ] + }, + "execution_count": 48, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "print(f'Total number of ProjectJob: \\t\\t\\t\\t{df_projectJob.shape[0]}')\n", + "print(f'Total number of Unique projectUuid: \\t\\t\\t{df_projectJob.projectUuid.nunique()}')\n", + "\n", + "#Filter projectJob Based on Metadata permission file\n", + "filtered_df_projectJob = df_projectJob[df_projectJob.projectUuid.isin(filtered_metadata_perms_df['uuid'])]\n", + "print(f'Total number of Unique projectUuid after filtration: \\t{filtered_df_projectJob.projectUuid.nunique()}')\n", + "\n", + "filtered_df_projectJob" + ] + }, + { + "cell_type": "markdown", + "id": "53013c07-503c-41b5-9bf5-281e30c70d6f", + "metadata": {}, + "source": [ + "## Number of Unique ProjectUUID Each Year for filtered_df_projectJob" + ] + }, + { + "cell_type": "code", + "execution_count": 49, + "id": "c88364c4-3855-4a5d-9fcb-41d0f8bb9c71", + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "df = filtered_df_projectJob.copy()\n", + "df['lastUpdated'] = pd.to_datetime(df['lastUpdated'], utc=True)\n", + " # Extract the year from 'last_updated'\n", + "df['lastUpdated'] = df['lastUpdated'].dt.year\n", + "#Count the number of unique users per year\n", + "unique_users_by_year = df.groupby('lastUpdated')['projectUuid'].nunique()\n", + "# Plot the number of unique users per year\n", + "plt.figure(figsize=(8, 5))\n", + "unique_users_by_year.plot(kind='bar', color='coral')\n", + "plt.title('Number of Unique ProjectUUID For Each Year')\n", + "plt.xlabel('Year')\n", + "plt.ylabel('Number of Unique ProjectUUID')\n", + "plt.xticks(rotation=45)\n", + "plt.grid(True)\n", + "plt.show()\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d10bbd4d-621e-48d2-889c-f4f0221ba7d1", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": 50, + "id": "46ae18e3-355d-48d2-8d50-43bf51985573", + "metadata": {}, + "outputs": [], + "source": [ + "# # number of unique users\n", + "# n = 20\n", + "# # Group by 'username' and count the number of unique 'id' (projects) each user has\n", + "# user_unique_projects = filtered_df_projectJob.groupby('username')['uuid'].nunique()\n", + "# # Sort users by the number of unique projects (descending order)\n", + "# top_n_users = user_unique_projects.sort_values(ascending=False).head(n)\n", + "# # Plot the top 15 users with their unique number of projects\n", + "# plt.figure(figsize=(10, 6))\n", + "# top_n_users.plot(kind='bar', color='royalblue')\n", + "# plt.title(f'Top {n} Users with Unique Projects')\n", + "# plt.xlabel('Username')\n", + "# plt.ylabel('Number of Unique Projects')\n", + "# plt.xticks(rotation=45, ha='right')\n", + "# plt.grid(True)\n", + "# plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": 51, + "id": "6960ed90-610f-4bb6-bf9a-045cbc2e6e73", + "metadata": 
{}, + "outputs": [ + { + "data": { + "text/plain": [ + "projectUuid\n", + "0001430750947192-5056a550b8-0001-012 121\n", + "3057760388135251475-242ac11a-0001-012 114\n", + "0001428091338341-5056a550b8-0001-012 103\n", + "5199144433477554666-242ac116-0001-012 100\n", + "46850669884665370-242ac114-0001-012 73\n", + " ... \n", + "8068155099655311846-242ac11c-0001-012 1\n", + "5211345989129277926-242ac11c-0001-012 1\n", + "7349712692870124006-242ac11c-0001-012 1\n", + "7671983811578696166-242ac11c-0001-012 1\n", + "4542076765918597606-242ac11c-0001-012 1\n", + "Name: count, Length: 1086, dtype: int64" + ] + }, + "execution_count": 51, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df_projectJob.projectUuid.value_counts()" + ] + }, + { + "cell_type": "code", + "execution_count": 52, + "id": "ac66f5f2-f7e1-45df-b6df-13f85ff430ca", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
uuidprojectUuidassociationIds_1associationIds_2ownertask_typefile_namemimeTypelast_updated
10001396029083309-5056a550b8-0001-0120001395346788177-5056a550b8-0001-0120001395939852441-5056a550b8-0001-002Nonevdjauthuploadedgitprep-latest.zipapplication/zip2014-03-28T12:51:23.309-05:00
20001396029805022-5056a550b8-0001-0120001395346788177-5056a550b8-0001-0120001395956517022-5056a550b8-0001-002NonevdjauthuploadedInduction-28.zipapplication/zip2014-03-28T13:03:25.022-05:00
30001396030144907-5056a550b8-0001-0120001395346788177-5056a550b8-0001-0120001396030144691-5056a550b8-0001-002Nonevdjauthuploadedtest10.txttext/plain2014-03-28T13:09:04.907-05:00
40001396039988083-5056a550b8-0001-0120001395346788177-5056a550b8-0001-0120001396039987794-5056a550b8-0001-002Nonevdjauthuploadedtest11.txttext/plain2014-03-28T15:53:08.083-05:00
50001396043273330-5056a550b8-0001-0120001395346788177-5056a550b8-0001-0120001396043273029-5056a550b8-0001-002Nonevdjauthuploadedtest14.txttext/plain2014-03-28T16:47:53.330-05:00
..............................
359435338423137409494545-242ac118-0001-0125456400192359305711-242ac118-0001-0126793987554023894545-242ac112-0001-0025456400192359305711-242ac118-0001-012vdjNone4468_S24_L001_R1_001.fastq.gzNone2025-01-13T16:40:40.230-06:00
359441335427718191574545-242ac118-0001-0125456400192359305711-242ac118-0001-0122833383462017494545-242ac112-0001-0025456400192359305711-242ac118-0001-012vdjNone4468_S24_L001_R2_001.fastq.gzNone2025-01-13T16:40:40.230-06:00
359451840700597200490991-242ac118-0001-0125456400192359305711-242ac118-0001-012366925519251050991-242ac112-0001-0025456400192359305711-242ac118-0001-012vdjNone6634_S25_L001_R1_001.fastq.gzNone2025-01-13T16:40:43.277-06:00
359465023614960920170991-242ac118-0001-0125456400192359305711-242ac118-0001-0123549539235260010991-242ac112-0001-0025456400192359305711-242ac118-0001-012vdjNone6634_S25_L001_R2_001.fastq.gzNone2025-01-13T16:40:43.281-06:00
359477830832104257678865-242ac118-0001-0125456400192359305711-242ac118-0001-0128017190735231118865-242ac112-0001-0025456400192359305711-242ac118-0001-012vdjNoneprimers.fastaNone2025-01-13T16:41:49.035-06:00
\n", + "

29194 rows × 9 columns

\n", + "
" + ], + "text/plain": [ + " uuid \\\n", + "1 0001396029083309-5056a550b8-0001-012 \n", + "2 0001396029805022-5056a550b8-0001-012 \n", + "3 0001396030144907-5056a550b8-0001-012 \n", + "4 0001396039988083-5056a550b8-0001-012 \n", + "5 0001396043273330-5056a550b8-0001-012 \n", + "... ... \n", + "35943 5338423137409494545-242ac118-0001-012 \n", + "35944 1335427718191574545-242ac118-0001-012 \n", + "35945 1840700597200490991-242ac118-0001-012 \n", + "35946 5023614960920170991-242ac118-0001-012 \n", + "35947 7830832104257678865-242ac118-0001-012 \n", + "\n", + " projectUuid \\\n", + "1 0001395346788177-5056a550b8-0001-012 \n", + "2 0001395346788177-5056a550b8-0001-012 \n", + "3 0001395346788177-5056a550b8-0001-012 \n", + "4 0001395346788177-5056a550b8-0001-012 \n", + "5 0001395346788177-5056a550b8-0001-012 \n", + "... ... \n", + "35943 5456400192359305711-242ac118-0001-012 \n", + "35944 5456400192359305711-242ac118-0001-012 \n", + "35945 5456400192359305711-242ac118-0001-012 \n", + "35946 5456400192359305711-242ac118-0001-012 \n", + "35947 5456400192359305711-242ac118-0001-012 \n", + "\n", + " associationIds_1 \\\n", + "1 0001395939852441-5056a550b8-0001-002 \n", + "2 0001395956517022-5056a550b8-0001-002 \n", + "3 0001396030144691-5056a550b8-0001-002 \n", + "4 0001396039987794-5056a550b8-0001-002 \n", + "5 0001396043273029-5056a550b8-0001-002 \n", + "... ... \n", + "35943 6793987554023894545-242ac112-0001-002 \n", + "35944 2833383462017494545-242ac112-0001-002 \n", + "35945 366925519251050991-242ac112-0001-002 \n", + "35946 3549539235260010991-242ac112-0001-002 \n", + "35947 8017190735231118865-242ac112-0001-002 \n", + "\n", + " associationIds_2 owner task_type \\\n", + "1 None vdjauth uploaded \n", + "2 None vdjauth uploaded \n", + "3 None vdjauth uploaded \n", + "4 None vdjauth uploaded \n", + "5 None vdjauth uploaded \n", + "... ... ... ... 
\n", + "35943 5456400192359305711-242ac118-0001-012 vdj None \n", + "35944 5456400192359305711-242ac118-0001-012 vdj None \n", + "35945 5456400192359305711-242ac118-0001-012 vdj None \n", + "35946 5456400192359305711-242ac118-0001-012 vdj None \n", + "35947 5456400192359305711-242ac118-0001-012 vdj None \n", + "\n", + " file_name mimeType \\\n", + "1 gitprep-latest.zip application/zip \n", + "2 Induction-28.zip application/zip \n", + "3 test10.txt text/plain \n", + "4 test11.txt text/plain \n", + "5 test14.txt text/plain \n", + "... ... ... \n", + "35943 4468_S24_L001_R1_001.fastq.gz None \n", + "35944 4468_S24_L001_R2_001.fastq.gz None \n", + "35945 6634_S25_L001_R1_001.fastq.gz None \n", + "35946 6634_S25_L001_R2_001.fastq.gz None \n", + "35947 primers.fasta None \n", + "\n", + " last_updated \n", + "1 2014-03-28T12:51:23.309-05:00 \n", + "2 2014-03-28T13:03:25.022-05:00 \n", + "3 2014-03-28T13:09:04.907-05:00 \n", + "4 2014-03-28T15:53:08.083-05:00 \n", + "5 2014-03-28T16:47:53.330-05:00 \n", + "... ... \n", + "35943 2025-01-13T16:40:40.230-06:00 \n", + "35944 2025-01-13T16:40:40.230-06:00 \n", + "35945 2025-01-13T16:40:43.277-06:00 \n", + "35946 2025-01-13T16:40:43.281-06:00 \n", + "35947 2025-01-13T16:41:49.035-06:00 \n", + "\n", + "[29194 rows x 9 columns]" + ] + }, + "execution_count": 52, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "filtered_projectFiles = df_projectFiles[df_projectFiles.projectUuid.isin(filtered_metadata_perms_df['uuid'])]\n", + "filtered_projectFiles" + ] + }, + { + "cell_type": "code", + "execution_count": 53, + "id": "c78f0e99-576a-4d64-aa3b-b95e263a45c4", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
uuidprojectUuidassociationIds_1associationIds_2ownertask_typefile_namemimeTypelast_updated
00001395955349445-5056a550b8-0001-012NoneNonevdjauthNoneNone2014-03-27T16:22:29.444-05:00
10001396029083309-5056a550b8-0001-0120001395346788177-5056a550b8-0001-0120001395939852441-5056a550b8-0001-002Nonevdjauthuploadedgitprep-latest.zipapplication/zip2014-03-28T12:51:23.309-05:00
20001396029805022-5056a550b8-0001-0120001395346788177-5056a550b8-0001-0120001395956517022-5056a550b8-0001-002NonevdjauthuploadedInduction-28.zipapplication/zip2014-03-28T13:03:25.022-05:00
30001396030144907-5056a550b8-0001-0120001395346788177-5056a550b8-0001-0120001396030144691-5056a550b8-0001-002Nonevdjauthuploadedtest10.txttext/plain2014-03-28T13:09:04.907-05:00
40001396039988083-5056a550b8-0001-0120001395346788177-5056a550b8-0001-0120001396039987794-5056a550b8-0001-002Nonevdjauthuploadedtest11.txttext/plain2014-03-28T15:53:08.083-05:00
..............................
359435338423137409494545-242ac118-0001-0125456400192359305711-242ac118-0001-0126793987554023894545-242ac112-0001-0025456400192359305711-242ac118-0001-012vdjNone4468_S24_L001_R1_001.fastq.gzNone2025-01-13T16:40:40.230-06:00
359441335427718191574545-242ac118-0001-0125456400192359305711-242ac118-0001-0122833383462017494545-242ac112-0001-0025456400192359305711-242ac118-0001-012vdjNone4468_S24_L001_R2_001.fastq.gzNone2025-01-13T16:40:40.230-06:00
359451840700597200490991-242ac118-0001-0125456400192359305711-242ac118-0001-012366925519251050991-242ac112-0001-0025456400192359305711-242ac118-0001-012vdjNone6634_S25_L001_R1_001.fastq.gzNone2025-01-13T16:40:43.277-06:00
359465023614960920170991-242ac118-0001-0125456400192359305711-242ac118-0001-0123549539235260010991-242ac112-0001-0025456400192359305711-242ac118-0001-012vdjNone6634_S25_L001_R2_001.fastq.gzNone2025-01-13T16:40:43.281-06:00
359477830832104257678865-242ac118-0001-0125456400192359305711-242ac118-0001-0128017190735231118865-242ac112-0001-0025456400192359305711-242ac118-0001-012vdjNoneprimers.fastaNone2025-01-13T16:41:49.035-06:00
\n", + "

35948 rows × 9 columns

\n", + "
" + ], + "text/plain": [ + " uuid \\\n", + "0 0001395955349445-5056a550b8-0001-012 \n", + "1 0001396029083309-5056a550b8-0001-012 \n", + "2 0001396029805022-5056a550b8-0001-012 \n", + "3 0001396030144907-5056a550b8-0001-012 \n", + "4 0001396039988083-5056a550b8-0001-012 \n", + "... ... \n", + "35943 5338423137409494545-242ac118-0001-012 \n", + "35944 1335427718191574545-242ac118-0001-012 \n", + "35945 1840700597200490991-242ac118-0001-012 \n", + "35946 5023614960920170991-242ac118-0001-012 \n", + "35947 7830832104257678865-242ac118-0001-012 \n", + "\n", + " projectUuid \\\n", + "0 \n", + "1 0001395346788177-5056a550b8-0001-012 \n", + "2 0001395346788177-5056a550b8-0001-012 \n", + "3 0001395346788177-5056a550b8-0001-012 \n", + "4 0001395346788177-5056a550b8-0001-012 \n", + "... ... \n", + "35943 5456400192359305711-242ac118-0001-012 \n", + "35944 5456400192359305711-242ac118-0001-012 \n", + "35945 5456400192359305711-242ac118-0001-012 \n", + "35946 5456400192359305711-242ac118-0001-012 \n", + "35947 5456400192359305711-242ac118-0001-012 \n", + "\n", + " associationIds_1 \\\n", + "0 None \n", + "1 0001395939852441-5056a550b8-0001-002 \n", + "2 0001395956517022-5056a550b8-0001-002 \n", + "3 0001396030144691-5056a550b8-0001-002 \n", + "4 0001396039987794-5056a550b8-0001-002 \n", + "... ... \n", + "35943 6793987554023894545-242ac112-0001-002 \n", + "35944 2833383462017494545-242ac112-0001-002 \n", + "35945 366925519251050991-242ac112-0001-002 \n", + "35946 3549539235260010991-242ac112-0001-002 \n", + "35947 8017190735231118865-242ac112-0001-002 \n", + "\n", + " associationIds_2 owner task_type \\\n", + "0 None vdjauth \n", + "1 None vdjauth uploaded \n", + "2 None vdjauth uploaded \n", + "3 None vdjauth uploaded \n", + "4 None vdjauth uploaded \n", + "... ... ... ... 
\n", + "35943 5456400192359305711-242ac118-0001-012 vdj None \n", + "35944 5456400192359305711-242ac118-0001-012 vdj None \n", + "35945 5456400192359305711-242ac118-0001-012 vdj None \n", + "35946 5456400192359305711-242ac118-0001-012 vdj None \n", + "35947 5456400192359305711-242ac118-0001-012 vdj None \n", + "\n", + " file_name mimeType \\\n", + "0 None None \n", + "1 gitprep-latest.zip application/zip \n", + "2 Induction-28.zip application/zip \n", + "3 test10.txt text/plain \n", + "4 test11.txt text/plain \n", + "... ... ... \n", + "35943 4468_S24_L001_R1_001.fastq.gz None \n", + "35944 4468_S24_L001_R2_001.fastq.gz None \n", + "35945 6634_S25_L001_R1_001.fastq.gz None \n", + "35946 6634_S25_L001_R2_001.fastq.gz None \n", + "35947 primers.fasta None \n", + "\n", + " last_updated \n", + "0 2014-03-27T16:22:29.444-05:00 \n", + "1 2014-03-28T12:51:23.309-05:00 \n", + "2 2014-03-28T13:03:25.022-05:00 \n", + "3 2014-03-28T13:09:04.907-05:00 \n", + "4 2014-03-28T15:53:08.083-05:00 \n", + "... ... \n", + "35943 2025-01-13T16:40:40.230-06:00 \n", + "35944 2025-01-13T16:40:40.230-06:00 \n", + "35945 2025-01-13T16:40:43.277-06:00 \n", + "35946 2025-01-13T16:40:43.281-06:00 \n", + "35947 2025-01-13T16:41:49.035-06:00 \n", + "\n", + "[35948 rows x 9 columns]" + ] + }, + "execution_count": 53, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df_projectFiles.task_type.nunique()\n", + "df_projectFiles.associationIds_1.nunique()\n", + "df_projectFiles.projectUuid.value_counts()\n", + "df_projectFiles" + ] + }, + { + "cell_type": "code", + "execution_count": 54, + "id": "3cd9e0c3-37c9-41d9-bb98-3373235ad548", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
uuidprojectUuidassociationIds_1associationIds_2ownertask_typefile_namemimeTypelast_updated
10001396029083309-5056a550b8-0001-0120001395346788177-5056a550b8-0001-0120001395939852441-5056a550b8-0001-002Nonevdjauthuploadedgitprep-latest.zipapplication/zip2014-03-28T12:51:23.309-05:00
20001396029805022-5056a550b8-0001-0120001395346788177-5056a550b8-0001-0120001395956517022-5056a550b8-0001-002NonevdjauthuploadedInduction-28.zipapplication/zip2014-03-28T13:03:25.022-05:00
30001396030144907-5056a550b8-0001-0120001395346788177-5056a550b8-0001-0120001396030144691-5056a550b8-0001-002Nonevdjauthuploadedtest10.txttext/plain2014-03-28T13:09:04.907-05:00
40001396039988083-5056a550b8-0001-0120001395346788177-5056a550b8-0001-0120001396039987794-5056a550b8-0001-002Nonevdjauthuploadedtest11.txttext/plain2014-03-28T15:53:08.083-05:00
50001396043273330-5056a550b8-0001-0120001395346788177-5056a550b8-0001-0120001396043273029-5056a550b8-0001-002Nonevdjauthuploadedtest14.txttext/plain2014-03-28T16:47:53.330-05:00
..............................
359435338423137409494545-242ac118-0001-0125456400192359305711-242ac118-0001-0126793987554023894545-242ac112-0001-0025456400192359305711-242ac118-0001-012vdjNone4468_S24_L001_R1_001.fastq.gzNone2025-01-13T16:40:40.230-06:00
359441335427718191574545-242ac118-0001-0125456400192359305711-242ac118-0001-0122833383462017494545-242ac112-0001-0025456400192359305711-242ac118-0001-012vdjNone4468_S24_L001_R2_001.fastq.gzNone2025-01-13T16:40:40.230-06:00
359451840700597200490991-242ac118-0001-0125456400192359305711-242ac118-0001-012366925519251050991-242ac112-0001-0025456400192359305711-242ac118-0001-012vdjNone6634_S25_L001_R1_001.fastq.gzNone2025-01-13T16:40:43.277-06:00
359465023614960920170991-242ac118-0001-0125456400192359305711-242ac118-0001-0123549539235260010991-242ac112-0001-0025456400192359305711-242ac118-0001-012vdjNone6634_S25_L001_R2_001.fastq.gzNone2025-01-13T16:40:43.281-06:00
359477830832104257678865-242ac118-0001-0125456400192359305711-242ac118-0001-0128017190735231118865-242ac112-0001-0025456400192359305711-242ac118-0001-012vdjNoneprimers.fastaNone2025-01-13T16:41:49.035-06:00
\n", + "

29194 rows × 9 columns

\n", + "
" + ], + "text/plain": [ + " uuid \\\n", + "1 0001396029083309-5056a550b8-0001-012 \n", + "2 0001396029805022-5056a550b8-0001-012 \n", + "3 0001396030144907-5056a550b8-0001-012 \n", + "4 0001396039988083-5056a550b8-0001-012 \n", + "5 0001396043273330-5056a550b8-0001-012 \n", + "... ... \n", + "35943 5338423137409494545-242ac118-0001-012 \n", + "35944 1335427718191574545-242ac118-0001-012 \n", + "35945 1840700597200490991-242ac118-0001-012 \n", + "35946 5023614960920170991-242ac118-0001-012 \n", + "35947 7830832104257678865-242ac118-0001-012 \n", + "\n", + " projectUuid \\\n", + "1 0001395346788177-5056a550b8-0001-012 \n", + "2 0001395346788177-5056a550b8-0001-012 \n", + "3 0001395346788177-5056a550b8-0001-012 \n", + "4 0001395346788177-5056a550b8-0001-012 \n", + "5 0001395346788177-5056a550b8-0001-012 \n", + "... ... \n", + "35943 5456400192359305711-242ac118-0001-012 \n", + "35944 5456400192359305711-242ac118-0001-012 \n", + "35945 5456400192359305711-242ac118-0001-012 \n", + "35946 5456400192359305711-242ac118-0001-012 \n", + "35947 5456400192359305711-242ac118-0001-012 \n", + "\n", + " associationIds_1 \\\n", + "1 0001395939852441-5056a550b8-0001-002 \n", + "2 0001395956517022-5056a550b8-0001-002 \n", + "3 0001396030144691-5056a550b8-0001-002 \n", + "4 0001396039987794-5056a550b8-0001-002 \n", + "5 0001396043273029-5056a550b8-0001-002 \n", + "... ... \n", + "35943 6793987554023894545-242ac112-0001-002 \n", + "35944 2833383462017494545-242ac112-0001-002 \n", + "35945 366925519251050991-242ac112-0001-002 \n", + "35946 3549539235260010991-242ac112-0001-002 \n", + "35947 8017190735231118865-242ac112-0001-002 \n", + "\n", + " associationIds_2 owner task_type \\\n", + "1 None vdjauth uploaded \n", + "2 None vdjauth uploaded \n", + "3 None vdjauth uploaded \n", + "4 None vdjauth uploaded \n", + "5 None vdjauth uploaded \n", + "... ... ... ... 
\n", + "35943 5456400192359305711-242ac118-0001-012 vdj None \n", + "35944 5456400192359305711-242ac118-0001-012 vdj None \n", + "35945 5456400192359305711-242ac118-0001-012 vdj None \n", + "35946 5456400192359305711-242ac118-0001-012 vdj None \n", + "35947 5456400192359305711-242ac118-0001-012 vdj None \n", + "\n", + " file_name mimeType \\\n", + "1 gitprep-latest.zip application/zip \n", + "2 Induction-28.zip application/zip \n", + "3 test10.txt text/plain \n", + "4 test11.txt text/plain \n", + "5 test14.txt text/plain \n", + "... ... ... \n", + "35943 4468_S24_L001_R1_001.fastq.gz None \n", + "35944 4468_S24_L001_R2_001.fastq.gz None \n", + "35945 6634_S25_L001_R1_001.fastq.gz None \n", + "35946 6634_S25_L001_R2_001.fastq.gz None \n", + "35947 primers.fasta None \n", + "\n", + " last_updated \n", + "1 2014-03-28T12:51:23.309-05:00 \n", + "2 2014-03-28T13:03:25.022-05:00 \n", + "3 2014-03-28T13:09:04.907-05:00 \n", + "4 2014-03-28T15:53:08.083-05:00 \n", + "5 2014-03-28T16:47:53.330-05:00 \n", + "... ... 
\n", + "35943 2025-01-13T16:40:40.230-06:00 \n", + "35944 2025-01-13T16:40:40.230-06:00 \n", + "35945 2025-01-13T16:40:43.277-06:00 \n", + "35946 2025-01-13T16:40:43.281-06:00 \n", + "35947 2025-01-13T16:41:49.035-06:00 \n", + "\n", + "[29194 rows x 9 columns]" + ] + }, + "execution_count": 54, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "filtered_df_projectFiles = df_projectFiles[df_projectFiles.projectUuid.isin(filtered_metadata_perms_df['uuid'])]\n", + "filtered_df_projectFiles" + ] + }, + { + "cell_type": "code", + "execution_count": 55, + "id": "96384f71-90ec-41d0-8b3b-4cda8e4a93fc", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "1916" + ] + }, + "execution_count": 55, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df_projectFiles.projectUuid.nunique()" + ] + }, + { + "cell_type": "code", + "execution_count": 56, + "id": "552ed66a-fa62-4cdd-9b88-2b2ee726cc05", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "1223" + ] + }, + "execution_count": 56, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "filtered_df_projectFiles.projectUuid.nunique()" + ] + }, + { + "cell_type": "code", + "execution_count": 57, + "id": "ed51ee48-e1e0-4629-8e20-b601b98f0b46", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
system_idownerapp_idstatuslast_updateduuidarchive_pathremote_outcomeupdate_tokenparameters.Creator
0ls6.tacc.utexas.eduvdjrepcalc-ls6-2.0u8FINISHED2025-01-25 15:43:51.678c7cd08ad-a560-4574-a363-b9cc4c5e051d-007/projects/5456400192359305711-242ac118-0001-01...FINISHEDeb27e311-4a37-4aeb-b649-056704dd2711schristley
1ls6.tacc.utexas.eduvdjigblast-ls6-1.20u6FINISHED2025-01-24 04:20:37.8919188bf80-e868-4e05-a6b4-308c044108d7-007/projects/5456400192359305711-242ac118-0001-01...FINISHED5e2528fd-25d6-4473-9287-6a67a8de8391schristley
2ls6.tacc.utexas.eduvdjigblast-ls6-1.20u6FAILED2025-01-22 15:04:46.891773a5cb7-b369-4517-a221-83d57e3899e5-007/projects/5199144433477554666-242ac116-0001-01...FAILED_SKIP_ARCHIVE78b89c14-3dec-4aa8-acf8-d2592064e3a4scott_public
3ls6.tacc.utexas.eduvdjvdj_pipe-ls6-0.1.7u2FINISHED2025-01-14 22:31:02.980c0ab5f4a-97b0-4dc3-93e8-0908c95cb3a4-007/projects/5456400192359305711-242ac118-0001-01...FINISHED1e2f122d-5e5b-4f14-931f-ca55803115ffschristley
4ls6.tacc.utexas.eduvdjvdj_pipe-ls6-0.1.7u2FINISHED2025-01-09 04:21:12.476ad02cb34-250e-48cb-a06e-973e431b62ee-007/projects/6589143665654501871-242ac118-0001-01...FINISHED1069949d-1d9a-453f-80b8-7372019aba31schristley
.................................
15515lonestar.tacc.utexas.edumlevinvdj_pipe-0.0.16u2FINISHED2014-11-04 18:33:43.0000001415039388481-5056a550b8-0001-007/projects/0001415029221897-5056a550b8-0001-012...FINISHEDf1941c26-0827-4812-a7e3-c03e6ba53803NaN
15574lonestar.tacc.utexas.edumlevinvdj_pipe-0.0.16u2FINISHED2014-09-11 17:09:18.0000001410472799004-5056a550b8-0001-007/projects/0001410472310261-5056a550b8-0001-012...FINISHED522d82f1-159e-4b9e-8766-3ad0a23b6985NaN
15672lonestar.tacc.utexas.eduesalinavdj_pipe-0.0.12u1FINISHED2014-07-01 13:34:09.0000001404239409637-5056a550b8-0001-007/projects/0001402413135675-5056a550b8-0001-012...FINISHED5300feea-5337-4955-aa40-8a4c9b955e5dNaN
15700lonestar.tacc.utexas.eduesalinavdj_pipe-0.0.12u1FINISHED2014-06-12 09:43:38.0000001402584065562-5056a550b8-0001-007/projects/0001402413135675-5056a550b8-0001-012...FINISHEDeded8505-6fc6-477a-b5c5-ffb3e50eb731NaN
15701lonestar.tacc.utexas.eduesalinavdj_pipe-0.0.12u1FINISHED2014-06-10 10:55:42.0000001402415655772-5056a550b8-0001-007/projects/0001402413135675-5056a550b8-0001-012...FINISHEDa1227a4c-2c94-4ff9-9542-69ea1db4e164NaN
\n", + "

5477 rows × 10 columns

\n", + "
" + ], + "text/plain": [ + " system_id owner app_id status \\\n", + "0 ls6.tacc.utexas.edu vdj repcalc-ls6-2.0u8 FINISHED \n", + "1 ls6.tacc.utexas.edu vdj igblast-ls6-1.20u6 FINISHED \n", + "2 ls6.tacc.utexas.edu vdj igblast-ls6-1.20u6 FAILED \n", + "3 ls6.tacc.utexas.edu vdj vdj_pipe-ls6-0.1.7u2 FINISHED \n", + "4 ls6.tacc.utexas.edu vdj vdj_pipe-ls6-0.1.7u2 FINISHED \n", + "... ... ... ... ... \n", + "15515 lonestar.tacc.utexas.edu mlevin vdj_pipe-0.0.16u2 FINISHED \n", + "15574 lonestar.tacc.utexas.edu mlevin vdj_pipe-0.0.16u2 FINISHED \n", + "15672 lonestar.tacc.utexas.edu esalina vdj_pipe-0.0.12u1 FINISHED \n", + "15700 lonestar.tacc.utexas.edu esalina vdj_pipe-0.0.12u1 FINISHED \n", + "15701 lonestar.tacc.utexas.edu esalina vdj_pipe-0.0.12u1 FINISHED \n", + "\n", + " last_updated uuid \\\n", + "0 2025-01-25 15:43:51.678 c7cd08ad-a560-4574-a363-b9cc4c5e051d-007 \n", + "1 2025-01-24 04:20:37.891 9188bf80-e868-4e05-a6b4-308c044108d7-007 \n", + "2 2025-01-22 15:04:46.891 773a5cb7-b369-4517-a221-83d57e3899e5-007 \n", + "3 2025-01-14 22:31:02.980 c0ab5f4a-97b0-4dc3-93e8-0908c95cb3a4-007 \n", + "4 2025-01-09 04:21:12.476 ad02cb34-250e-48cb-a06e-973e431b62ee-007 \n", + "... ... ... \n", + "15515 2014-11-04 18:33:43.000 0001415039388481-5056a550b8-0001-007 \n", + "15574 2014-09-11 17:09:18.000 0001410472799004-5056a550b8-0001-007 \n", + "15672 2014-07-01 13:34:09.000 0001404239409637-5056a550b8-0001-007 \n", + "15700 2014-06-12 09:43:38.000 0001402584065562-5056a550b8-0001-007 \n", + "15701 2014-06-10 10:55:42.000 0001402415655772-5056a550b8-0001-007 \n", + "\n", + " archive_path remote_outcome \\\n", + "0 /projects/5456400192359305711-242ac118-0001-01... FINISHED \n", + "1 /projects/5456400192359305711-242ac118-0001-01... FINISHED \n", + "2 /projects/5199144433477554666-242ac116-0001-01... FAILED_SKIP_ARCHIVE \n", + "3 /projects/5456400192359305711-242ac118-0001-01... FINISHED \n", + "4 /projects/6589143665654501871-242ac118-0001-01... FINISHED \n", + "... ... ... 
\n", + "15515 /projects/0001415029221897-5056a550b8-0001-012... FINISHED \n", + "15574 /projects/0001410472310261-5056a550b8-0001-012... FINISHED \n", + "15672 /projects/0001402413135675-5056a550b8-0001-012... FINISHED \n", + "15700 /projects/0001402413135675-5056a550b8-0001-012... FINISHED \n", + "15701 /projects/0001402413135675-5056a550b8-0001-012... FINISHED \n", + "\n", + " update_token parameters.Creator \n", + "0 eb27e311-4a37-4aeb-b649-056704dd2711 schristley \n", + "1 5e2528fd-25d6-4473-9287-6a67a8de8391 schristley \n", + "2 78b89c14-3dec-4aa8-acf8-d2592064e3a4 scott_public \n", + "3 1e2f122d-5e5b-4f14-931f-ca55803115ff schristley \n", + "4 1069949d-1d9a-453f-80b8-7372019aba31 schristley \n", + "... ... ... \n", + "15515 f1941c26-0827-4812-a7e3-c03e6ba53803 NaN \n", + "15574 522d82f1-159e-4b9e-8766-3ad0a23b6985 NaN \n", + "15672 5300feea-5337-4955-aa40-8a4c9b955e5d NaN \n", + "15700 eded8505-6fc6-477a-b5c5-ffb3e50eb731 NaN \n", + "15701 a1227a4c-2c94-4ff9-9542-69ea1db4e164 NaN \n", + "\n", + "[5477 rows x 10 columns]" + ] + }, + "execution_count": 57, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "filtered_jobs_all_df = jobs_all_df[jobs_all_df.uuid.isin(filtered_df_projectJob['jobUuid'])]\n", + "filtered_jobs_all_df" + ] + }, + { + "cell_type": "code", + "execution_count": 58, + "id": "59652cd5-4817-4cd8-9bc4-fb4f013f0c27", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
system_idownerapp_idstatuslast_updateduuidarchive_pathremote_outcomeupdate_tokenparameters.Creator
0ls6.tacc.utexas.eduvdjrepcalc-ls6-2.0u8FINISHED2025-01-25 15:43:51.678c7cd08ad-a560-4574-a363-b9cc4c5e051d-007/projects/5456400192359305711-242ac118-0001-01...FINISHEDeb27e311-4a37-4aeb-b649-056704dd2711schristley
1ls6.tacc.utexas.eduvdjigblast-ls6-1.20u6FINISHED2025-01-24 04:20:37.8919188bf80-e868-4e05-a6b4-308c044108d7-007/projects/5456400192359305711-242ac118-0001-01...FINISHED5e2528fd-25d6-4473-9287-6a67a8de8391schristley
2ls6.tacc.utexas.eduvdjigblast-ls6-1.20u6FAILED2025-01-22 15:04:46.891773a5cb7-b369-4517-a221-83d57e3899e5-007/projects/5199144433477554666-242ac116-0001-01...FAILED_SKIP_ARCHIVE78b89c14-3dec-4aa8-acf8-d2592064e3a4scott_public
3ls6.tacc.utexas.eduvdjvdj_pipe-ls6-0.1.7u2FINISHED2025-01-14 22:31:02.980c0ab5f4a-97b0-4dc3-93e8-0908c95cb3a4-007/projects/5456400192359305711-242ac118-0001-01...FINISHED1e2f122d-5e5b-4f14-931f-ca55803115ffschristley
4ls6.tacc.utexas.eduvdjvdj_pipe-ls6-0.1.7u2FINISHED2025-01-09 04:21:12.476ad02cb34-250e-48cb-a06e-973e431b62ee-007/projects/6589143665654501871-242ac118-0001-01...FINISHED1069949d-1d9a-453f-80b8-7372019aba31schristley
.................................
15776my-lonestarjfonnermy-vdj_pipe-0.0.4FINISHED2014-03-31 16:38:39.0000001396301879424-5056a550b8-0001-007/scratch/01114/jfonner/vdj/analyses/foo123-201...FINISHED3b188d18-7955-49b6-bc21-10a557ced542NaN
15777my-lonestarjfonnermy-vdj_pipe-0.0.4FINISHED2014-03-31 15:44:00.0000001396298592090-5056a550b8-0001-007/scratch/01114/jfonner/vdj/analyses/vdj_test20...FINISHEDc9dd99e9-2ef2-4fd7-b211-26b56162b21eNaN
15778my-lonestarjfonnermy-vdj_pipe-0.0.4FINISHED2014-03-31 15:35:18.0000001396298085562-5056a550b8-0001-007/scratch/01114/jfonner/vdj/analyses/vdj_test20...FINISHED282196b2-9972-4615-944d-777e1ee7826cNaN
15779my-lonestarjfonnermy-vdj_pipe-0.0.4FAILED2014-03-31 15:28:36.0000001396297676287-5056a550b8-0001-007/scratch/01114/jfonner/vdj/analyses/vdj_test20...FAILEDdc81e8a3-9869-47cc-8bee-3d254bb805d1NaN
15780my-lonestarjfonnermy-vdj_pipe-0.0.4FAILED2014-03-31 14:50:18.0000001396295290656-5056a550b8-0001-007/scratch/01114/jfonner/vdj/analyses/vdj_test20...FAILED08f920a6-e4c1-4029-9ac2-e1de96e7d23aNaN
\n", + "

15781 rows × 10 columns

\n", + "
" + ], + "text/plain": [ + " system_id owner app_id status \\\n", + "0 ls6.tacc.utexas.edu vdj repcalc-ls6-2.0u8 FINISHED \n", + "1 ls6.tacc.utexas.edu vdj igblast-ls6-1.20u6 FINISHED \n", + "2 ls6.tacc.utexas.edu vdj igblast-ls6-1.20u6 FAILED \n", + "3 ls6.tacc.utexas.edu vdj vdj_pipe-ls6-0.1.7u2 FINISHED \n", + "4 ls6.tacc.utexas.edu vdj vdj_pipe-ls6-0.1.7u2 FINISHED \n", + "... ... ... ... ... \n", + "15776 my-lonestar jfonner my-vdj_pipe-0.0.4 FINISHED \n", + "15777 my-lonestar jfonner my-vdj_pipe-0.0.4 FINISHED \n", + "15778 my-lonestar jfonner my-vdj_pipe-0.0.4 FINISHED \n", + "15779 my-lonestar jfonner my-vdj_pipe-0.0.4 FAILED \n", + "15780 my-lonestar jfonner my-vdj_pipe-0.0.4 FAILED \n", + "\n", + " last_updated uuid \\\n", + "0 2025-01-25 15:43:51.678 c7cd08ad-a560-4574-a363-b9cc4c5e051d-007 \n", + "1 2025-01-24 04:20:37.891 9188bf80-e868-4e05-a6b4-308c044108d7-007 \n", + "2 2025-01-22 15:04:46.891 773a5cb7-b369-4517-a221-83d57e3899e5-007 \n", + "3 2025-01-14 22:31:02.980 c0ab5f4a-97b0-4dc3-93e8-0908c95cb3a4-007 \n", + "4 2025-01-09 04:21:12.476 ad02cb34-250e-48cb-a06e-973e431b62ee-007 \n", + "... ... ... \n", + "15776 2014-03-31 16:38:39.000 0001396301879424-5056a550b8-0001-007 \n", + "15777 2014-03-31 15:44:00.000 0001396298592090-5056a550b8-0001-007 \n", + "15778 2014-03-31 15:35:18.000 0001396298085562-5056a550b8-0001-007 \n", + "15779 2014-03-31 15:28:36.000 0001396297676287-5056a550b8-0001-007 \n", + "15780 2014-03-31 14:50:18.000 0001396295290656-5056a550b8-0001-007 \n", + "\n", + " archive_path remote_outcome \\\n", + "0 /projects/5456400192359305711-242ac118-0001-01... FINISHED \n", + "1 /projects/5456400192359305711-242ac118-0001-01... FINISHED \n", + "2 /projects/5199144433477554666-242ac116-0001-01... FAILED_SKIP_ARCHIVE \n", + "3 /projects/5456400192359305711-242ac118-0001-01... FINISHED \n", + "4 /projects/6589143665654501871-242ac118-0001-01... FINISHED \n", + "... ... ... \n", + "15776 /scratch/01114/jfonner/vdj/analyses/foo123-201... 
FINISHED \n", + "15777 /scratch/01114/jfonner/vdj/analyses/vdj_test20... FINISHED \n", + "15778 /scratch/01114/jfonner/vdj/analyses/vdj_test20... FINISHED \n", + "15779 /scratch/01114/jfonner/vdj/analyses/vdj_test20... FAILED \n", + "15780 /scratch/01114/jfonner/vdj/analyses/vdj_test20... FAILED \n", + "\n", + " update_token parameters.Creator \n", + "0 eb27e311-4a37-4aeb-b649-056704dd2711 schristley \n", + "1 5e2528fd-25d6-4473-9287-6a67a8de8391 schristley \n", + "2 78b89c14-3dec-4aa8-acf8-d2592064e3a4 scott_public \n", + "3 1e2f122d-5e5b-4f14-931f-ca55803115ff schristley \n", + "4 1069949d-1d9a-453f-80b8-7372019aba31 schristley \n", + "... ... ... \n", + "15776 3b188d18-7955-49b6-bc21-10a557ced542 NaN \n", + "15777 c9dd99e9-2ef2-4fd7-b211-26b56162b21e NaN \n", + "15778 282196b2-9972-4615-944d-777e1ee7826c NaN \n", + "15779 dc81e8a3-9869-47cc-8bee-3d254bb805d1 NaN \n", + "15780 08f920a6-e4c1-4029-9ac2-e1de96e7d23a NaN \n", + "\n", + "[15781 rows x 10 columns]" + ] + }, + "execution_count": 58, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "filtered_jobs_all_df['parameters.Creator'].value_counts()\n", + "filtered_jobs_all_df['uuid'].value_counts()\n", + "jobs_all_df\n" + ] + }, + { + "cell_type": "code", + "execution_count": 59, + "id": "33845402-0431-4f5a-a7c3-ef8e62e943d7", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "(5505, 5)\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
uuidownerprojectUuidjobUuidlastUpdated
360001402415655966-5056a550b8-0001-012vdj0001402413135675-5056a550b8-0001-0120001402415655772-5056a550b8-0001-0072017-01-12T17:35:19.626-06:00
370001402584065947-5056a550b8-0001-012vdj0001402413135675-5056a550b8-0001-0120001402584065562-5056a550b8-0001-0072017-01-12T17:35:16.665-06:00
640001404239409839-5056a550b8-0001-012vdj0001402413135675-5056a550b8-0001-0120001404239409637-5056a550b8-0001-0072017-01-12T17:35:13.705-06:00
1570001410472799189-5056a550b8-0001-012vdj0001410472310261-5056a550b8-0001-0120001410472799004-5056a550b8-0001-0072017-01-12T17:36:48.130-06:00
2140001415039388759-5056a550b8-0001-012vdj0001415029221897-5056a550b8-0001-0120001415039388481-5056a550b8-0001-0072017-01-12T17:37:17.842-06:00
\n", + "
" + ], + "text/plain": [ + " uuid owner \\\n", + "36 0001402415655966-5056a550b8-0001-012 vdj \n", + "37 0001402584065947-5056a550b8-0001-012 vdj \n", + "64 0001404239409839-5056a550b8-0001-012 vdj \n", + "157 0001410472799189-5056a550b8-0001-012 vdj \n", + "214 0001415039388759-5056a550b8-0001-012 vdj \n", + "\n", + " projectUuid \\\n", + "36 0001402413135675-5056a550b8-0001-012 \n", + "37 0001402413135675-5056a550b8-0001-012 \n", + "64 0001402413135675-5056a550b8-0001-012 \n", + "157 0001410472310261-5056a550b8-0001-012 \n", + "214 0001415029221897-5056a550b8-0001-012 \n", + "\n", + " jobUuid lastUpdated \n", + "36 0001402415655772-5056a550b8-0001-007 2017-01-12T17:35:19.626-06:00 \n", + "37 0001402584065562-5056a550b8-0001-007 2017-01-12T17:35:16.665-06:00 \n", + "64 0001404239409637-5056a550b8-0001-007 2017-01-12T17:35:13.705-06:00 \n", + "157 0001410472799004-5056a550b8-0001-007 2017-01-12T17:36:48.130-06:00 \n", + "214 0001415039388481-5056a550b8-0001-007 2017-01-12T17:37:17.842-06:00 " + ] + }, + "execution_count": 59, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "print(filtered_df_projectJob.shape)\n", + "filtered_df_projectJob.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 60, + "id": "a5a1c832-2af1-4387-a3d8-b522e35268dd", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "(642544, 7)\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
idlast_updatedpermissionusernameuuidtenant_idyear
3142014-01-29 10:28:16READ_WRITEjfonner0001389977207738-5056a550b8-0001-012vdjserver.org2014
4172014-01-29 14:06:38READ_WRITEadshkl;dasfhkdf0001391025968832-5056a550b8-0001-012vdjserver.org2014
5182014-02-20 10:07:51READ_WRITEVDJAuth0001392912471365-5056a550b8-0001-012vdjserver.org2014
6192014-02-20 10:14:20READ_WRITEVDJAuth0001392912860303-5056a550b8-0001-012vdjserver.org2014
7212014-02-20 11:10:54READ_WRITEwscarbor0001392914178983-5056a550b8-0001-012vdjserver.org2014
\n", + "
" + ], + "text/plain": [ + " id last_updated permission username \\\n", + "3 14 2014-01-29 10:28:16 READ_WRITE jfonner \n", + "4 17 2014-01-29 14:06:38 READ_WRITE adshkl;dasfhkdf \n", + "5 18 2014-02-20 10:07:51 READ_WRITE VDJAuth \n", + "6 19 2014-02-20 10:14:20 READ_WRITE VDJAuth \n", + "7 21 2014-02-20 11:10:54 READ_WRITE wscarbor \n", + "\n", + " uuid tenant_id year \n", + "3 0001389977207738-5056a550b8-0001-012 vdjserver.org 2014 \n", + "4 0001391025968832-5056a550b8-0001-012 vdjserver.org 2014 \n", + "5 0001392912471365-5056a550b8-0001-012 vdjserver.org 2014 \n", + "6 0001392912860303-5056a550b8-0001-012 vdjserver.org 2014 \n", + "7 0001392914178983-5056a550b8-0001-012 vdjserver.org 2014 " + ] + }, + "execution_count": 60, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "print(filtered_metadata_perms_df.shape)\n", + "filtered_metadata_perms_df.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 61, + "id": "8c20a1c5-4a2e-468c-bf43-51818121b61d", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "(29194, 9)\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
uuidprojectUuidassociationIds_1associationIds_2ownertask_typefile_namemimeTypelast_updated
10001396029083309-5056a550b8-0001-0120001395346788177-5056a550b8-0001-0120001395939852441-5056a550b8-0001-002Nonevdjauthuploadedgitprep-latest.zipapplication/zip2014-03-28T12:51:23.309-05:00
20001396029805022-5056a550b8-0001-0120001395346788177-5056a550b8-0001-0120001395956517022-5056a550b8-0001-002NonevdjauthuploadedInduction-28.zipapplication/zip2014-03-28T13:03:25.022-05:00
30001396030144907-5056a550b8-0001-0120001395346788177-5056a550b8-0001-0120001396030144691-5056a550b8-0001-002Nonevdjauthuploadedtest10.txttext/plain2014-03-28T13:09:04.907-05:00
40001396039988083-5056a550b8-0001-0120001395346788177-5056a550b8-0001-0120001396039987794-5056a550b8-0001-002Nonevdjauthuploadedtest11.txttext/plain2014-03-28T15:53:08.083-05:00
50001396043273330-5056a550b8-0001-0120001395346788177-5056a550b8-0001-0120001396043273029-5056a550b8-0001-002Nonevdjauthuploadedtest14.txttext/plain2014-03-28T16:47:53.330-05:00
\n", + "
" + ], + "text/plain": [ + " uuid projectUuid \\\n", + "1 0001396029083309-5056a550b8-0001-012 0001395346788177-5056a550b8-0001-012 \n", + "2 0001396029805022-5056a550b8-0001-012 0001395346788177-5056a550b8-0001-012 \n", + "3 0001396030144907-5056a550b8-0001-012 0001395346788177-5056a550b8-0001-012 \n", + "4 0001396039988083-5056a550b8-0001-012 0001395346788177-5056a550b8-0001-012 \n", + "5 0001396043273330-5056a550b8-0001-012 0001395346788177-5056a550b8-0001-012 \n", + "\n", + " associationIds_1 associationIds_2 owner task_type \\\n", + "1 0001395939852441-5056a550b8-0001-002 None vdjauth uploaded \n", + "2 0001395956517022-5056a550b8-0001-002 None vdjauth uploaded \n", + "3 0001396030144691-5056a550b8-0001-002 None vdjauth uploaded \n", + "4 0001396039987794-5056a550b8-0001-002 None vdjauth uploaded \n", + "5 0001396043273029-5056a550b8-0001-002 None vdjauth uploaded \n", + "\n", + " file_name mimeType last_updated \n", + "1 gitprep-latest.zip application/zip 2014-03-28T12:51:23.309-05:00 \n", + "2 Induction-28.zip application/zip 2014-03-28T13:03:25.022-05:00 \n", + "3 test10.txt text/plain 2014-03-28T13:09:04.907-05:00 \n", + "4 test11.txt text/plain 2014-03-28T15:53:08.083-05:00 \n", + "5 test14.txt text/plain 2014-03-28T16:47:53.330-05:00 " + ] + }, + "execution_count": 61, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "print(filtered_df_projectFiles.shape)\n", + "filtered_df_projectFiles.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 62, + "id": "e9f3c204-082c-4a38-af88-fa0bc9caf433", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "(5477, 10)\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
system_idownerapp_idstatuslast_updateduuidarchive_pathremote_outcomeupdate_tokenparameters.Creator
0ls6.tacc.utexas.eduvdjrepcalc-ls6-2.0u8FINISHED2025-01-25 15:43:51.678c7cd08ad-a560-4574-a363-b9cc4c5e051d-007/projects/5456400192359305711-242ac118-0001-01...FINISHEDeb27e311-4a37-4aeb-b649-056704dd2711schristley
1ls6.tacc.utexas.eduvdjigblast-ls6-1.20u6FINISHED2025-01-24 04:20:37.8919188bf80-e868-4e05-a6b4-308c044108d7-007/projects/5456400192359305711-242ac118-0001-01...FINISHED5e2528fd-25d6-4473-9287-6a67a8de8391schristley
2ls6.tacc.utexas.eduvdjigblast-ls6-1.20u6FAILED2025-01-22 15:04:46.891773a5cb7-b369-4517-a221-83d57e3899e5-007/projects/5199144433477554666-242ac116-0001-01...FAILED_SKIP_ARCHIVE78b89c14-3dec-4aa8-acf8-d2592064e3a4scott_public
3ls6.tacc.utexas.eduvdjvdj_pipe-ls6-0.1.7u2FINISHED2025-01-14 22:31:02.980c0ab5f4a-97b0-4dc3-93e8-0908c95cb3a4-007/projects/5456400192359305711-242ac118-0001-01...FINISHED1e2f122d-5e5b-4f14-931f-ca55803115ffschristley
4ls6.tacc.utexas.eduvdjvdj_pipe-ls6-0.1.7u2FINISHED2025-01-09 04:21:12.476ad02cb34-250e-48cb-a06e-973e431b62ee-007/projects/6589143665654501871-242ac118-0001-01...FINISHED1069949d-1d9a-453f-80b8-7372019aba31schristley
\n", + "
" + ], + "text/plain": [ + " system_id owner app_id status \\\n", + "0 ls6.tacc.utexas.edu vdj repcalc-ls6-2.0u8 FINISHED \n", + "1 ls6.tacc.utexas.edu vdj igblast-ls6-1.20u6 FINISHED \n", + "2 ls6.tacc.utexas.edu vdj igblast-ls6-1.20u6 FAILED \n", + "3 ls6.tacc.utexas.edu vdj vdj_pipe-ls6-0.1.7u2 FINISHED \n", + "4 ls6.tacc.utexas.edu vdj vdj_pipe-ls6-0.1.7u2 FINISHED \n", + "\n", + " last_updated uuid \\\n", + "0 2025-01-25 15:43:51.678 c7cd08ad-a560-4574-a363-b9cc4c5e051d-007 \n", + "1 2025-01-24 04:20:37.891 9188bf80-e868-4e05-a6b4-308c044108d7-007 \n", + "2 2025-01-22 15:04:46.891 773a5cb7-b369-4517-a221-83d57e3899e5-007 \n", + "3 2025-01-14 22:31:02.980 c0ab5f4a-97b0-4dc3-93e8-0908c95cb3a4-007 \n", + "4 2025-01-09 04:21:12.476 ad02cb34-250e-48cb-a06e-973e431b62ee-007 \n", + "\n", + " archive_path remote_outcome \\\n", + "0 /projects/5456400192359305711-242ac118-0001-01... FINISHED \n", + "1 /projects/5456400192359305711-242ac118-0001-01... FINISHED \n", + "2 /projects/5199144433477554666-242ac116-0001-01... FAILED_SKIP_ARCHIVE \n", + "3 /projects/5456400192359305711-242ac118-0001-01... FINISHED \n", + "4 /projects/6589143665654501871-242ac118-0001-01... 
FINISHED \n", + "\n", + " update_token parameters.Creator \n", + "0 eb27e311-4a37-4aeb-b649-056704dd2711 schristley \n", + "1 5e2528fd-25d6-4473-9287-6a67a8de8391 schristley \n", + "2 78b89c14-3dec-4aa8-acf8-d2592064e3a4 scott_public \n", + "3 1e2f122d-5e5b-4f14-931f-ca55803115ff schristley \n", + "4 1069949d-1d9a-453f-80b8-7372019aba31 schristley " + ] + }, + "execution_count": 62, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "print(filtered_jobs_all_df.shape)\n", + "filtered_jobs_all_df.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 63, + "id": "477c89ac-850a-4290-b65f-3cc448e398a1", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'/projects/5456400192359305711-242ac118-0001-012/analyses/2025-01-25-02-57-35-54-my-job-24-jan-2025-8:57:09-pm'" + ] + }, + "execution_count": 63, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "filtered_jobs_all_df.iloc[0]['archive_path']" + ] + }, + { + "cell_type": "code", + "execution_count": 64, + "id": "8cd2247b-1d55-423c-a1ce-c0bc57cff592", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
uuidprojectUuidassociationIds_1associationIds_2ownertask_typefile_namemimeTypelast_updated
359237235888630211276305-242ac118-0001-0125456400192359305711-242ac118-0001-0128704853344789196305-242ac112-0001-0025456400192359305711-242ac118-0001-012vdjNoneUTSW3_S41_L001_R2_001.fastq.gzNone2025-01-13T16:40:05.824-06:00
359245435653088092876305-242ac118-0001-0125456400192359305711-242ac118-0001-0126303726458799395311-242ac112-0001-0025456400192359305711-242ac118-0001-012vdjNoneUTSW3_S41_L001_R1_001.fastq.gzNone2025-01-13T16:40:05.774-06:00
359254056925636403916305-242ac118-0001-0125456400192359305711-242ac118-0001-0125515926026855116305-242ac112-0001-0025456400192359305711-242ac118-0001-012vdjNoneUTSW33_S42_L001_R1_001.fastq.gzNone2025-01-13T16:40:09.288-06:00
35926364542252032716305-242ac118-0001-0125456400192359305711-242ac118-0001-0121826420270572236305-242ac112-0001-0025456400192359305711-242ac118-0001-012vdjNoneUTSW33_S42_L001_R2_001.fastq.gzNone2025-01-13T16:40:09.197-06:00
359271356193445436789231-242ac118-0001-0125456400192359305711-242ac118-0001-01298898524775116305-242ac112-0001-0025456400192359305711-242ac118-0001-012vdjNoneUTSW34_S43_L001_R1_001.fastq.gzNone2025-01-13T16:40:12.427-06:00
359283037157745745269231-242ac118-0001-0125456400192359305711-242ac118-0001-0121563855114198389231-242ac112-0001-0025456400192359305711-242ac118-0001-012vdjNoneUTSW34_S43_L001_R2_001.fastq.gzNone2025-01-13T16:40:12.462-06:00
359298703939788065402385-242ac118-0001-0125456400192359305711-242ac118-0001-0128250904445705589231-242ac112-0001-0025456400192359305711-242ac118-0001-012vdjNone0436_S17_L001_R1_001.fastq.gzNone2025-01-13T16:40:16.264-06:00
359305832496452651642385-242ac118-0001-0125456400192359305711-242ac118-0001-0127280888273881722385-242ac112-0001-0025456400192359305711-242ac118-0001-012vdjNone0436_S17_L001_R2_001.fastq.gzNone2025-01-13T16:40:16.297-06:00
359312306929598057082385-242ac118-0001-0125456400192359305711-242ac118-0001-0123790067704711802385-242ac112-0001-0025456400192359305711-242ac118-0001-012vdjNone0830_S18_L001_R1_001.fastq.gzNone2025-01-13T16:40:19.713-06:00
359321173454200583623151-242ac118-0001-0125456400192359305711-242ac118-0001-012311831389719162385-242ac112-0001-0025456400192359305711-242ac118-0001-012vdjNone0830_S18_L001_R2_001.fastq.gzNone2025-01-13T16:40:19.708-06:00
359333108766464161223151-242ac118-0001-0125456400192359305711-242ac118-0001-0121655950826616263151-242ac112-0001-0025456400192359305711-242ac118-0001-012vdjNone1484_S19_L001_R1_001.fastq.gzNone2025-01-13T16:40:23.114-06:00
359345421348655019463151-242ac118-0001-0125456400192359305711-242ac118-0001-0123945254294730183151-242ac112-0001-0025456400192359305711-242ac118-0001-012vdjNone1484_S19_L001_R2_001.fastq.gzNone2025-01-13T16:40:23.089-06:00
359356681449109992903151-242ac118-0001-0125456400192359305711-242ac118-0001-0125205741296760263151-242ac112-0001-0025456400192359305711-242ac118-0001-012vdjNone1977_S20_L001_R1_001.fastq.gzNone2025-01-13T16:40:26.325-06:00
359367745355458885063151-242ac118-0001-0125456400192359305711-242ac118-0001-0126281072258659783151-242ac112-0001-0025456400192359305711-242ac118-0001-012vdjNone1977_S20_L001_R2_001.fastq.gzNone2025-01-13T16:40:26.297-06:00
359378329106378551848465-242ac118-0001-0125456400192359305711-242ac118-0001-0128594599342323143151-242ac112-0001-0025456400192359305711-242ac118-0001-012vdjNone2228_S21_L001_R1_001.fastq.gzNone2025-01-13T16:40:30.584-06:00
359385841031823979048465-242ac118-0001-0125456400192359305711-242ac118-0001-0127325028924092968465-242ac112-0001-0025456400192359305711-242ac118-0001-012vdjNone2228_S21_L001_R2_001.fastq.gzNone2025-01-13T16:40:30.584-06:00
359392014473661280808465-242ac118-0001-0125456400192359305711-242ac118-0001-0123476695277204008465-242ac112-0001-0025456400192359305711-242ac118-0001-012vdjNone2315_S22_L001_R1_001.fastq.gzNone2025-01-13T16:40:33.910-06:00
359402661285435182617071-242ac118-0001-0125456400192359305711-242ac118-0001-0121196701587246617071-242ac112-0001-0025456400192359305711-242ac118-0001-012vdjNone2315_S22_L001_R2_001.fastq.gzNone2025-01-13T16:40:33.916-06:00
359416100051000725017071-242ac118-0001-0125456400192359305711-242ac118-0001-0124621895056133657071-242ac112-0001-0025456400192359305711-242ac118-0001-012vdjNone2991_S23_L001_R1_001.fastq.gzNone2025-01-13T16:40:37.285-06:00
359428601653389564374545-242ac118-0001-0125456400192359305711-242ac118-0001-0128376899063680537071-242ac112-0001-0025456400192359305711-242ac118-0001-012vdjNone2991_S23_L001_R2_001.fastq.gzNone2025-01-13T16:40:37.282-06:00
359435338423137409494545-242ac118-0001-0125456400192359305711-242ac118-0001-0126793987554023894545-242ac112-0001-0025456400192359305711-242ac118-0001-012vdjNone4468_S24_L001_R1_001.fastq.gzNone2025-01-13T16:40:40.230-06:00
359441335427718191574545-242ac118-0001-0125456400192359305711-242ac118-0001-0122833383462017494545-242ac112-0001-0025456400192359305711-242ac118-0001-012vdjNone4468_S24_L001_R2_001.fastq.gzNone2025-01-13T16:40:40.230-06:00
359451840700597200490991-242ac118-0001-0125456400192359305711-242ac118-0001-012366925519251050991-242ac112-0001-0025456400192359305711-242ac118-0001-012vdjNone6634_S25_L001_R1_001.fastq.gzNone2025-01-13T16:40:43.277-06:00
359465023614960920170991-242ac118-0001-0125456400192359305711-242ac118-0001-0123549539235260010991-242ac112-0001-0025456400192359305711-242ac118-0001-012vdjNone6634_S25_L001_R2_001.fastq.gzNone2025-01-13T16:40:43.281-06:00
359477830832104257678865-242ac118-0001-0125456400192359305711-242ac118-0001-0128017190735231118865-242ac112-0001-0025456400192359305711-242ac118-0001-012vdjNoneprimers.fastaNone2025-01-13T16:41:49.035-06:00
\n", + "
" + ], + "text/plain": [ + " uuid \\\n", + "35923 7235888630211276305-242ac118-0001-012 \n", + "35924 5435653088092876305-242ac118-0001-012 \n", + "35925 4056925636403916305-242ac118-0001-012 \n", + "35926 364542252032716305-242ac118-0001-012 \n", + "35927 1356193445436789231-242ac118-0001-012 \n", + "35928 3037157745745269231-242ac118-0001-012 \n", + "35929 8703939788065402385-242ac118-0001-012 \n", + "35930 5832496452651642385-242ac118-0001-012 \n", + "35931 2306929598057082385-242ac118-0001-012 \n", + "35932 1173454200583623151-242ac118-0001-012 \n", + "35933 3108766464161223151-242ac118-0001-012 \n", + "35934 5421348655019463151-242ac118-0001-012 \n", + "35935 6681449109992903151-242ac118-0001-012 \n", + "35936 7745355458885063151-242ac118-0001-012 \n", + "35937 8329106378551848465-242ac118-0001-012 \n", + "35938 5841031823979048465-242ac118-0001-012 \n", + "35939 2014473661280808465-242ac118-0001-012 \n", + "35940 2661285435182617071-242ac118-0001-012 \n", + "35941 6100051000725017071-242ac118-0001-012 \n", + "35942 8601653389564374545-242ac118-0001-012 \n", + "35943 5338423137409494545-242ac118-0001-012 \n", + "35944 1335427718191574545-242ac118-0001-012 \n", + "35945 1840700597200490991-242ac118-0001-012 \n", + "35946 5023614960920170991-242ac118-0001-012 \n", + "35947 7830832104257678865-242ac118-0001-012 \n", + "\n", + " projectUuid \\\n", + "35923 5456400192359305711-242ac118-0001-012 \n", + "35924 5456400192359305711-242ac118-0001-012 \n", + "35925 5456400192359305711-242ac118-0001-012 \n", + "35926 5456400192359305711-242ac118-0001-012 \n", + "35927 5456400192359305711-242ac118-0001-012 \n", + "35928 5456400192359305711-242ac118-0001-012 \n", + "35929 5456400192359305711-242ac118-0001-012 \n", + "35930 5456400192359305711-242ac118-0001-012 \n", + "35931 5456400192359305711-242ac118-0001-012 \n", + "35932 5456400192359305711-242ac118-0001-012 \n", + "35933 5456400192359305711-242ac118-0001-012 \n", + "35934 5456400192359305711-242ac118-0001-012 \n", + 
"35935 5456400192359305711-242ac118-0001-012 \n", + "35936 5456400192359305711-242ac118-0001-012 \n", + "35937 5456400192359305711-242ac118-0001-012 \n", + "35938 5456400192359305711-242ac118-0001-012 \n", + "35939 5456400192359305711-242ac118-0001-012 \n", + "35940 5456400192359305711-242ac118-0001-012 \n", + "35941 5456400192359305711-242ac118-0001-012 \n", + "35942 5456400192359305711-242ac118-0001-012 \n", + "35943 5456400192359305711-242ac118-0001-012 \n", + "35944 5456400192359305711-242ac118-0001-012 \n", + "35945 5456400192359305711-242ac118-0001-012 \n", + "35946 5456400192359305711-242ac118-0001-012 \n", + "35947 5456400192359305711-242ac118-0001-012 \n", + "\n", + " associationIds_1 \\\n", + "35923 8704853344789196305-242ac112-0001-002 \n", + "35924 6303726458799395311-242ac112-0001-002 \n", + "35925 5515926026855116305-242ac112-0001-002 \n", + "35926 1826420270572236305-242ac112-0001-002 \n", + "35927 98898524775116305-242ac112-0001-002 \n", + "35928 1563855114198389231-242ac112-0001-002 \n", + "35929 8250904445705589231-242ac112-0001-002 \n", + "35930 7280888273881722385-242ac112-0001-002 \n", + "35931 3790067704711802385-242ac112-0001-002 \n", + "35932 311831389719162385-242ac112-0001-002 \n", + "35933 1655950826616263151-242ac112-0001-002 \n", + "35934 3945254294730183151-242ac112-0001-002 \n", + "35935 5205741296760263151-242ac112-0001-002 \n", + "35936 6281072258659783151-242ac112-0001-002 \n", + "35937 8594599342323143151-242ac112-0001-002 \n", + "35938 7325028924092968465-242ac112-0001-002 \n", + "35939 3476695277204008465-242ac112-0001-002 \n", + "35940 1196701587246617071-242ac112-0001-002 \n", + "35941 4621895056133657071-242ac112-0001-002 \n", + "35942 8376899063680537071-242ac112-0001-002 \n", + "35943 6793987554023894545-242ac112-0001-002 \n", + "35944 2833383462017494545-242ac112-0001-002 \n", + "35945 366925519251050991-242ac112-0001-002 \n", + "35946 3549539235260010991-242ac112-0001-002 \n", + "35947 
8017190735231118865-242ac112-0001-002 \n", + "\n", + " associationIds_2 owner task_type \\\n", + "35923 5456400192359305711-242ac118-0001-012 vdj None \n", + "35924 5456400192359305711-242ac118-0001-012 vdj None \n", + "35925 5456400192359305711-242ac118-0001-012 vdj None \n", + "35926 5456400192359305711-242ac118-0001-012 vdj None \n", + "35927 5456400192359305711-242ac118-0001-012 vdj None \n", + "35928 5456400192359305711-242ac118-0001-012 vdj None \n", + "35929 5456400192359305711-242ac118-0001-012 vdj None \n", + "35930 5456400192359305711-242ac118-0001-012 vdj None \n", + "35931 5456400192359305711-242ac118-0001-012 vdj None \n", + "35932 5456400192359305711-242ac118-0001-012 vdj None \n", + "35933 5456400192359305711-242ac118-0001-012 vdj None \n", + "35934 5456400192359305711-242ac118-0001-012 vdj None \n", + "35935 5456400192359305711-242ac118-0001-012 vdj None \n", + "35936 5456400192359305711-242ac118-0001-012 vdj None \n", + "35937 5456400192359305711-242ac118-0001-012 vdj None \n", + "35938 5456400192359305711-242ac118-0001-012 vdj None \n", + "35939 5456400192359305711-242ac118-0001-012 vdj None \n", + "35940 5456400192359305711-242ac118-0001-012 vdj None \n", + "35941 5456400192359305711-242ac118-0001-012 vdj None \n", + "35942 5456400192359305711-242ac118-0001-012 vdj None \n", + "35943 5456400192359305711-242ac118-0001-012 vdj None \n", + "35944 5456400192359305711-242ac118-0001-012 vdj None \n", + "35945 5456400192359305711-242ac118-0001-012 vdj None \n", + "35946 5456400192359305711-242ac118-0001-012 vdj None \n", + "35947 5456400192359305711-242ac118-0001-012 vdj None \n", + "\n", + " file_name mimeType last_updated \n", + "35923 UTSW3_S41_L001_R2_001.fastq.gz None 2025-01-13T16:40:05.824-06:00 \n", + "35924 UTSW3_S41_L001_R1_001.fastq.gz None 2025-01-13T16:40:05.774-06:00 \n", + "35925 UTSW33_S42_L001_R1_001.fastq.gz None 2025-01-13T16:40:09.288-06:00 \n", + "35926 UTSW33_S42_L001_R2_001.fastq.gz None 2025-01-13T16:40:09.197-06:00 \n", + "35927 
UTSW34_S43_L001_R1_001.fastq.gz None 2025-01-13T16:40:12.427-06:00 \n", + "35928 UTSW34_S43_L001_R2_001.fastq.gz None 2025-01-13T16:40:12.462-06:00 \n", + "35929 0436_S17_L001_R1_001.fastq.gz None 2025-01-13T16:40:16.264-06:00 \n", + "35930 0436_S17_L001_R2_001.fastq.gz None 2025-01-13T16:40:16.297-06:00 \n", + "35931 0830_S18_L001_R1_001.fastq.gz None 2025-01-13T16:40:19.713-06:00 \n", + "35932 0830_S18_L001_R2_001.fastq.gz None 2025-01-13T16:40:19.708-06:00 \n", + "35933 1484_S19_L001_R1_001.fastq.gz None 2025-01-13T16:40:23.114-06:00 \n", + "35934 1484_S19_L001_R2_001.fastq.gz None 2025-01-13T16:40:23.089-06:00 \n", + "35935 1977_S20_L001_R1_001.fastq.gz None 2025-01-13T16:40:26.325-06:00 \n", + "35936 1977_S20_L001_R2_001.fastq.gz None 2025-01-13T16:40:26.297-06:00 \n", + "35937 2228_S21_L001_R1_001.fastq.gz None 2025-01-13T16:40:30.584-06:00 \n", + "35938 2228_S21_L001_R2_001.fastq.gz None 2025-01-13T16:40:30.584-06:00 \n", + "35939 2315_S22_L001_R1_001.fastq.gz None 2025-01-13T16:40:33.910-06:00 \n", + "35940 2315_S22_L001_R2_001.fastq.gz None 2025-01-13T16:40:33.916-06:00 \n", + "35941 2991_S23_L001_R1_001.fastq.gz None 2025-01-13T16:40:37.285-06:00 \n", + "35942 2991_S23_L001_R2_001.fastq.gz None 2025-01-13T16:40:37.282-06:00 \n", + "35943 4468_S24_L001_R1_001.fastq.gz None 2025-01-13T16:40:40.230-06:00 \n", + "35944 4468_S24_L001_R2_001.fastq.gz None 2025-01-13T16:40:40.230-06:00 \n", + "35945 6634_S25_L001_R1_001.fastq.gz None 2025-01-13T16:40:43.277-06:00 \n", + "35946 6634_S25_L001_R2_001.fastq.gz None 2025-01-13T16:40:43.281-06:00 \n", + "35947 primers.fasta None 2025-01-13T16:41:49.035-06:00 " + ] + }, + "execution_count": 64, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "project_uuid = '5456400192359305711-242ac118-0001-012'\n", + "filtered_df_projectFiles[filtered_df_projectFiles.projectUuid == project_uuid]" + ] + }, + { + "cell_type": "code", + "execution_count": 65, + "id": "c15b5301-6b71-48a4-bdbc-fae7e018cfab", + 
"metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
system_idownerapp_idstatuslast_updateduuidarchive_pathremote_outcomeupdate_tokenparameters.Creator
0ls6.tacc.utexas.eduvdjrepcalc-ls6-2.0u8FINISHED2025-01-25 15:43:51.678c7cd08ad-a560-4574-a363-b9cc4c5e051d-007/projects/5456400192359305711-242ac118-0001-01...FINISHEDeb27e311-4a37-4aeb-b649-056704dd2711schristley
1ls6.tacc.utexas.eduvdjigblast-ls6-1.20u6FINISHED2025-01-24 04:20:37.8919188bf80-e868-4e05-a6b4-308c044108d7-007/projects/5456400192359305711-242ac118-0001-01...FINISHED5e2528fd-25d6-4473-9287-6a67a8de8391schristley
2ls6.tacc.utexas.eduvdjigblast-ls6-1.20u6FAILED2025-01-22 15:04:46.891773a5cb7-b369-4517-a221-83d57e3899e5-007/projects/5199144433477554666-242ac116-0001-01...FAILED_SKIP_ARCHIVE78b89c14-3dec-4aa8-acf8-d2592064e3a4scott_public
3ls6.tacc.utexas.eduvdjvdj_pipe-ls6-0.1.7u2FINISHED2025-01-14 22:31:02.980c0ab5f4a-97b0-4dc3-93e8-0908c95cb3a4-007/projects/5456400192359305711-242ac118-0001-01...FINISHED1e2f122d-5e5b-4f14-931f-ca55803115ffschristley
4ls6.tacc.utexas.eduvdjvdj_pipe-ls6-0.1.7u2FINISHED2025-01-09 04:21:12.476ad02cb34-250e-48cb-a06e-973e431b62ee-007/projects/6589143665654501871-242ac118-0001-01...FINISHED1069949d-1d9a-453f-80b8-7372019aba31schristley
.................................
15776my-lonestarjfonnermy-vdj_pipe-0.0.4FINISHED2014-03-31 16:38:39.0000001396301879424-5056a550b8-0001-007/scratch/01114/jfonner/vdj/analyses/foo123-201...FINISHED3b188d18-7955-49b6-bc21-10a557ced542NaN
15777my-lonestarjfonnermy-vdj_pipe-0.0.4FINISHED2014-03-31 15:44:00.0000001396298592090-5056a550b8-0001-007/scratch/01114/jfonner/vdj/analyses/vdj_test20...FINISHEDc9dd99e9-2ef2-4fd7-b211-26b56162b21eNaN
15778my-lonestarjfonnermy-vdj_pipe-0.0.4FINISHED2014-03-31 15:35:18.0000001396298085562-5056a550b8-0001-007/scratch/01114/jfonner/vdj/analyses/vdj_test20...FINISHED282196b2-9972-4615-944d-777e1ee7826cNaN
15779my-lonestarjfonnermy-vdj_pipe-0.0.4FAILED2014-03-31 15:28:36.0000001396297676287-5056a550b8-0001-007/scratch/01114/jfonner/vdj/analyses/vdj_test20...FAILEDdc81e8a3-9869-47cc-8bee-3d254bb805d1NaN
15780my-lonestarjfonnermy-vdj_pipe-0.0.4FAILED2014-03-31 14:50:18.0000001396295290656-5056a550b8-0001-007/scratch/01114/jfonner/vdj/analyses/vdj_test20...FAILED08f920a6-e4c1-4029-9ac2-e1de96e7d23aNaN
\n", + "

15781 rows × 10 columns

\n", + "
" + ], + "text/plain": [ + " system_id owner app_id status \\\n", + "0 ls6.tacc.utexas.edu vdj repcalc-ls6-2.0u8 FINISHED \n", + "1 ls6.tacc.utexas.edu vdj igblast-ls6-1.20u6 FINISHED \n", + "2 ls6.tacc.utexas.edu vdj igblast-ls6-1.20u6 FAILED \n", + "3 ls6.tacc.utexas.edu vdj vdj_pipe-ls6-0.1.7u2 FINISHED \n", + "4 ls6.tacc.utexas.edu vdj vdj_pipe-ls6-0.1.7u2 FINISHED \n", + "... ... ... ... ... \n", + "15776 my-lonestar jfonner my-vdj_pipe-0.0.4 FINISHED \n", + "15777 my-lonestar jfonner my-vdj_pipe-0.0.4 FINISHED \n", + "15778 my-lonestar jfonner my-vdj_pipe-0.0.4 FINISHED \n", + "15779 my-lonestar jfonner my-vdj_pipe-0.0.4 FAILED \n", + "15780 my-lonestar jfonner my-vdj_pipe-0.0.4 FAILED \n", + "\n", + " last_updated uuid \\\n", + "0 2025-01-25 15:43:51.678 c7cd08ad-a560-4574-a363-b9cc4c5e051d-007 \n", + "1 2025-01-24 04:20:37.891 9188bf80-e868-4e05-a6b4-308c044108d7-007 \n", + "2 2025-01-22 15:04:46.891 773a5cb7-b369-4517-a221-83d57e3899e5-007 \n", + "3 2025-01-14 22:31:02.980 c0ab5f4a-97b0-4dc3-93e8-0908c95cb3a4-007 \n", + "4 2025-01-09 04:21:12.476 ad02cb34-250e-48cb-a06e-973e431b62ee-007 \n", + "... ... ... \n", + "15776 2014-03-31 16:38:39.000 0001396301879424-5056a550b8-0001-007 \n", + "15777 2014-03-31 15:44:00.000 0001396298592090-5056a550b8-0001-007 \n", + "15778 2014-03-31 15:35:18.000 0001396298085562-5056a550b8-0001-007 \n", + "15779 2014-03-31 15:28:36.000 0001396297676287-5056a550b8-0001-007 \n", + "15780 2014-03-31 14:50:18.000 0001396295290656-5056a550b8-0001-007 \n", + "\n", + " archive_path remote_outcome \\\n", + "0 /projects/5456400192359305711-242ac118-0001-01... FINISHED \n", + "1 /projects/5456400192359305711-242ac118-0001-01... FINISHED \n", + "2 /projects/5199144433477554666-242ac116-0001-01... FAILED_SKIP_ARCHIVE \n", + "3 /projects/5456400192359305711-242ac118-0001-01... FINISHED \n", + "4 /projects/6589143665654501871-242ac118-0001-01... FINISHED \n", + "... ... ... \n", + "15776 /scratch/01114/jfonner/vdj/analyses/foo123-201... 
FINISHED \n", + "15777 /scratch/01114/jfonner/vdj/analyses/vdj_test20... FINISHED \n", + "15778 /scratch/01114/jfonner/vdj/analyses/vdj_test20... FINISHED \n", + "15779 /scratch/01114/jfonner/vdj/analyses/vdj_test20... FAILED \n", + "15780 /scratch/01114/jfonner/vdj/analyses/vdj_test20... FAILED \n", + "\n", + " update_token parameters.Creator \n", + "0 eb27e311-4a37-4aeb-b649-056704dd2711 schristley \n", + "1 5e2528fd-25d6-4473-9287-6a67a8de8391 schristley \n", + "2 78b89c14-3dec-4aa8-acf8-d2592064e3a4 scott_public \n", + "3 1e2f122d-5e5b-4f14-931f-ca55803115ff schristley \n", + "4 1069949d-1d9a-453f-80b8-7372019aba31 schristley \n", + "... ... ... \n", + "15776 3b188d18-7955-49b6-bc21-10a557ced542 NaN \n", + "15777 c9dd99e9-2ef2-4fd7-b211-26b56162b21e NaN \n", + "15778 282196b2-9972-4615-944d-777e1ee7826c NaN \n", + "15779 dc81e8a3-9869-47cc-8bee-3d254bb805d1 NaN \n", + "15780 08f920a6-e4c1-4029-9ac2-e1de96e7d23a NaN \n", + "\n", + "[15781 rows x 10 columns]" + ] + }, + "execution_count": 65, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "jobs_all_df" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "16f7f9c3-9fd9-47fe-824a-9234ca9d8aa1", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e58ab1da-a1f4-4eae-a61e-7009dd057a5e", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": 66, + "id": "97468a6f-9372-43f9-8e4c-6fc5f678b8ee", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\"/community/cache/6378122916818653676-242ac117-0001-012/statistics/6977444714660359700-242ac117-0001-012\"\n", + "set()\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
app_idstatusproject_folderprojectUUIDJobUuidremote_outcomeproject_creatorlast_updated
0repcalc-ls6-2.0u8FINISHEDprojects5456400192359305711-242ac118-0001-012c7cd08ad-a560-4574-a363-b9cc4c5e051d-007FINISHEDschristley2025-01-25 15:43:51.678
1igblast-ls6-1.20u6FINISHEDprojects5456400192359305711-242ac118-0001-0129188bf80-e868-4e05-a6b4-308c044108d7-007FINISHEDschristley2025-01-24 04:20:37.891
2igblast-ls6-1.20u6FAILEDprojects5199144433477554666-242ac116-0001-012773a5cb7-b369-4517-a221-83d57e3899e5-007FAILED_SKIP_ARCHIVEscott_public2025-01-22 15:04:46.891
3vdj_pipe-ls6-0.1.7u2FINISHEDprojects5456400192359305711-242ac118-0001-012c0ab5f4a-97b0-4dc3-93e8-0908c95cb3a4-007FINISHEDschristley2025-01-14 22:31:02.980
4vdj_pipe-ls6-0.1.7u2FINISHEDprojects6589143665654501871-242ac118-0001-012ad02cb34-250e-48cb-a06e-973e431b62ee-007FINISHEDschristley2025-01-09 04:21:12.476
...........................
982immuneml-ls6-2.2FINISHEDarchivejobsbbb69fb7-5325-4b02-96c1-1cae7152d9e9-007FINISHEDscott_test12022-04-04 14:03:49.907
983immuneml-ls6-2.2FINISHEDarchivejobscf37345f-e7da-4f85-a404-6663d688f576-007FINISHEDscott_test12022-04-01 16:19:13.528
984immuneml-ls6-2.2FINISHEDarchivejobs6419b7a4-15a2-4732-82d1-79553d1435c1-007FINISHEDscott_test12022-04-01 15:27:23.091
985vdj_pipe-stampede2-0.1.7u4FINISHEDprojects8557415777028206100-242ac118-0001-0122d35173f-30e5-4908-bf33-5784bcc42dcd-007FINISHEDareceveur2022-04-01 09:37:15.693
986vdj_pipe-stampede2-0.1.7u4FINISHEDprojects8557415777028206100-242ac118-0001-0122c48d618-bf09-483d-aa43-44872f0b416b-007FINISHEDareceveur2022-03-31 22:01:50.356
\n", + "

987 rows × 8 columns

\n", + "
" + ], + "text/plain": [ + " app_id status project_folder \\\n", + "0 repcalc-ls6-2.0u8 FINISHED projects \n", + "1 igblast-ls6-1.20u6 FINISHED projects \n", + "2 igblast-ls6-1.20u6 FAILED projects \n", + "3 vdj_pipe-ls6-0.1.7u2 FINISHED projects \n", + "4 vdj_pipe-ls6-0.1.7u2 FINISHED projects \n", + ".. ... ... ... \n", + "982 immuneml-ls6-2.2 FINISHED archive \n", + "983 immuneml-ls6-2.2 FINISHED archive \n", + "984 immuneml-ls6-2.2 FINISHED archive \n", + "985 vdj_pipe-stampede2-0.1.7u4 FINISHED projects \n", + "986 vdj_pipe-stampede2-0.1.7u4 FINISHED projects \n", + "\n", + " projectUUID \\\n", + "0 5456400192359305711-242ac118-0001-012 \n", + "1 5456400192359305711-242ac118-0001-012 \n", + "2 5199144433477554666-242ac116-0001-012 \n", + "3 5456400192359305711-242ac118-0001-012 \n", + "4 6589143665654501871-242ac118-0001-012 \n", + ".. ... \n", + "982 jobs \n", + "983 jobs \n", + "984 jobs \n", + "985 8557415777028206100-242ac118-0001-012 \n", + "986 8557415777028206100-242ac118-0001-012 \n", + "\n", + " JobUuid remote_outcome \\\n", + "0 c7cd08ad-a560-4574-a363-b9cc4c5e051d-007 FINISHED \n", + "1 9188bf80-e868-4e05-a6b4-308c044108d7-007 FINISHED \n", + "2 773a5cb7-b369-4517-a221-83d57e3899e5-007 FAILED_SKIP_ARCHIVE \n", + "3 c0ab5f4a-97b0-4dc3-93e8-0908c95cb3a4-007 FINISHED \n", + "4 ad02cb34-250e-48cb-a06e-973e431b62ee-007 FINISHED \n", + ".. ... ... \n", + "982 bbb69fb7-5325-4b02-96c1-1cae7152d9e9-007 FINISHED \n", + "983 cf37345f-e7da-4f85-a404-6663d688f576-007 FINISHED \n", + "984 6419b7a4-15a2-4732-82d1-79553d1435c1-007 FINISHED \n", + "985 2d35173f-30e5-4908-bf33-5784bcc42dcd-007 FINISHED \n", + "986 2c48d618-bf09-483d-aa43-44872f0b416b-007 FINISHED \n", + "\n", + " project_creator last_updated \n", + "0 schristley 2025-01-25 15:43:51.678 \n", + "1 schristley 2025-01-24 04:20:37.891 \n", + "2 scott_public 2025-01-22 15:04:46.891 \n", + "3 schristley 2025-01-14 22:31:02.980 \n", + "4 schristley 2025-01-09 04:21:12.476 \n", + ".. ... ... 
\n", + "982 scott_test1 2022-04-04 14:03:49.907 \n", + "983 scott_test1 2022-04-01 16:19:13.528 \n", + "984 scott_test1 2022-04-01 15:27:23.091 \n", + "985 areceveur 2022-04-01 09:37:15.693 \n", + "986 areceveur 2022-03-31 22:01:50.356 \n", + "\n", + "[987 rows x 8 columns]" + ] + }, + "execution_count": 66, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "all_jobs_list = []\n", + "other_folders = set()\n", + "for i in range(len(jobs_all_df)):\n", + " archive_path = jobs_all_df['archive_path'][i]\n", + " # Check if the 'archive_path' is not None or NaN and contains at least one '/'\n", + " if pd.notna(archive_path) and '/' in archive_path:\n", + " split_path = archive_path.split('/')\n", + " if len(split_path) > 3: # Ensure there is at least two elements after splitting\n", + " app_id = jobs_all_df['app_id'][i]\n", + " status = jobs_all_df['status'][i]\n", + " project_folder = split_path[1]\n", + " if project_folder in ['community']:\n", + " projectUUID = split_path[3] #nor sure if 3rd or 5th\n", + " print(json.dumps(archive_path, indent = 4))\n", + " break\n", + " elif project_folder in ['projects']:\n", + " projectUUID = split_path[2]\n", + " else:\n", + " projectUUID = split_path[2]\n", + " JobUuid = jobs_all_df['uuid'][i]\n", + " remote_outcome = jobs_all_df['remote_outcome'][i]\n", + " project_creator = jobs_all_df['parameters.Creator'][i]\n", + " last_updated = jobs_all_df['last_updated'][i]\n", + " all_jobs_list.append({\n", + " 'app_id': app_id,\n", + " 'status': status,\n", + " 'project_folder': project_folder,\n", + " 'projectUUID': projectUUID,\n", + " 'JobUuid': JobUuid,\n", + " 'remote_outcome': remote_outcome,\n", + " 'project_creator': project_creator,\n", + " 'last_updated': last_updated\n", + " })\n", + " # print(split_path)\n", + "df_allJobs = pd.DataFrame(all_jobs_list)\n", + "print(other_folders)\n", + "df_allJobs" + ] + }, + { + "cell_type": "code", + "execution_count": 67, + "id": 
"f63f8ac0-2463-4014-bcb2-312a8debb568", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
app_idstatusproject_folderprojectUUIDJobUuidremote_outcomeproject_creatorlast_updated
0repcalc-ls6-2.0u8FINISHEDprojects5456400192359305711-242ac118-0001-012c7cd08ad-a560-4574-a363-b9cc4c5e051d-007FINISHEDschristley2025-01-25 15:43:51.678
1igblast-ls6-1.20u6FINISHEDprojects5456400192359305711-242ac118-0001-0129188bf80-e868-4e05-a6b4-308c044108d7-007FINISHEDschristley2025-01-24 04:20:37.891
2igblast-ls6-1.20u6FAILEDprojects5199144433477554666-242ac116-0001-012773a5cb7-b369-4517-a221-83d57e3899e5-007FAILED_SKIP_ARCHIVEscott_public2025-01-22 15:04:46.891
3vdj_pipe-ls6-0.1.7u2FINISHEDprojects5456400192359305711-242ac118-0001-012c0ab5f4a-97b0-4dc3-93e8-0908c95cb3a4-007FINISHEDschristley2025-01-14 22:31:02.980
4vdj_pipe-ls6-0.1.7u2FINISHEDprojects6589143665654501871-242ac118-0001-012ad02cb34-250e-48cb-a06e-973e431b62ee-007FINISHEDschristley2025-01-09 04:21:12.476
...........................
963repcalc-stampede2-1.0u7FINISHEDprojects3087851897075012076-242ac118-0001-012fdcee8a4-b88b-4f44-b347-21c6c31f8401-007FINISHEDdiary2022-04-09 07:02:15.731
964igblast-stampede2-1.14u6FINISHEDprojects3087851897075012076-242ac118-0001-0124419f0fc-67dc-4f21-a66a-ff178393a4de-007FINISHEDdiary2022-04-07 16:42:24.925
967presto-stampede2-0.5u2FINISHEDprojects3087851897075012076-242ac118-0001-0123eb742ae-6caa-43c1-b65b-3fa35ad7754b-007FINISHEDdiary2022-04-06 10:42:46.132
985vdj_pipe-stampede2-0.1.7u4FINISHEDprojects8557415777028206100-242ac118-0001-0122d35173f-30e5-4908-bf33-5784bcc42dcd-007FINISHEDareceveur2022-04-01 09:37:15.693
986vdj_pipe-stampede2-0.1.7u4FINISHEDprojects8557415777028206100-242ac118-0001-0122c48d618-bf09-483d-aa43-44872f0b416b-007FINISHEDareceveur2022-03-31 22:01:50.356
\n", + "

812 rows × 8 columns

\n", + "
" + ], + "text/plain": [ + " app_id status project_folder \\\n", + "0 repcalc-ls6-2.0u8 FINISHED projects \n", + "1 igblast-ls6-1.20u6 FINISHED projects \n", + "2 igblast-ls6-1.20u6 FAILED projects \n", + "3 vdj_pipe-ls6-0.1.7u2 FINISHED projects \n", + "4 vdj_pipe-ls6-0.1.7u2 FINISHED projects \n", + ".. ... ... ... \n", + "963 repcalc-stampede2-1.0u7 FINISHED projects \n", + "964 igblast-stampede2-1.14u6 FINISHED projects \n", + "967 presto-stampede2-0.5u2 FINISHED projects \n", + "985 vdj_pipe-stampede2-0.1.7u4 FINISHED projects \n", + "986 vdj_pipe-stampede2-0.1.7u4 FINISHED projects \n", + "\n", + " projectUUID \\\n", + "0 5456400192359305711-242ac118-0001-012 \n", + "1 5456400192359305711-242ac118-0001-012 \n", + "2 5199144433477554666-242ac116-0001-012 \n", + "3 5456400192359305711-242ac118-0001-012 \n", + "4 6589143665654501871-242ac118-0001-012 \n", + ".. ... \n", + "963 3087851897075012076-242ac118-0001-012 \n", + "964 3087851897075012076-242ac118-0001-012 \n", + "967 3087851897075012076-242ac118-0001-012 \n", + "985 8557415777028206100-242ac118-0001-012 \n", + "986 8557415777028206100-242ac118-0001-012 \n", + "\n", + " JobUuid remote_outcome \\\n", + "0 c7cd08ad-a560-4574-a363-b9cc4c5e051d-007 FINISHED \n", + "1 9188bf80-e868-4e05-a6b4-308c044108d7-007 FINISHED \n", + "2 773a5cb7-b369-4517-a221-83d57e3899e5-007 FAILED_SKIP_ARCHIVE \n", + "3 c0ab5f4a-97b0-4dc3-93e8-0908c95cb3a4-007 FINISHED \n", + "4 ad02cb34-250e-48cb-a06e-973e431b62ee-007 FINISHED \n", + ".. ... ... 
\n", + "963 fdcee8a4-b88b-4f44-b347-21c6c31f8401-007 FINISHED \n", + "964 4419f0fc-67dc-4f21-a66a-ff178393a4de-007 FINISHED \n", + "967 3eb742ae-6caa-43c1-b65b-3fa35ad7754b-007 FINISHED \n", + "985 2d35173f-30e5-4908-bf33-5784bcc42dcd-007 FINISHED \n", + "986 2c48d618-bf09-483d-aa43-44872f0b416b-007 FINISHED \n", + "\n", + " project_creator last_updated \n", + "0 schristley 2025-01-25 15:43:51.678 \n", + "1 schristley 2025-01-24 04:20:37.891 \n", + "2 scott_public 2025-01-22 15:04:46.891 \n", + "3 schristley 2025-01-14 22:31:02.980 \n", + "4 schristley 2025-01-09 04:21:12.476 \n", + ".. ... ... \n", + "963 diary 2022-04-09 07:02:15.731 \n", + "964 diary 2022-04-07 16:42:24.925 \n", + "967 diary 2022-04-06 10:42:46.132 \n", + "985 areceveur 2022-04-01 09:37:15.693 \n", + "986 areceveur 2022-03-31 22:01:50.356 \n", + "\n", + "[812 rows x 8 columns]" + ] + }, + "execution_count": 67, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df_allJobs_filtered = df_allJobs[df_allJobs.projectUUID.isin(filtered_metadata_perms_df['uuid'])]\n", + "df_allJobs_filtered\n" + ] + }, + { + "cell_type": "code", + "execution_count": 69, + "id": "a926b58d-bb4f-488f-a454-1bf0b20c6ae3", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "project_folder\n", + "projects 812\n", + "archive 175\n", + "Name: count, dtype: int64" + ] + }, + "execution_count": 69, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df_allJobs.project_folder.value_counts()\n", + "# df_allJobs.projectUUID" + ] + }, + { + "cell_type": "markdown", + "id": "e3c7ad43-2cd5-4467-85fc-242c77a4843f", + "metadata": {}, + "source": [ + "## Find Verified Users With Their ProjectUUID" + ] + }, + { + "cell_type": "code", + "execution_count": 108, + "id": "b8c591c5-53eb-4bf4-95b1-759cdbd6348e", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
uuidusernameisVerifiedlast_updated
00001422658903072-5056a550b8-0001-012jdoeTrue2015-01-30T17:01:42.936-06:00
10001422658903349-5056a550b8-0001-012jfonnerTrue2015-01-30T17:01:43.349-06:00
\n", + "
" + ], + "text/plain": [ + " uuid username isVerified \\\n", + "0 0001422658903072-5056a550b8-0001-012 jdoe True \n", + "1 0001422658903349-5056a550b8-0001-012 jfonner True \n", + "\n", + " last_updated \n", + "0 2015-01-30T17:01:42.936-06:00 \n", + "1 2015-01-30T17:01:43.349-06:00 " + ] + }, + "execution_count": 108, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "filtered_df_userVerification.head(2)" + ] + }, + { + "cell_type": "code", + "execution_count": 107, + "id": "399d4564-1d61-4f2a-8010-63a99c2392f2", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
idlast_updatedpermissionusernameuuidtenant_idyear
3142014-01-29 10:28:16READ_WRITEjfonner0001389977207738-5056a550b8-0001-012vdjserver.org2014
4172014-01-29 14:06:38READ_WRITEadshkl;dasfhkdf0001391025968832-5056a550b8-0001-012vdjserver.org2014
\n", + "
" + ], + "text/plain": [ + " id last_updated permission username \\\n", + "3 14 2014-01-29 10:28:16 READ_WRITE jfonner \n", + "4 17 2014-01-29 14:06:38 READ_WRITE adshkl;dasfhkdf \n", + "\n", + " uuid tenant_id year \n", + "3 0001389977207738-5056a550b8-0001-012 vdjserver.org 2014 \n", + "4 0001391025968832-5056a550b8-0001-012 vdjserver.org 2014 " + ] + }, + "execution_count": 107, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "filtered_metadata_perms_df.head(2)" + ] + }, + { + "cell_type": "code", + "execution_count": 106, + "id": "46894eb7-9330-4b8f-981f-fc7d05363fcb", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
uuidfirstNamelastNameemailcitystatecountrycreatedlastUpdated
00001389976523746-5056a550b8-0001-012WalterScarboroughwscarbor@tacc.utexas.eduAustinTXUSA2014-01-17T10:35:23.649-06:002016-04-27T15:07:26.261-05:00
10001391029872321-5056a550b8-0001-012Test19test19@test.comNone2014-01-29T15:11:12.321-06:002014-01-29T15:12:33.955-06:00
\n", + "
" + ], + "text/plain": [ + " uuid firstName lastName \\\n", + "0 0001389976523746-5056a550b8-0001-012 Walter Scarborough \n", + "1 0001391029872321-5056a550b8-0001-012 Test 19 \n", + "\n", + " email city state country \\\n", + "0 wscarbor@tacc.utexas.edu Austin TX USA \n", + "1 test19@test.com None \n", + "\n", + " created lastUpdated \n", + "0 2014-01-17T10:35:23.649-06:00 2016-04-27T15:07:26.261-05:00 \n", + "1 2014-01-29T15:11:12.321-06:00 2014-01-29T15:12:33.955-06:00 " + ] + }, + "execution_count": 106, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df_profile.head(2)" + ] + }, + { + "cell_type": "code", + "execution_count": 155, + "id": "ae1e132c-0c06-426a-a0ea-5f50debb29be", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
uuidprojectUuidassociationIds_1associationIds_2ownertask_typefile_namemimeTypelast_updated
00001395955349445-5056a550b8-0001-012NoneNonevdjauthNoneNone2014-03-27T16:22:29.444-05:00
10001396029083309-5056a550b8-0001-0120001395346788177-5056a550b8-0001-0120001395939852441-5056a550b8-0001-002Nonevdjauthuploadedgitprep-latest.zipapplication/zip2014-03-28T12:51:23.309-05:00
20001396029805022-5056a550b8-0001-0120001395346788177-5056a550b8-0001-0120001395956517022-5056a550b8-0001-002NonevdjauthuploadedInduction-28.zipapplication/zip2014-03-28T13:03:25.022-05:00
30001396030144907-5056a550b8-0001-0120001395346788177-5056a550b8-0001-0120001396030144691-5056a550b8-0001-002Nonevdjauthuploadedtest10.txttext/plain2014-03-28T13:09:04.907-05:00
40001396039988083-5056a550b8-0001-0120001395346788177-5056a550b8-0001-0120001396039987794-5056a550b8-0001-002Nonevdjauthuploadedtest11.txttext/plain2014-03-28T15:53:08.083-05:00
\n", + "
" + ], + "text/plain": [ + " uuid projectUuid \\\n", + "0 0001395955349445-5056a550b8-0001-012 \n", + "1 0001396029083309-5056a550b8-0001-012 0001395346788177-5056a550b8-0001-012 \n", + "2 0001396029805022-5056a550b8-0001-012 0001395346788177-5056a550b8-0001-012 \n", + "3 0001396030144907-5056a550b8-0001-012 0001395346788177-5056a550b8-0001-012 \n", + "4 0001396039988083-5056a550b8-0001-012 0001395346788177-5056a550b8-0001-012 \n", + "\n", + " associationIds_1 associationIds_2 owner task_type \\\n", + "0 None None vdjauth \n", + "1 0001395939852441-5056a550b8-0001-002 None vdjauth uploaded \n", + "2 0001395956517022-5056a550b8-0001-002 None vdjauth uploaded \n", + "3 0001396030144691-5056a550b8-0001-002 None vdjauth uploaded \n", + "4 0001396039987794-5056a550b8-0001-002 None vdjauth uploaded \n", + "\n", + " file_name mimeType last_updated \n", + "0 None None 2014-03-27T16:22:29.444-05:00 \n", + "1 gitprep-latest.zip application/zip 2014-03-28T12:51:23.309-05:00 \n", + "2 Induction-28.zip application/zip 2014-03-28T13:03:25.022-05:00 \n", + "3 test10.txt text/plain 2014-03-28T13:09:04.907-05:00 \n", + "4 test11.txt text/plain 2014-03-28T15:53:08.083-05:00 " + ] + }, + "execution_count": 155, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df_projectFiles.head(5)" + ] + }, + { + "cell_type": "code", + "execution_count": 104, + "id": "bb4abce9-ef95-479f-bab2-c72f07dd9869", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
uuidownerprojectUuidjobUuidlastUpdated
00001400192074855-5056a550b8-0001-012vdj0001399309581559-5056a550b8-0001-0120001399315558601-5056a550b8-0001-0072014-05-15T17:14:34.855-05:00
10001400254373114-5056a550b8-0001-012vdj0001400250478554-5056a550b8-0001-0120001400254372814-5056a550b8-0001-0072014-05-16T10:32:53.114-05:00
\n", + "
" + ], + "text/plain": [ + " uuid owner \\\n", + "0 0001400192074855-5056a550b8-0001-012 vdj \n", + "1 0001400254373114-5056a550b8-0001-012 vdj \n", + "\n", + " projectUuid jobUuid \\\n", + "0 0001399309581559-5056a550b8-0001-012 0001399315558601-5056a550b8-0001-007 \n", + "1 0001400250478554-5056a550b8-0001-012 0001400254372814-5056a550b8-0001-007 \n", + "\n", + " lastUpdated \n", + "0 2014-05-15T17:14:34.855-05:00 \n", + "1 2014-05-16T10:32:53.114-05:00 " + ] + }, + "execution_count": 104, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df_projectJob.head(2)" + ] + }, + { + "cell_type": "code", + "execution_count": 119, + "id": "b9085eba-89d3-448f-aae6-0059289917a4", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(1501, 7)" + ] + }, + "execution_count": 119, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "filtered_metadata_perms_df[filtered_metadata_perms_df.uuid.isin(df_projectFiles.projectUuid)].shape" + ] + }, + { + "cell_type": "code", + "execution_count": 118, + "id": "4d4276fe-0bc7-467e-a2cb-06b15dd7557a", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(37689, 7)" + ] + }, + "execution_count": 118, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "filtered_metadata_perms_df[filtered_metadata_perms_df.uuid.isin(df_projectFiles.uuid)].shape" + ] + }, + { + "cell_type": "markdown", + "id": "acc4f9cf-2af5-48aa-958c-35cc30c185d8", + "metadata": {}, + "source": [ + "## get filtered metadata for ProjectFiles" + ] + }, + { + "cell_type": "code", + "execution_count": 144, + "id": "ff9b7b11-f196-431f-8c91-57d33e05b5aa", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
indexidlast_updatedpermissionusernameuuidtenant_idyear
039682014-03-27 16:12:46READ_WRITEwscarbor0001395346788177-5056a550b8-0001-012vdjserver.org2014
140722014-04-03 10:24:15READ_WRITEvdj0001396538655269-5056a550b8-0001-012vdjserver.org2014
2641302014-04-28 13:14:19READ_WRITEwscarbor0001398708859776-5056a550b8-0001-012vdjserver.org2014
3761422014-04-28 14:37:17READ_WRITEwscarbor0001398713837326-5056a550b8-0001-012vdjserver.org2014
4781532014-04-28 15:05:44READ_WRITEwscarbor0001398715544223-5056a550b8-0001-012vdjserver.org2014
...........................
149675639214407882025-01-08 11:36:56READ_WRITEsamwol6589143665654501871-242ac118-0001-012vdjserver.org2025
149775745314418492025-01-08 11:46:30READ_WRITEsamwol2024481073312951825-242ac118-0001-012vdjserver.org2025
149875851214429082025-01-08 11:53:00READ_WRITEsamwol7301395953249218065-242ac118-0001-012vdjserver.org2025
149975949614438922025-01-08 11:58:21READ_WRITEsamwol1710750072586572271-242ac118-0001-012vdjserver.org2025
150076020114445972025-01-13 15:13:55READ_WRITEschristley5456400192359305711-242ac118-0001-012vdjserver.org2025
\n", + "

1501 rows × 8 columns

\n", + "
" + ], + "text/plain": [ + " index id last_updated permission username \\\n", + "0 39 68 2014-03-27 16:12:46 READ_WRITE wscarbor \n", + "1 40 72 2014-04-03 10:24:15 READ_WRITE vdj \n", + "2 64 130 2014-04-28 13:14:19 READ_WRITE wscarbor \n", + "3 76 142 2014-04-28 14:37:17 READ_WRITE wscarbor \n", + "4 78 153 2014-04-28 15:05:44 READ_WRITE wscarbor \n", + "... ... ... ... ... ... \n", + "1496 756392 1440788 2025-01-08 11:36:56 READ_WRITE samwol \n", + "1497 757453 1441849 2025-01-08 11:46:30 READ_WRITE samwol \n", + "1498 758512 1442908 2025-01-08 11:53:00 READ_WRITE samwol \n", + "1499 759496 1443892 2025-01-08 11:58:21 READ_WRITE samwol \n", + "1500 760201 1444597 2025-01-13 15:13:55 READ_WRITE schristley \n", + "\n", + " uuid tenant_id year \n", + "0 0001395346788177-5056a550b8-0001-012 vdjserver.org 2014 \n", + "1 0001396538655269-5056a550b8-0001-012 vdjserver.org 2014 \n", + "2 0001398708859776-5056a550b8-0001-012 vdjserver.org 2014 \n", + "3 0001398713837326-5056a550b8-0001-012 vdjserver.org 2014 \n", + "4 0001398715544223-5056a550b8-0001-012 vdjserver.org 2014 \n", + "... ... ... ... 
\n", + "1496 6589143665654501871-242ac118-0001-012 vdjserver.org 2025 \n", + "1497 2024481073312951825-242ac118-0001-012 vdjserver.org 2025 \n", + "1498 7301395953249218065-242ac118-0001-012 vdjserver.org 2025 \n", + "1499 1710750072586572271-242ac118-0001-012 vdjserver.org 2025 \n", + "1500 5456400192359305711-242ac118-0001-012 vdjserver.org 2025 \n", + "\n", + "[1501 rows x 8 columns]" + ] + }, + "execution_count": 144, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df = filtered_metadata_perms_df[filtered_metadata_perms_df.uuid.isin(df_projectFiles.projectUuid)].reset_index()\n", + "df\n" + ] + }, + { + "cell_type": "markdown", + "id": "d13b0405-926c-4db1-9752-4a43935665ab", + "metadata": {}, + "source": [ + "## Unique # of Users with Projects" + ] + }, + { + "cell_type": "code", + "execution_count": 148, + "id": "ae17ea46-7e91-47fc-a221-0b18116a4b4f", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "573" + ] + }, + "execution_count": 148, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.username.nunique()" + ] + }, + { + "cell_type": "markdown", + "id": "ce6a1dbb-cc0c-4f82-ab3c-06c7469a0dbd", + "metadata": {}, + "source": [ + "## Unique Number of ProjectUUIDs" + ] + }, + { + "cell_type": "code", + "execution_count": 147, + "id": "6361278c-8a03-494b-8949-5bb2dd4ddda5", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "1223" + ] + }, + "execution_count": 147, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.uuid.nunique()" + ] + }, + { + "cell_type": "markdown", + "id": "d5604775-e9a1-44f4-9eb4-61ca5205cb73", + "metadata": {}, + "source": [ + "## Find Number of Projects Each User Has" + ] + }, + { + "cell_type": "code", + "execution_count": 154, + "id": "256738c3-befc-422b-8546-444768dee6e2", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "username\n", + "schristley 128\n", + "wscarbor 87\n", + "wrounds 72\n", + 
"esalina 67\n", + "itoby 49\n", + " ... \n", + "mohanapriya_r 1\n", + "san4011 1\n", + "emsen24 1\n", + "xiaojianhan 1\n", + "rosepeterson1010 1\n", + "Name: count, Length: 573, dtype: int64" + ] + }, + "execution_count": 154, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df = filtered_metadata_perms_df[filtered_metadata_perms_df.uuid.isin(df_projectFiles.projectUuid)].reset_index()\n", + "df.username.value_counts()" + ] + }, + { + "cell_type": "code", + "execution_count": 206, + "id": "b36ca5a7-09e1-4315-9a7e-4ad70188eecc", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "1501" + ] + }, + "execution_count": 206, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "len(info_list)\n", + "len(project_uuids)" + ] + }, + { + "cell_type": "code", + "execution_count": 226, + "id": "e6d6ffd3-5494-49b0-8615-4611942ddd3d", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
projectUuidsubject_countfile_count
12186842772679323545105-242ac118-0001-01201
12194478944862770032145-242ac118-0001-01201
12202405101561928094191-242ac118-0001-01202
12216589143665654501871-242ac118-0001-012037
12225456400192359305711-242ac118-0001-012025
\n", + "
" + ], + "text/plain": [ + " projectUuid subject_count file_count\n", + "1218 6842772679323545105-242ac118-0001-012 0 1\n", + "1219 4478944862770032145-242ac118-0001-012 0 1\n", + "1220 2405101561928094191-242ac118-0001-012 0 2\n", + "1221 6589143665654501871-242ac118-0001-012 0 37\n", + "1222 5456400192359305711-242ac118-0001-012 0 25" + ] + }, + "execution_count": 226, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from collections import defaultdict\n", + "project_uuids = filtered_metadata_perms_df[filtered_metadata_perms_df.uuid.isin(df_projectFiles.projectUuid)].uuid.unique()\n", + "info_list = []\n", + "all_names = defaultdict(int)\n", + "for project_uuid in project_uuids:\n", + " subject_cnt = 0\n", + " file_cnt = 0\n", + " for item in jsonarray:\n", + " if project_uuid in item.get('associationIds', None):\n", + " all_names[item['name']] += 1\n", + " if item['name'] in ['subject', 'projectFile']:\n", + " \n", + " if item['name'] == 'subject':\n", + " subject_cnt += 1\n", + " elif item['name'] == 'projectFile':\n", + " file_cnt += 1\n", + " info_list.append({\n", + " 'projectUuid': project_uuid,\n", + " 'subject_count':subject_cnt,\n", + " 'file_count':file_cnt\n", + " })\n", + "\n", + "# Create a DataFrame from the list of extracted data\n", + "df_count = pd.DataFrame(info_list)\n", + "# Print the DataFrame\n", + "df_count.tail()\n", + "\n", + " " + ] + }, + { + "cell_type": "code", + "execution_count": 262, + "id": "23948e21-1af4-4d54-aafe-b9a7354d43ac", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "defaultdict(int,\n", + " {'projectFile': 24297,\n", + " 'projectJob': 5500,\n", + " 'processMetadata': 3368,\n", + " 'projectJobFile': 384891,\n", + " 'subject': 2307,\n", + " 'cellProcessing': 958,\n", + " 'nucleicAcidProcessing': 1346,\n", + " 'projectJobArchive': 453,\n", + " 'sample': 8316,\n", + " 'diagnosis': 102,\n", + " 'sampleGroup': 223,\n", + " 'sampleColumns': 49,\n", + " 'subjectColumns': 
41,\n", + " 'repertoire': 1362,\n", + " 'projectLoad': 16,\n", + " 'rearrangementLoad': 1806,\n", + " 'cellProcessingColumns': 27,\n", + " 'nucleicAcidProcessingColumns': 29,\n", + " 'diagnosisColumns': 7,\n", + " 'bioProcessingColumns': 1,\n", + " 'bioProcessing': 5,\n", + " 'sample_processing': 324,\n", + " 'data_processing': 420})" + ] + }, + "execution_count": 262, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "all_names" + ] + }, + { + "cell_type": "code", + "execution_count": 234, + "id": "253030fe-29bf-4f0f-baeb-9d6591f5c189", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([ 0, 0, 0, ..., 2, 37, 25], shape=(1223,))" + ] + }, + "execution_count": 234, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df_count.file_count.values\n", + "# df_count[ df_count.projectUuid=='520125623043878425-242ac11c-0001-012']" + ] + }, + { + "cell_type": "code", + "execution_count": 240, + "id": "506a547d-21a8-40e2-8505-ba627da50cbf", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
idlast_updatedpermissionusernameuuidtenant_idyearprojectUuidsubject_countfile_count
0682014-03-27 16:12:46READ_WRITEwscarbor0001395346788177-5056a550b8-0001-012vdjserver.org20140001395346788177-5056a550b8-0001-01200
1722014-04-03 10:24:15READ_WRITEvdj0001396538655269-5056a550b8-0001-012vdjserver.org20140001396538655269-5056a550b8-0001-01200
21302014-04-28 13:14:19READ_WRITEwscarbor0001398708859776-5056a550b8-0001-012vdjserver.org20140001398708859776-5056a550b8-0001-01200
31422014-04-28 14:37:17READ_WRITEwscarbor0001398713837326-5056a550b8-0001-012vdjserver.org20140001398713837326-5056a550b8-0001-01200
41532014-04-28 15:05:44READ_WRITEwscarbor0001398715544223-5056a550b8-0001-012vdjserver.org20140001398715544223-5056a550b8-0001-01200
.................................
149614407882025-01-08 11:36:56READ_WRITEsamwol6589143665654501871-242ac118-0001-012vdjserver.org20256589143665654501871-242ac118-0001-012037
149714418492025-01-08 11:46:30READ_WRITEsamwol2024481073312951825-242ac118-0001-012vdjserver.org20252024481073312951825-242ac118-0001-0120101
149814429082025-01-08 11:53:00READ_WRITEsamwol7301395953249218065-242ac118-0001-012vdjserver.org20257301395953249218065-242ac118-0001-0120101
149914438922025-01-08 11:58:21READ_WRITEsamwol1710750072586572271-242ac118-0001-012vdjserver.org20251710750072586572271-242ac118-0001-012091
150014445972025-01-13 15:13:55READ_WRITEschristley5456400192359305711-242ac118-0001-012vdjserver.org20255456400192359305711-242ac118-0001-012025
\n", + "

1501 rows × 10 columns

\n", + "
" + ], + "text/plain": [ + " id last_updated permission username \\\n", + "0 68 2014-03-27 16:12:46 READ_WRITE wscarbor \n", + "1 72 2014-04-03 10:24:15 READ_WRITE vdj \n", + "2 130 2014-04-28 13:14:19 READ_WRITE wscarbor \n", + "3 142 2014-04-28 14:37:17 READ_WRITE wscarbor \n", + "4 153 2014-04-28 15:05:44 READ_WRITE wscarbor \n", + "... ... ... ... ... \n", + "1496 1440788 2025-01-08 11:36:56 READ_WRITE samwol \n", + "1497 1441849 2025-01-08 11:46:30 READ_WRITE samwol \n", + "1498 1442908 2025-01-08 11:53:00 READ_WRITE samwol \n", + "1499 1443892 2025-01-08 11:58:21 READ_WRITE samwol \n", + "1500 1444597 2025-01-13 15:13:55 READ_WRITE schristley \n", + "\n", + " uuid tenant_id year \\\n", + "0 0001395346788177-5056a550b8-0001-012 vdjserver.org 2014 \n", + "1 0001396538655269-5056a550b8-0001-012 vdjserver.org 2014 \n", + "2 0001398708859776-5056a550b8-0001-012 vdjserver.org 2014 \n", + "3 0001398713837326-5056a550b8-0001-012 vdjserver.org 2014 \n", + "4 0001398715544223-5056a550b8-0001-012 vdjserver.org 2014 \n", + "... ... ... ... \n", + "1496 6589143665654501871-242ac118-0001-012 vdjserver.org 2025 \n", + "1497 2024481073312951825-242ac118-0001-012 vdjserver.org 2025 \n", + "1498 7301395953249218065-242ac118-0001-012 vdjserver.org 2025 \n", + "1499 1710750072586572271-242ac118-0001-012 vdjserver.org 2025 \n", + "1500 5456400192359305711-242ac118-0001-012 vdjserver.org 2025 \n", + "\n", + " projectUuid subject_count file_count \n", + "0 0001395346788177-5056a550b8-0001-012 0 0 \n", + "1 0001396538655269-5056a550b8-0001-012 0 0 \n", + "2 0001398708859776-5056a550b8-0001-012 0 0 \n", + "3 0001398713837326-5056a550b8-0001-012 0 0 \n", + "4 0001398715544223-5056a550b8-0001-012 0 0 \n", + "... ... ... ... 
\n", + "1496 6589143665654501871-242ac118-0001-012 0 37 \n", + "1497 2024481073312951825-242ac118-0001-012 0 101 \n", + "1498 7301395953249218065-242ac118-0001-012 0 101 \n", + "1499 1710750072586572271-242ac118-0001-012 0 91 \n", + "1500 5456400192359305711-242ac118-0001-012 0 25 \n", + "\n", + "[1501 rows x 10 columns]" + ] + }, + "execution_count": 240, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "merged_df = pd.merge(filtered_metadata_perms_df, df_count, left_on='uuid', right_on='projectUuid', how='inner')\n", + "merged_df\n" + ] + }, + { + "cell_type": "code", + "execution_count": 244, + "id": "41a33317-0037-4e57-a0f6-51dd7b30fcec", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Unique Numner of Users with Subjects:89\n", + "Unique Numner of Users with Files:528\n" + ] + } + ], + "source": [ + "n_subjects = merged_df[merged_df.subject_count > 0].username.nunique()\n", + "print(f\"Unique Numner of Users with Subjects:{n_subjects}\")\n", + "\n", + "n_files = merged_df[merged_df.file_count > 0].username.nunique()\n", + "print(f\"Unique Numner of Users with Files:{n_files}\")\n" + ] + }, + { + "cell_type": "code", + "execution_count": 267, + "id": "ed40415c-48b6-4ba5-900a-478fece2cb6d", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
app_idstatusproject_folderprojectUUIDJobUuidremote_outcomeproject_creatorlast_updated
0repcalc-ls6-2.0u8FINISHEDprojects5456400192359305711-242ac118-0001-012c7cd08ad-a560-4574-a363-b9cc4c5e051d-007FINISHEDschristley2025-01-25 15:43:51.678
1igblast-ls6-1.20u6FINISHEDprojects5456400192359305711-242ac118-0001-0129188bf80-e868-4e05-a6b4-308c044108d7-007FINISHEDschristley2025-01-24 04:20:37.891
2igblast-ls6-1.20u6FAILEDprojects5199144433477554666-242ac116-0001-012773a5cb7-b369-4517-a221-83d57e3899e5-007FAILED_SKIP_ARCHIVEscott_public2025-01-22 15:04:46.891
3vdj_pipe-ls6-0.1.7u2FINISHEDprojects5456400192359305711-242ac118-0001-012c0ab5f4a-97b0-4dc3-93e8-0908c95cb3a4-007FINISHEDschristley2025-01-14 22:31:02.980
4vdj_pipe-ls6-0.1.7u2FINISHEDprojects6589143665654501871-242ac118-0001-012ad02cb34-250e-48cb-a06e-973e431b62ee-007FINISHEDschristley2025-01-09 04:21:12.476
...........................
963repcalc-stampede2-1.0u7FINISHEDprojects3087851897075012076-242ac118-0001-012fdcee8a4-b88b-4f44-b347-21c6c31f8401-007FINISHEDdiary2022-04-09 07:02:15.731
964igblast-stampede2-1.14u6FINISHEDprojects3087851897075012076-242ac118-0001-0124419f0fc-67dc-4f21-a66a-ff178393a4de-007FINISHEDdiary2022-04-07 16:42:24.925
967presto-stampede2-0.5u2FINISHEDprojects3087851897075012076-242ac118-0001-0123eb742ae-6caa-43c1-b65b-3fa35ad7754b-007FINISHEDdiary2022-04-06 10:42:46.132
985vdj_pipe-stampede2-0.1.7u4FINISHEDprojects8557415777028206100-242ac118-0001-0122d35173f-30e5-4908-bf33-5784bcc42dcd-007FINISHEDareceveur2022-04-01 09:37:15.693
986vdj_pipe-stampede2-0.1.7u4FINISHEDprojects8557415777028206100-242ac118-0001-0122c48d618-bf09-483d-aa43-44872f0b416b-007FINISHEDareceveur2022-03-31 22:01:50.356
\n", + "

812 rows × 8 columns

\n", + "
" + ], + "text/plain": [ + " app_id status project_folder \\\n", + "0 repcalc-ls6-2.0u8 FINISHED projects \n", + "1 igblast-ls6-1.20u6 FINISHED projects \n", + "2 igblast-ls6-1.20u6 FAILED projects \n", + "3 vdj_pipe-ls6-0.1.7u2 FINISHED projects \n", + "4 vdj_pipe-ls6-0.1.7u2 FINISHED projects \n", + ".. ... ... ... \n", + "963 repcalc-stampede2-1.0u7 FINISHED projects \n", + "964 igblast-stampede2-1.14u6 FINISHED projects \n", + "967 presto-stampede2-0.5u2 FINISHED projects \n", + "985 vdj_pipe-stampede2-0.1.7u4 FINISHED projects \n", + "986 vdj_pipe-stampede2-0.1.7u4 FINISHED projects \n", + "\n", + " projectUUID \\\n", + "0 5456400192359305711-242ac118-0001-012 \n", + "1 5456400192359305711-242ac118-0001-012 \n", + "2 5199144433477554666-242ac116-0001-012 \n", + "3 5456400192359305711-242ac118-0001-012 \n", + "4 6589143665654501871-242ac118-0001-012 \n", + ".. ... \n", + "963 3087851897075012076-242ac118-0001-012 \n", + "964 3087851897075012076-242ac118-0001-012 \n", + "967 3087851897075012076-242ac118-0001-012 \n", + "985 8557415777028206100-242ac118-0001-012 \n", + "986 8557415777028206100-242ac118-0001-012 \n", + "\n", + " JobUuid remote_outcome \\\n", + "0 c7cd08ad-a560-4574-a363-b9cc4c5e051d-007 FINISHED \n", + "1 9188bf80-e868-4e05-a6b4-308c044108d7-007 FINISHED \n", + "2 773a5cb7-b369-4517-a221-83d57e3899e5-007 FAILED_SKIP_ARCHIVE \n", + "3 c0ab5f4a-97b0-4dc3-93e8-0908c95cb3a4-007 FINISHED \n", + "4 ad02cb34-250e-48cb-a06e-973e431b62ee-007 FINISHED \n", + ".. ... ... 
\n", + "963 fdcee8a4-b88b-4f44-b347-21c6c31f8401-007 FINISHED \n", + "964 4419f0fc-67dc-4f21-a66a-ff178393a4de-007 FINISHED \n", + "967 3eb742ae-6caa-43c1-b65b-3fa35ad7754b-007 FINISHED \n", + "985 2d35173f-30e5-4908-bf33-5784bcc42dcd-007 FINISHED \n", + "986 2c48d618-bf09-483d-aa43-44872f0b416b-007 FINISHED \n", + "\n", + " project_creator last_updated \n", + "0 schristley 2025-01-25 15:43:51.678 \n", + "1 schristley 2025-01-24 04:20:37.891 \n", + "2 scott_public 2025-01-22 15:04:46.891 \n", + "3 schristley 2025-01-14 22:31:02.980 \n", + "4 schristley 2025-01-09 04:21:12.476 \n", + ".. ... ... \n", + "963 diary 2022-04-09 07:02:15.731 \n", + "964 diary 2022-04-07 16:42:24.925 \n", + "967 diary 2022-04-06 10:42:46.132 \n", + "985 areceveur 2022-04-01 09:37:15.693 \n", + "986 areceveur 2022-03-31 22:01:50.356 \n", + "\n", + "[812 rows x 8 columns]" + ] + }, + "execution_count": 267, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "temp" + ] + }, + { + "cell_type": "code", + "execution_count": 268, + "id": "a88aa2d2-cda8-433a-9f3e-d8afece08175", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Unique Number of Projects with Jobs:160\n", + "Unique Number of users with Jobs:106\n" + ] + } + ], + "source": [ + "project_with_files = merged_df[merged_df.file_count > 0]\n", + "temp = df_allJobs_filtered[df_allJobs_filtered.projectUUID.isin(project_with_files.projectUuid)]\n", + "temp\n", + "print(f\"Unique Number of Projects with Jobs:{temp.projectUUID.nunique()}\")\n", + "k = merged_df[merged_df.projectUuid.isin(temp.projectUUID)].username.nunique()\n", + "\n", + "print(f\"Unique Number of users with Jobs:{k}\")\n", + "project_uuid_with_jobs = temp.projectUUID.unique()\n" + ] + }, + { + "cell_type": "code", + "execution_count": 270, + "id": "456ae649-8367-4176-baa7-b89af477ce9c", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array(['lliu', 'schristley', 'itoby', 
'lgcowell', 'jostmey',\n", + " 'scott_public', 'ockenhoc', 'jjy76', 'eyecatch', 'jjy_ff',\n", + " 'yxy0824', 'areceveur', 'diary', 'liamckealy', 'fvale',\n", + " 'tapgeorge1', 'mmedapj', 'fjessica_paran', 'aislinn.jennings',\n", + " 'alaaselim99', 'felipelopesassis', 'paran.ux', 'lmorales',\n", + " 'dexwel25', 'jtearnest', 'zhouhao961004', 'parkak11', 'marius101',\n", + " 'raghwendra088', 'bi213', 'keiophadbc', 'keiophadbcdt', 'laostzu',\n", + " 'mcavallaro2', 'sikhlas', 'sysbio_ibm', 'pnamxencor8349',\n", + " 'mabrockman', 'npacalin', 'vdjexplore007', 'marilia', 'htejedam',\n", + " 'phaedras', 'kshiming', 'yuka', 'beryl_cummings', 'havanap1',\n", + " 'ashvindprabahran', 'th', 'counseler', 'shaojunliu',\n", + " 'victorialopez', 'okd03', 'clancey', 'aboli', 'benbaran',\n", + " 'umm_ikc', 'khani', 'im2211', 'nmurray', 'bdandres',\n", + " 'mvcarlosdominguez', 'silviapc', 'smherrin', 'araapp',\n", + " 'raffaele.iorio', 'menezko', 'jennifer_gonzalez77', 'antibody2976',\n", + " 'ustilago', 'lili', 'ugg1', 'andreher', 'huangtao', 'calebsmall72',\n", + " 'jianche', 'krloritz', 'tomcaniels', 'mark.kelly', '2u35',\n", + " 'cs2023', 'kunhuilu', 'stewarte', 'shbeck', 'altvatet',\n", + " 'mapt19222', 'pswanenberg13', 'pat21778', 'bgarcia17', 'landmat',\n", + " 'loubna_boutkhil', 'emmatouizer', 'tanno-hd', 'san4011',\n", + " 'nakhan05', 'harrywhite', 'laraschlegel2009', 's234499',\n", + " 'terresroxane', 'tcukkry', 'mcprado', 'minici.claudia',\n", + " 'nianbinli', 'crushseven', 'sharsh23', 'samwol'], dtype=object)" + ] + }, + "execution_count": 270, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "#project_uuids = merged_df[merged_df.file_count > 0].projectUuid.unique()\n", + "# all_users = {}\n", + "# for project_uuid in project_uuids:\n", + " \n", + "merged_df[merged_df.projectUuid.isin(temp.projectUUID)].username.unique() \n", + " " + ] + }, + { + "cell_type": "code", + "execution_count": 279, + "id": "dd14bd18-083f-416f-88bd-56b73984819e", + 
"metadata": {}, + "outputs": [], + "source": [ + "nakhans_job = merged_df[merged_df.username =='nakhan05'].uuid.values" + ] + }, + { + "cell_type": "code", + "execution_count": 280, + "id": "2a9d911e-06cb-4cc6-b38d-3dfb5468f4e0", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
app_idstatusproject_folderprojectUUIDJobUuidremote_outcomeproject_creatorlast_updated
5repcalc-ls6-2.0u8FINISHEDprojects5261827584879226385-242ac118-0001-01256208591-7b9d-4ed7-8ef8-8c9519b515e2-007FINISHEDschristley2024-12-18 04:16:58.081
6igblast-ls6-1.20u6FINISHEDprojects5261827584879226385-242ac118-0001-012abdc1398-2d71-4d96-940e-cc5a66a15de8-007FINISHEDschristley2024-12-17 13:30:24.879
7igblast-ls6-1.20u6FINISHEDprojects5261827584879226385-242ac118-0001-0123fdac741-cb54-4457-8748-925a2f8226d5-007FINISHEDschristley2024-12-16 10:33:08.101
8igblast-ls6-1.20u6FINISHEDprojects5261827584879226385-242ac118-0001-0122912131a-c8ba-4290-9190-8ab330b2e1ac-007FINISHEDschristley2024-12-14 05:53:45.705
9repcalc-ls6-2.0u8FINISHEDprojects5261827584879226385-242ac118-0001-012cb6ca396-d185-4a47-b0da-0e6fa326f410-007FINISHEDnakhan052024-12-13 01:44:54.856
19igblast-ls6-1.20u6FINISHEDprojects5261827584879226385-242ac118-0001-012cfbbaa44-ea65-4c59-9e50-ca365f0e8f04-007FINISHEDschristley2024-11-12 08:28:52.057
38igblast-ls6-1.20u6FINISHEDprojects5333187196986060305-242ac118-0001-012a1715da0-ff82-4fbe-b1ae-b2c763304c4f-007FINISHEDschristley2024-09-10 08:19:33.314
41repcalc-ls6-2.0u8FINISHEDprojects5333187196986060305-242ac118-0001-012b5d671cd-f9bb-4cd7-9a68-07c22cd2c402-007FINISHEDschristley2024-08-31 20:05:27.119
42igblast-ls6-1.20u6FINISHEDprojects5333187196986060305-242ac118-0001-012ea60242a-48f5-4e50-bf7c-0f5798c1d091-007FINISHEDschristley2024-08-30 17:44:41.069
45igblast-ls6-1.20u6FINISHEDprojects5333187196986060305-242ac118-0001-0127a32567f-725c-4a20-9ef4-e4e8f6a88c6f-007FINISHEDnakhan052024-08-02 23:30:17.374
46repcalc-ls6-2.0u8FINISHEDprojects5333187196986060305-242ac118-0001-012fd19fec3-770b-4f8d-9c7a-947ad309e50b-007FINISHEDnakhan052024-08-02 05:48:35.868
47igblast-ls6-1.20u6FINISHEDprojects5333187196986060305-242ac118-0001-012601420c9-1c21-4cc8-8847-968b2f5a8171-007FINISHEDnakhan052024-07-30 14:56:12.540
49igblast-ls6-1.20u6FINISHEDprojects5333187196986060305-242ac118-0001-012688c2784-564d-4a26-8126-08368063ff34-007FINISHEDnakhan052024-07-15 19:32:50.587
50igblast-ls6-1.20u6FINISHEDprojects5333187196986060305-242ac118-0001-0128af92a89-b38b-4ef9-8aab-526cf3a3c444-007FINISHEDschristley2024-07-09 18:55:53.449
\n", + "
" + ], + "text/plain": [ + " app_id status project_folder \\\n", + "5 repcalc-ls6-2.0u8 FINISHED projects \n", + "6 igblast-ls6-1.20u6 FINISHED projects \n", + "7 igblast-ls6-1.20u6 FINISHED projects \n", + "8 igblast-ls6-1.20u6 FINISHED projects \n", + "9 repcalc-ls6-2.0u8 FINISHED projects \n", + "19 igblast-ls6-1.20u6 FINISHED projects \n", + "38 igblast-ls6-1.20u6 FINISHED projects \n", + "41 repcalc-ls6-2.0u8 FINISHED projects \n", + "42 igblast-ls6-1.20u6 FINISHED projects \n", + "45 igblast-ls6-1.20u6 FINISHED projects \n", + "46 repcalc-ls6-2.0u8 FINISHED projects \n", + "47 igblast-ls6-1.20u6 FINISHED projects \n", + "49 igblast-ls6-1.20u6 FINISHED projects \n", + "50 igblast-ls6-1.20u6 FINISHED projects \n", + "\n", + " projectUUID \\\n", + "5 5261827584879226385-242ac118-0001-012 \n", + "6 5261827584879226385-242ac118-0001-012 \n", + "7 5261827584879226385-242ac118-0001-012 \n", + "8 5261827584879226385-242ac118-0001-012 \n", + "9 5261827584879226385-242ac118-0001-012 \n", + "19 5261827584879226385-242ac118-0001-012 \n", + "38 5333187196986060305-242ac118-0001-012 \n", + "41 5333187196986060305-242ac118-0001-012 \n", + "42 5333187196986060305-242ac118-0001-012 \n", + "45 5333187196986060305-242ac118-0001-012 \n", + "46 5333187196986060305-242ac118-0001-012 \n", + "47 5333187196986060305-242ac118-0001-012 \n", + "49 5333187196986060305-242ac118-0001-012 \n", + "50 5333187196986060305-242ac118-0001-012 \n", + "\n", + " JobUuid remote_outcome project_creator \\\n", + "5 56208591-7b9d-4ed7-8ef8-8c9519b515e2-007 FINISHED schristley \n", + "6 abdc1398-2d71-4d96-940e-cc5a66a15de8-007 FINISHED schristley \n", + "7 3fdac741-cb54-4457-8748-925a2f8226d5-007 FINISHED schristley \n", + "8 2912131a-c8ba-4290-9190-8ab330b2e1ac-007 FINISHED schristley \n", + "9 cb6ca396-d185-4a47-b0da-0e6fa326f410-007 FINISHED nakhan05 \n", + "19 cfbbaa44-ea65-4c59-9e50-ca365f0e8f04-007 FINISHED schristley \n", + "38 a1715da0-ff82-4fbe-b1ae-b2c763304c4f-007 FINISHED schristley \n", + 
"41 b5d671cd-f9bb-4cd7-9a68-07c22cd2c402-007 FINISHED schristley \n", + "42 ea60242a-48f5-4e50-bf7c-0f5798c1d091-007 FINISHED schristley \n", + "45 7a32567f-725c-4a20-9ef4-e4e8f6a88c6f-007 FINISHED nakhan05 \n", + "46 fd19fec3-770b-4f8d-9c7a-947ad309e50b-007 FINISHED nakhan05 \n", + "47 601420c9-1c21-4cc8-8847-968b2f5a8171-007 FINISHED nakhan05 \n", + "49 688c2784-564d-4a26-8126-08368063ff34-007 FINISHED nakhan05 \n", + "50 8af92a89-b38b-4ef9-8aab-526cf3a3c444-007 FINISHED schristley \n", + "\n", + " last_updated \n", + "5 2024-12-18 04:16:58.081 \n", + "6 2024-12-17 13:30:24.879 \n", + "7 2024-12-16 10:33:08.101 \n", + "8 2024-12-14 05:53:45.705 \n", + "9 2024-12-13 01:44:54.856 \n", + "19 2024-11-12 08:28:52.057 \n", + "38 2024-09-10 08:19:33.314 \n", + "41 2024-08-31 20:05:27.119 \n", + "42 2024-08-30 17:44:41.069 \n", + "45 2024-08-02 23:30:17.374 \n", + "46 2024-08-02 05:48:35.868 \n", + "47 2024-07-30 14:56:12.540 \n", + "49 2024-07-15 19:32:50.587 \n", + "50 2024-07-09 18:55:53.449 " + ] + }, + "execution_count": 280, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df_allJobs_filtered[df_allJobs_filtered.projectUUID.isin(nakhans_job)]" + ] + }, + { + "cell_type": "code", + "execution_count": 285, + "id": "166cc6a2-2a68-401e-8f09-38ed49f5a354", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
uuidownerfirstNamelastNameemailcitystatecountrycreatedlastUpdated
00001389976523746-5056a550b8-0001-012wscarborWalterScarboroughwscarbor@tacc.utexas.eduAustinTXUSA2014-01-17T10:35:23.649-06:002016-04-27T15:07:26.261-05:00
10001391029872321-5056a550b8-0001-012test19Test19test19@test.comNone2014-01-29T15:11:12.321-06:002014-01-29T15:12:33.955-06:00
20001391717057917-5056a550b8-0001-012test31test31@test.comNoneNoneNone2014-02-06T14:04:17.917-06:002014-02-06T14:04:17.917-06:00
30001391719926131-5056a550b8-0001-012test33NoneNonetest33@test.comNoneNoneNone2014-02-06T14:52:06.131-06:002014-02-06T14:52:06.131-06:00
40001391720404124-5056a550b8-0001-012test34NedFlanderstest34@test.comSpringfieldILNone2014-02-06T15:00:04.123-06:002014-02-06T15:00:46.376-06:00
.................................
18176242932598575984145-242ac118-0001-012rgarciaRodrigoGarcía Valienter.garciavaliente@amsterdamumc.nlAmsterdamNetherlands2025-01-02T11:11:52.894-06:002025-01-02T11:11:52.894-06:00
18182755888095932968465-242ac118-0001-012rgarciavRodrigoGarcía Valienter.garciavaliente@amsterdamumc.nl2025-01-02T11:41:52.070-06:002025-01-02T11:41:52.070-06:00
18195481029658171207185-242ac118-0001-012erichardsonEveRichardsonerichardson@lji.orgSan DiegoCaliforniaUnited States2025-01-07T18:01:25.657-06:002025-01-07T18:01:25.657-06:00
18204458895817601248785-242ac118-0001-012samwolsamuel.wollenburg@utsouthwestern.edu2025-01-07T20:24:59.390-06:002025-01-07T20:24:59.390-06:00
18219076859566261923345-242ac118-0001-012chrisjames1992Chinweike ChristopherUdoyechinweikechristopher.udoye@uksh.deLübeckSchleswig-HolsteinGermany2025-01-17T07:54:02.133-06:002025-01-17T07:54:02.133-06:00
\n", + "

1822 rows × 10 columns

\n", + "
" + ], + "text/plain": [ + " uuid owner \\\n", + "0 0001389976523746-5056a550b8-0001-012 wscarbor \n", + "1 0001391029872321-5056a550b8-0001-012 test19 \n", + "2 0001391717057917-5056a550b8-0001-012 test31 \n", + "3 0001391719926131-5056a550b8-0001-012 test33 \n", + "4 0001391720404124-5056a550b8-0001-012 test34 \n", + "... ... ... \n", + "1817 6242932598575984145-242ac118-0001-012 rgarcia \n", + "1818 2755888095932968465-242ac118-0001-012 rgarciav \n", + "1819 5481029658171207185-242ac118-0001-012 erichardson \n", + "1820 4458895817601248785-242ac118-0001-012 samwol \n", + "1821 9076859566261923345-242ac118-0001-012 chrisjames1992 \n", + "\n", + " firstName lastName \\\n", + "0 Walter Scarborough \n", + "1 Test 19 \n", + "2 \n", + "3 None None \n", + "4 Ned Flanders \n", + "... ... ... \n", + "1817 Rodrigo García Valiente \n", + "1818 Rodrigo García Valiente \n", + "1819 Eve Richardson \n", + "1820 \n", + "1821 Chinweike Christopher Udoye \n", + "\n", + " email city state \\\n", + "0 wscarbor@tacc.utexas.edu Austin TX \n", + "1 test19@test.com \n", + "2 test31@test.com None None \n", + "3 test33@test.com None None \n", + "4 test34@test.com Springfield IL \n", + "... ... ... ... \n", + "1817 r.garciavaliente@amsterdamumc.nl Amsterdam \n", + "1818 r.garciavaliente@amsterdamumc.nl \n", + "1819 erichardson@lji.org San Diego California \n", + "1820 samuel.wollenburg@utsouthwestern.edu \n", + "1821 chinweikechristopher.udoye@uksh.de Lübeck Schleswig-Holstein \n", + "\n", + " country created \\\n", + "0 USA 2014-01-17T10:35:23.649-06:00 \n", + "1 None 2014-01-29T15:11:12.321-06:00 \n", + "2 None 2014-02-06T14:04:17.917-06:00 \n", + "3 None 2014-02-06T14:52:06.131-06:00 \n", + "4 None 2014-02-06T15:00:04.123-06:00 \n", + "... ... ... 
\n", + "1817 Netherlands 2025-01-02T11:11:52.894-06:00 \n", + "1818 2025-01-02T11:41:52.070-06:00 \n", + "1819 United States 2025-01-07T18:01:25.657-06:00 \n", + "1820 2025-01-07T20:24:59.390-06:00 \n", + "1821 Germany 2025-01-17T07:54:02.133-06:00 \n", + "\n", + " lastUpdated \n", + "0 2016-04-27T15:07:26.261-05:00 \n", + "1 2014-01-29T15:12:33.955-06:00 \n", + "2 2014-02-06T14:04:17.917-06:00 \n", + "3 2014-02-06T14:52:06.131-06:00 \n", + "4 2014-02-06T15:00:46.376-06:00 \n", + "... ... \n", + "1817 2025-01-02T11:11:52.894-06:00 \n", + "1818 2025-01-02T11:41:52.070-06:00 \n", + "1819 2025-01-07T18:01:25.657-06:00 \n", + "1820 2025-01-07T20:24:59.390-06:00 \n", + "1821 2025-01-17T07:54:02.133-06:00 \n", + "\n", + "[1822 rows x 10 columns]" + ] + }, + "execution_count": 285, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df_profile" + ] + }, + { + "cell_type": "code", + "execution_count": 291, + "id": "037e902f-fad7-45b5-b60b-89d8b9c2597e", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
owneremail
0wscarborwscarbor@tacc.utexas.edu
12jfonnerjfonner@tacc.utexas.edu
20wscarbor2test@test.com
23mlevinmlevin@svarnetics.org
24esalinaedward.salinas@utsouthwestern.edu
.........
1806nianbinlilinianbin97@tmu.edu.cn
1811jkoedijkj.b.koedijk@prinsesmaximacentrum.nl
1814crushsevenZiyue.Yan@alivexbiotech.com
1816sharsh23harshit.s@thinkbio.ai
1820samwolsamuel.wollenburg@utsouthwestern.edu
\n", + "

571 rows × 2 columns

\n", + "
" + ], + "text/plain": [ + " owner email\n", + "0 wscarbor wscarbor@tacc.utexas.edu\n", + "12 jfonner jfonner@tacc.utexas.edu\n", + "20 wscarbor2 test@test.com\n", + "23 mlevin mlevin@svarnetics.org\n", + "24 esalina edward.salinas@utsouthwestern.edu\n", + "... ... ...\n", + "1806 nianbinli linianbin97@tmu.edu.cn\n", + "1811 jkoedijk j.b.koedijk@prinsesmaximacentrum.nl\n", + "1814 crushseven Ziyue.Yan@alivexbiotech.com\n", + "1816 sharsh23 harshit.s@thinkbio.ai\n", + "1820 samwol samuel.wollenburg@utsouthwestern.edu\n", + "\n", + "[571 rows x 2 columns]" + ] + }, + "execution_count": 291, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df_profile[df_profile.owner.isin(merged_df.username)][['owner', 'email']]" + ] + }, + { + "cell_type": "code", + "execution_count": 292, + "id": "49a84300-7b89-48f6-add8-2e28479ef6b7", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Emailupdated_email
012ysliu2 at stu.edu.cn12ysliu2@stu.edu.cn
118982180702 at msn.cn18982180702@msn.cn
22008110020 at alumni.sjtu.edu.cn2008110020@alumni.sjtu.edu.cn
32383920158 at qq.com2383920158@qq.com
42deepayan at gmail.com2deepayan@gmail.com
.........
571zhanxw at gmail.comzhanxw@gmail.com
572zhe.sang at gmail.comzhe.sang@gmail.com
573zicheng at utexas.eduzicheng@utexas.edu
574zluo819 at gmail.comzluo819@gmail.com
575zyf950619 at gmail.comzyf950619@gmail.com
\n", + "

576 rows × 2 columns

\n", + "
" + ], + "text/plain": [ + " Email updated_email\n", + "0 12ysliu2 at stu.edu.cn 12ysliu2@stu.edu.cn\n", + "1 18982180702 at msn.cn 18982180702@msn.cn\n", + "2 2008110020 at alumni.sjtu.edu.cn 2008110020@alumni.sjtu.edu.cn\n", + "3 2383920158 at qq.com 2383920158@qq.com\n", + "4 2deepayan at gmail.com 2deepayan@gmail.com\n", + ".. ... ...\n", + "571 zhanxw at gmail.com zhanxw@gmail.com\n", + "572 zhe.sang at gmail.com zhe.sang@gmail.com\n", + "573 zicheng at utexas.edu zicheng@utexas.edu\n", + "574 zluo819 at gmail.com zluo819@gmail.com\n", + "575 zyf950619 at gmail.com zyf950619@gmail.com\n", + "\n", + "[576 rows x 2 columns]" + ] + }, + "execution_count": 292, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "mailing_list" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "MSM_TensorFlow_2", + "language": "python", + "name": "msm_tensorflow_2" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.9" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/conversion/tapis_v2_to_v3/data_migration.ipynb b/conversion/tapis_v2_to_v3/data_migration.ipynb new file mode 100644 index 0000000..629da72 --- /dev/null +++ b/conversion/tapis_v2_to_v3/data_migration.ipynb @@ -0,0 +1,312 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 8, + "id": "ffb06a6d-cd2e-4797-a0cf-7f2a80100868", + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "import numpy as np\n", + "import matplotlib.pyplot as plt\n", + "import seaborn as sns\n", + "import json\n" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "a9064573-e9d4-44ce-9bb7-2dda54e103d2", + "metadata": {}, + "outputs": [], + "source": [ + "job_events_df = pd.read_json('/mnt/md0/Projects/vdjserver/vdjserverJobEvents.json')\n", + 
"job_permissions_df = pd.read_json('/mnt/md0/Projects/vdjserver/vdjserverJobPermissions.json')\n", + "jobs_all_df = pd.read_json('/mnt/md0/Projects/vdjserver/vdjserverJobs_all.json')\n", + "jobs_all_df['parameters.Creator'] = jobs_all_df['parameters'].apply(lambda x: json.loads(x).get('Creator', np.nan) if x else np.nan)\n", + "metadata_perms_df = pd.read_json('/mnt/md0/Projects/vdjserver/vdjserverMetadataPermissions.json')\n", + "\n", + "with open('/mnt/md0/Projects/vdjserver/vdjserverJsonArrayFeb042025.json', 'r') as f:\n", + " jsonarray = json.load(f)" + ] + }, + { + "cell_type": "markdown", + "id": "01e4616d-a406-4dd8-898f-1fb063545387", + "metadata": {}, + "source": [ + "## Look at Public Project Data" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "142acf77-a163-4f86-9e3e-bf162614537c", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
uuidstudy_idassociationIdsstudy_titlelastUpdated
04505707319090933270-242ac113-0001-0124505707319090933270-242ac113-0001-012[]Outcome and Immune Correlates of a Phase II Tr...2022-12-18T01:14:48.159-06:00
12034535426280329706-242ac113-0001-012PRJNA300878[]Individual heritable differences result in uni...2022-12-18T01:14:48.306-06:00
25350423756993719830-242ac113-0001-0121371444213709729305-242ac11c-0001-012[]T cell receptor repertoires after adoptive tra...2022-12-18T01:14:48.028-06:00
31570295022599213546-242ac113-0001-0123276777473314001386-242ac116-0001-012[]Biophysicochemical Motifs in T cell Receptor S...2022-12-18T01:14:47.900-06:00
454655627105407466-242ac113-0001-012PRJNA248475[]B cells populating the multiple sclerosis brai...2022-12-18T01:14:48.453-06:00
\n", + "
" + ], + "text/plain": [ + " uuid \\\n", + "0 4505707319090933270-242ac113-0001-012 \n", + "1 2034535426280329706-242ac113-0001-012 \n", + "2 5350423756993719830-242ac113-0001-012 \n", + "3 1570295022599213546-242ac113-0001-012 \n", + "4 54655627105407466-242ac113-0001-012 \n", + "\n", + " study_id associationIds \\\n", + "0 4505707319090933270-242ac113-0001-012 [] \n", + "1 PRJNA300878 [] \n", + "2 1371444213709729305-242ac11c-0001-012 [] \n", + "3 3276777473314001386-242ac116-0001-012 [] \n", + "4 PRJNA248475 [] \n", + "\n", + " study_title \\\n", + "0 Outcome and Immune Correlates of a Phase II Tr... \n", + "1 Individual heritable differences result in uni... \n", + "2 T cell receptor repertoires after adoptive tra... \n", + "3 Biophysicochemical Motifs in T cell Receptor S... \n", + "4 B cells populating the multiple sclerosis brai... \n", + "\n", + " lastUpdated \n", + "0 2022-12-18T01:14:48.159-06:00 \n", + "1 2022-12-18T01:14:48.306-06:00 \n", + "2 2022-12-18T01:14:48.028-06:00 \n", + "3 2022-12-18T01:14:47.900-06:00 \n", + "4 2022-12-18T01:14:48.453-06:00 " + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "public_project_list = []\n", + "for item in jsonarray:\n", + " item_type = item['name']\n", + " if item_type == 'public_project':\n", + " # json_print(item)\n", + " # break\n", + " uuid = item.get('uuid', None)\n", + " study_id = item.get('value', {}).get('study_id', None)\n", + " study_title = item.get('value', {}).get('study_title', None)\n", + " associationIds = item.get('associationIds', None)\n", + " last_updated = item.get('lastUpdated', None)\n", + " public_project_list.append({\n", + " 'uuid': uuid,\n", + " 'study_id': study_id,\n", + " 'associationIds': associationIds,\n", + " 'study_title': study_title,\n", + " 'lastUpdated': last_updated\n", + " })\n", + "\n", + "# Create a DataFrame from the list of extracted data\n", + "df_public_project = pd.DataFrame(public_project_list)\n", + 
df_public_project.head()

# %% [markdown]
# ## Convert public_project data and output into JSONL files

# %%
# Record types that are not migrated at all.
exclusion_names = [ 'projectLoad', 'rearrangementLoad' ]
# Old record name -> new record name.
name_map = { "projectFile": "project_file" }
# Permission entry attached to every migrated public_project record.
permission = [ { "username": "vdjserver.curation@gmail.com", "permission": { "read":True, "write":True } } ]

# Top-level fields copied into each output record, in this order.
col_list = ['uuid', 'owner', 'associationIds', 'created', 'lastUpdated', 'name', 'value']
obj_list = {}
data_dir = 'Metadata_public_project/'  # Create/Change the directory


def belongs_to_project(item, project_uuid):
    """Return True when `item` is the project record itself or is associated with it.

    The uuid is compared for equality. A substring test would also accept a
    longer uuid that merely ends with a shorter one (uuids in this export do
    not all have the same length). A missing `uuid` or `associationIds` is
    treated as "no match" instead of raising TypeError.
    """
    if item.get('uuid') == project_uuid:
        return True
    return project_uuid in (item.get('associationIds') or [])


def migrate_record(item, project_uuid):
    """Return the migrated form of one V2 metadata record, restricted to `col_list`.

    `item` is never modified: the migration works on a deep copy so the shared
    `jsonarray` stays pristine and this cell can be re-run safely.
    """
    # JSON round trip = deep copy without a new import; the records are JSON data.
    record = json.loads(json.dumps(item))
    record['name'] = name_map.get(record['name'], record['name'])

    if record['name'] == 'public_project':
        value = record['value']
        # move old keywords
        if value.get('vdjserver_keywords') is not None:
            if value.get('vdjserver') is None:
                value['vdjserver'] = {}
            value['vdjserver']['keywords'] = value['vdjserver_keywords']
            del value['vdjserver_keywords']
        # add permissions
        record['permission'] = permission
        # old fields
        if value.get('showArchivedJobs') is not None:
            del value['showArchivedJobs']
        if value.get('owner') is not None:
            del value['owner']

    if record['name'] == 'project_file':
        # eliminate old file UUID
        record['associationIds'] = [ project_uuid ]

    obj = {col_name: record.get(col_name) for col_name in col_list}
    if record.get('permission') is not None:
        obj['permission'] = record['permission']
    return obj


def write_project_metadata(project_uuid, records, out_dir):
    """Write every migrated record of one project to `<out_dir><project_uuid>_metadata.jsonl`.

    Parameters:
        project_uuid: uuid of the public project to export.
        records: list of metadata record dicts (the loaded JSON array).
        out_dir: output directory prefix, including the trailing slash; it must exist.

    Returns:
        The number of records written.
    """
    written = 0
    with open(f'{out_dir}{project_uuid}_metadata.jsonl', 'w') as file:
        for item in records:
            if item['name'] in exclusion_names:
                continue
            if not belongs_to_project(item, project_uuid):
                continue
            json.dump(migrate_record(item, project_uuid), file)
            file.write('\n')  # Add a newline after each JSON object
            written += 1
    return written

# %%
for project_uuid in df_public_project.uuid:
    write_project_metadata(project_uuid, jsonarray, data_dir)

# %%
## Write One public_project data into Json file

# %%
# Same output path as the loop above, so it has to produce the same content:
# use the same writer instead of dumping the un-migrated records over the
# file the export just produced.
project_uuid = '2034535426280329706-242ac113-0001-012'
write_project_metadata(project_uuid, jsonarray, data_dir)
# %%
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import json

# %%
def job_creator(parameters):
    """Return the `Creator` entry of a job's JSON-encoded `parameters` string.

    Returns NaN when `parameters` is empty/None or has no `Creator` key.
    """
    if not parameters:
        return np.nan
    return json.loads(parameters).get('Creator', np.nan)

# %%
job_events_df = pd.read_json('/mnt/md0/Projects/vdjserver/vdjserverJobEvents.json')
job_permissions_df = pd.read_json('/mnt/md0/Projects/vdjserver/vdjserverJobPermissions.json')
jobs_all_df = pd.read_json('/mnt/md0/Projects/vdjserver/vdjserverJobs_all.json')
jobs_all_df['parameters.Creator'] = jobs_all_df['parameters'].apply(job_creator)
metadata_perms_df = pd.read_json('/mnt/md0/Projects/vdjserver/vdjserverMetadataPermissions.json')

# The metadata export is loaded once, as plain JSON. The earlier
# pd.read_json() of the same export pointed at a different mount (/mnt/data2)
# and its result was overwritten below without ever being used, so it is gone.
with open('/mnt/md0/Projects/vdjserver/vdjserverJsonArrayFeb042025.json', 'r') as f:
    jsonarray = json.load(f)

jsonarray_projectJob = [ obj for obj in jsonarray if obj['name'] == 'projectJob' ]
# Keyed by project uuid. A project with several projectJob records keeps only
# the last one encountered; use jsonarray_projectJob when all of them are needed.
dict_projectJob = { obj['value']['projectUuid'] : obj for obj in jsonarray_projectJob }

# Raw job records (list of dicts) from the same file jobs_all_df was built from.
with open('/mnt/md0/Projects/vdjserver/vdjserverJobs_all.json', 'r') as f:
    jsonarray_jobs = json.load(f)
+ { + "cell_type": "markdown", + "id": "01e4616d-a406-4dd8-898f-1fb063545387", + "metadata": {}, + "source": [ + "## Look at Public Project Data" + ] + }, + { + "cell_type": "code", + "execution_count": 55, + "id": "142acf77-a163-4f86-9e3e-bf162614537c", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
uuidstudy_idassociationIdsstudy_titlelastUpdated
04505707319090933270-242ac113-0001-0124505707319090933270-242ac113-0001-012[]Outcome and Immune Correlates of a Phase II Tr...2022-12-18T01:14:48.159-06:00
12034535426280329706-242ac113-0001-012PRJNA300878[]Individual heritable differences result in uni...2022-12-18T01:14:48.306-06:00
25350423756993719830-242ac113-0001-0121371444213709729305-242ac11c-0001-012[]T cell receptor repertoires after adoptive tra...2022-12-18T01:14:48.028-06:00
31570295022599213546-242ac113-0001-0123276777473314001386-242ac116-0001-012[]Biophysicochemical Motifs in T cell Receptor S...2022-12-18T01:14:47.900-06:00
454655627105407466-242ac113-0001-012PRJNA248475[]B cells populating the multiple sclerosis brai...2022-12-18T01:14:48.453-06:00
\n", + "
# %%
def _summarise_public_project(entry):
    """Reduce a metadata record to the fields shown in the overview table.

    Fields that are absent from the record come back as None.
    """
    details = entry.get('value', {})
    return {
        'uuid': entry.get('uuid'),
        'study_id': details.get('study_id'),
        'associationIds': entry.get('associationIds'),
        'study_title': details.get('study_title'),
        'lastUpdated': entry.get('lastUpdated'),
    }


# Keep only the `public_project` records of the metadata export.
public_project_list = [
    _summarise_public_project(entry)
    for entry in jsonarray
    if entry['name'] == 'public_project'
]

# Create a DataFrame from the list of extracted data
df_public_project = pd.DataFrame(public_project_list)
"df_public_project.head()" + ] + }, + { + "cell_type": "markdown", + "id": "e29bbbf4-98d5-4af3-9248-f9e1205bb092", + "metadata": {}, + "source": [ + "## V2 Job Data for Public Projects" + ] + }, + { + "cell_type": "code", + "execution_count": 56, + "id": "8f4e09d2-ba9c-4dce-8478-d36413b19d0a", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Index(['id', 'name', 'tenant_id', 'tenant_queue', 'owner', 'roles',\n", + " 'system_id', 'app_id', 'app_uuid', 'status', 'last_message', 'accepted',\n", + " 'created', 'ended', 'last_updated', 'uuid', 'work_path', 'archive',\n", + " 'archive_on_app_error', 'archive_path', 'archive_system_id',\n", + " 'node_count', 'processor_count', 'memory_gb', 'max_hours', 'inputs',\n", + " 'parameters', 'remote_job_id', 'remote_sched_id', 'remote_queue',\n", + " 'remote_submitted', 'remote_started', 'remote_ended', 'remote_outcome',\n", + " 'remote_submit_retries', 'remote_status_checks', 'failed_status_checks',\n", + " 'last_status_check', 'blocked_count', 'visible', 'update_token',\n", + " 'parameters.Creator'],\n", + " dtype='object')\n", + "0 c7cd08ad-a560-4574-a363-b9cc4c5e051d-007\n", + "1 9188bf80-e868-4e05-a6b4-308c044108d7-007\n", + "2 773a5cb7-b369-4517-a221-83d57e3899e5-007\n", + "3 c0ab5f4a-97b0-4dc3-93e8-0908c95cb3a4-007\n", + "4 ad02cb34-250e-48cb-a06e-973e431b62ee-007\n", + " ... 
\n", + "15776 0001396301879424-5056a550b8-0001-007\n", + "15777 0001396298592090-5056a550b8-0001-007\n", + "15778 0001396298085562-5056a550b8-0001-007\n", + "15779 0001396297676287-5056a550b8-0001-007\n", + "15780 0001396295290656-5056a550b8-0001-007\n", + "Name: uuid, Length: 15781, dtype: object\n", + "7925 /projects/1002552565004824085-242ac117-0001-01...\n", + "Name: archive_path, dtype: object\n", + "[\n", + " {\n", + " \"_id\": {\n", + " \"$oid\": \"677ec04f5908010001246a0a\"\n", + " },\n", + " \"uuid\": \"5097479121213854191-242ac118-0001-012\",\n", + " \"owner\": \"vdj\",\n", + " \"tenantId\": \"vdjserver.org\",\n", + " \"schemaId\": null,\n", + " \"internalUsername\": null,\n", + " \"associationIds\": [\n", + " \"6589143665654501871-242ac118-0001-012\",\n", + " \"ad02cb34-250e-48cb-a06e-973e431b62ee-007\"\n", + " ],\n", + " \"lastUpdated\": \"2025-01-08T12:13:35.460-06:00\",\n", + " \"name\": \"projectJob\",\n", + " \"value\": {\n", + " \"projectUuid\": \"6589143665654501871-242ac118-0001-012\",\n", + " \"jobUuid\": \"ad02cb34-250e-48cb-a06e-973e431b62ee-007\",\n", + " \"secondaryInputs\": {\n", + " \"ForwardPrimerFileMetadata\": \"7669998360913571345-242ac118-0001-012\",\n", + " \"SequenceForwardPairedFilesMetadata\": [\n", + " \"906928664923345391-242ac118-0001-012\",\n", + " \"1625212254106480145-242ac118-0001-012\",\n", + " \"4324126753239920145-242ac118-0001-012\",\n", + " \"7771052756644720145-242ac118-0001-012\",\n", + " \"3675152322622591471-242ac118-0001-012\",\n", + " \"4146541469141954065-242ac118-0001-012\",\n", + " \"8931063987837997551-242ac118-0001-012\",\n", + " \"3039141951839277551-242ac118-0001-012\",\n", + " \"5078217388581907985-242ac118-0001-012\",\n", + " \"5866292560839643631-242ac118-0001-012\",\n", + " \"5499760051799003631-242ac118-0001-012\",\n", + " \"994425257640923631-242ac118-0001-012\",\n", + " \"6349109825059941905-242ac118-0001-012\",\n", + " \"6915240809950089711-242ac118-0001-012\",\n", + " 
\"500621254028169711-242ac118-0001-012\",\n", + " \"5903475632018615825-242ac118-0001-012\",\n", + " \"7032138206155575791-242ac118-0001-012\",\n", + " \"889776627127095791-242ac118-0001-012\"\n", + " ],\n", + " \"SequenceReversePairedFilesMetadata\": [\n", + " \"1907827843583185391-242ac118-0001-012\",\n", + " \"208946788250480145-242ac118-0001-012\",\n", + " \"2999902436537200145-242ac118-0001-012\",\n", + " \"5703154852639600145-242ac118-0001-012\",\n", + " \"8786807649956991471-242ac118-0001-012\",\n", + " \"1388184622631874065-242ac118-0001-012\",\n", + " \"6624007454493634065-242ac118-0001-012\",\n", + " \"6482503132388397551-242ac118-0001-012\",\n", + " \"165422283758637551-242ac118-0001-012\",\n", + " \"8165903950463963631-242ac118-0001-012\",\n", + " \"4968000150881243631-242ac118-0001-012\",\n", + " \"4730359610393563631-242ac118-0001-012\",\n", + " \"3002643106708581905-242ac118-0001-012\",\n", + " \"9170399288060809711-242ac118-0001-012\",\n", + " \"4740527069293449711-242ac118-0001-012\",\n", + " \"3855678174958775825-242ac118-0001-012\",\n", + " \"7848709270049975825-242ac118-0001-012\",\n", + " \"3463406930236215791-242ac118-0001-012\"\n", + " ],\n", + " \"SequenceFASTQMetadata\": []\n", + " }\n", + " },\n", + " \"created\": \"2025-01-08T12:13:35.460-06:00\"\n", + " },\n", + " {\n", + " \"_id\": {\n", + " \"$oid\": \"678597355908010001246b86\"\n", + " },\n", + " \"uuid\": \"1948444895656078865-242ac118-0001-012\",\n", + " \"owner\": \"vdj\",\n", + " \"tenantId\": \"vdjserver.org\",\n", + " \"schemaId\": null,\n", + " \"internalUsername\": null,\n", + " \"associationIds\": [\n", + " \"5456400192359305711-242ac118-0001-012\",\n", + " \"c0ab5f4a-97b0-4dc3-93e8-0908c95cb3a4-007\"\n", + " ],\n", + " \"lastUpdated\": \"2025-01-13T16:44:05.995-06:00\",\n", + " \"name\": \"projectJob\",\n", + " \"value\": {\n", + " \"projectUuid\": \"5456400192359305711-242ac118-0001-012\",\n", + " \"jobUuid\": \"c0ab5f4a-97b0-4dc3-93e8-0908c95cb3a4-007\",\n", + " 
\"secondaryInputs\": {\n", + " \"ForwardPrimerFileMetadata\": \"7830832104257678865-242ac118-0001-012\",\n", + " \"SequenceForwardPairedFilesMetadata\": [\n", + " \"1840700597200490991-242ac118-0001-012\",\n", + " \"5338423137409494545-242ac118-0001-012\",\n", + " \"6100051000725017071-242ac118-0001-012\",\n", + " \"2014473661280808465-242ac118-0001-012\",\n", + " \"8329106378551848465-242ac118-0001-012\",\n", + " \"6681449109992903151-242ac118-0001-012\",\n", + " \"3108766464161223151-242ac118-0001-012\",\n", + " \"2306929598057082385-242ac118-0001-012\",\n", + " \"8703939788065402385-242ac118-0001-012\",\n", + " \"1356193445436789231-242ac118-0001-012\",\n", + " \"4056925636403916305-242ac118-0001-012\",\n", + " \"5435653088092876305-242ac118-0001-012\"\n", + " ],\n", + " \"SequenceReversePairedFilesMetadata\": [\n", + " \"5023614960920170991-242ac118-0001-012\",\n", + " \"1335427718191574545-242ac118-0001-012\",\n", + " \"8601653389564374545-242ac118-0001-012\",\n", + " \"2661285435182617071-242ac118-0001-012\",\n", + " \"5841031823979048465-242ac118-0001-012\",\n", + " \"7745355458885063151-242ac118-0001-012\",\n", + " \"5421348655019463151-242ac118-0001-012\",\n", + " \"1173454200583623151-242ac118-0001-012\",\n", + " \"5832496452651642385-242ac118-0001-012\",\n", + " \"3037157745745269231-242ac118-0001-012\",\n", + " \"364542252032716305-242ac118-0001-012\",\n", + " \"7235888630211276305-242ac118-0001-012\"\n", + " ],\n", + " \"SequenceFASTQMetadata\": []\n", + " }\n", + " },\n", + " \"created\": \"2025-01-13T16:44:05.995-06:00\"\n", + " },\n", + " {\n", + " \"_id\": {\n", + " \"$oid\": \"678e12315908010001246c75\"\n", + " },\n", + " \"uuid\": \"1819643224410746385-242ac118-0001-012\",\n", + " \"owner\": \"vdj\",\n", + " \"tenantId\": \"vdjserver.org\",\n", + " \"schemaId\": null,\n", + " \"internalUsername\": null,\n", + " \"associationIds\": [\n", + " \"5199144433477554666-242ac116-0001-012\",\n", + " \"773a5cb7-b369-4517-a221-83d57e3899e5-007\"\n", + " 
],\n", + " \"lastUpdated\": \"2025-01-20T03:06:57.762-06:00\",\n", + " \"name\": \"projectJob\",\n", + " \"value\": {\n", + " \"projectUuid\": \"5199144433477554666-242ac116-0001-012\",\n", + " \"jobUuid\": \"773a5cb7-b369-4517-a221-83d57e3899e5-007\"\n", + " },\n", + " \"created\": \"2025-01-20T03:06:57.762-06:00\"\n", + " },\n", + " {\n", + " \"_id\": {\n", + " \"$oid\": \"6792af375908010001246c76\"\n", + " },\n", + " \"uuid\": \"2845695380777266705-242ac118-0001-012\",\n", + " \"owner\": \"vdj\",\n", + " \"tenantId\": \"vdjserver.org\",\n", + " \"schemaId\": null,\n", + " \"internalUsername\": null,\n", + " \"associationIds\": [\n", + " \"5456400192359305711-242ac118-0001-012\",\n", + " \"9188bf80-e868-4e05-a6b4-308c044108d7-007\"\n", + " ],\n", + " \"lastUpdated\": \"2025-01-23T15:05:59.570-06:00\",\n", + " \"name\": \"projectJob\",\n", + " \"value\": {\n", + " \"projectUuid\": \"5456400192359305711-242ac118-0001-012\",\n", + " \"jobUuid\": \"9188bf80-e868-4e05-a6b4-308c044108d7-007\"\n", + " },\n", + " \"created\": \"2025-01-23T15:05:59.570-06:00\"\n", + " }\n", + "]\n", + "{\n", + " \"id\": 503865,\n", + " \"name\": \"My Job 24-Jan-2025 8:57:09 pm\",\n", + " \"tenant_id\": \"vdjserver.org\",\n", + " \"tenant_queue\": \"aloe.jobq.vdjserver.org.submit.DefaultQueue\",\n", + " \"owner\": \"vdj\",\n", + " \"roles\": 
\"Internal/VDJ_vdj_keycloak_PRODUCTION,Internal/VDJ_vdj_vdj_adc_PRODUCTION,Internal/VDJ_vdj_vdj_test_PRODUCTION,Internal/VDJ_vdj_iedb_PRODUCTION,Internal/VDJ_vdj_vdj_staging_PRODUCTION,Internal/VDJ_vdj_vdj_dev_PRODUCTION,Internal/VDJ_vdj__cli-vdjserver.org-vdj-8e282f4085e4_PRODUCTION,Internal/VDJ_vdj_keycloak1_PRODUCTION,Internal/VDJ_vdj__cli-vdjserver.org-vdj-d4184ecd0b88_PRODUCTION,Internal/VDJ_vdj__cli-vdjserver.org-vdj-341886281d15_PRODUCTION,Internal/VDJ_vdj_DefaultApplication_PRODUCTION,Internal/vdjserver-org-services-admin,Internal/VDJ_vdj__cli-vdjserver.org-vdj-9fa7cf90e11e_PRODUCTION,Internal/VDJ_vdj_aloe_beta_client_PRODUCTION,Internal/VDJ_vdj_vdj_repair_PRODUCTION,Internal/everyone,Internal/vdjserver-org-account-manager,Internal/test_foo_PRODUCTION,Internal/VDJ_vdj_aloe_beta_client_SANDBOX,Internal/VDJ_vdj__cli-vdjserver.org-vdj-544ee46758bb_PRODUCTION,Internal/VDJ_vdj__cli-vdjserver.org-vdj-c66d52ca544d_PRODUCTION,Internal/vdj-metav3,Internal/VDJ_vdj_keycloak2_PRODUCTION,Internal/VDJ_vdj__cli-vdjserver.org-vdj-999dcb3f1b33_PRODUCTION,Internal/VDJ_vdj_vdj_airr_PRODUCTION,Internal/subscriber,Internal/VDJ_vdj_vdj_PRODUCTION,Internal/test1\",\n", + " \"system_id\": \"ls6.tacc.utexas.edu\",\n", + " \"app_id\": \"repcalc-ls6-2.0u8\",\n", + " \"app_uuid\": \"6306626279335587345-242ac119-0001-005\",\n", + " \"status\": \"FINISHED\",\n", + " \"last_message\": \"Transitioning from status ARCHIVING to FINISHED in phase ARCHIVING.\",\n", + " \"accepted\": \"2025-01-25 02:57:53.911\",\n", + " \"created\": \"2025-01-25 02:57:53.915\",\n", + " \"ended\": \"2025-01-25 15:43:51.678\",\n", + " \"last_updated\": \"2025-01-25 15:43:51.678\",\n", + " \"uuid\": \"c7cd08ad-a560-4574-a363-b9cc4c5e051d-007\",\n", + " \"work_path\": \"/scratch/01114/vdj/vdj/job-c7cd08ad-a560-4574-a363-b9cc4c5e051d-007-my-job-24-jan-2025-8-57-09-pm\",\n", + " \"archive\": 1,\n", + " \"archive_on_app_error\": 0,\n", + " \"archive_path\": 
\"/projects/5456400192359305711-242ac118-0001-012/analyses/2025-01-25-02-57-35-54-my-job-24-jan-2025-8:57:09-pm\",\n", + " \"archive_system_id\": \"data.vdjserver.org\",\n", + " \"node_count\": 4,\n", + " \"processor_count\": 128,\n", + " \"memory_gb\": 1,\n", + " \"max_hours\": 4,\n", + " \"inputs\": \"{\\\"repcalc_image\\\":[\\\"agave://data.vdjserver.org//singularity/repcalc-2.0-imm4.4.sif\\\"],\\\"StudyMetadata\\\":\\\"agave://data.vdjserver.org//projects/5456400192359305711-242ac118-0001-012/analyses/2025-01-25-02-57-35-54-my-job-24-jan-2025-8:57:09-pm/study_metadata.json\\\",\\\"AIRRMetadata\\\":\\\"agave://data.vdjserver.org//projects/5456400192359305711-242ac118-0001-012/analyses/2025-01-23-21-05-40-71-my-job-23-jan-2025-3:05:30-pm/study_metadata.airr.json\\\",\\\"JobFiles\\\":[\\\"agave://data.vdjserver.org//projects/5456400192359305711-242ac118-0001-012/analyses/2025-01-23-21-05-40-71-my-job-23-jan-2025-3:05:30-pm/9188bf80-e868-4e05-a6b4-308c044108d7-007.zip\\\"]}\",\n", + " \"parameters\": 
\"{\\\"DiversityOperations\\\":[\\\"shannon\\\"],\\\"ClonalOperations\\\":[\\\"abundance\\\"],\\\"GeneSegmentOperations\\\":[\\\"absolute\\\",\\\"relative\\\",\\\"combo\\\"],\\\"LineageOperations\\\":[\\\"reconstruction\\\"],\\\"CDR3Levels\\\":[\\\"aa\\\",\\\"v,aa\\\",\\\"vj,aa\\\"],\\\"Creator\\\":\\\"schristley\\\",\\\"DiversityFlag\\\":true,\\\"DiversityFilters\\\":[\\\"productive\\\"],\\\"GeneSegmentFlag\\\":true,\\\"ClonalFilters\\\":[\\\"productive\\\"],\\\"LineageFilters\\\":[\\\"productive\\\"],\\\"GeneSegmentFilters\\\":[\\\"productive\\\"],\\\"CDR3Operations\\\":[\\\"absolute\\\",\\\"relative\\\",\\\"length\\\",\\\"shared\\\",\\\"distribution\\\"],\\\"MutationalFilters\\\":[\\\"productive\\\"],\\\"JobSelected\\\":\\\"9188bf80-e868-4e05-a6b4-308c044108d7-007\\\",\\\"DiversityLevels\\\":[\\\"gene\\\",\\\"aa\\\"],\\\"ClonalFlag\\\":true,\\\"GeneSegmentLevels\\\":[\\\"vj\\\",\\\"vdj\\\"],\\\"LineageFlag\\\":true,\\\"CDR3Flag\\\":true,\\\"MutationalFlag\\\":true,\\\"CDR3Filters\\\":[\\\"productive\\\"]}\",\n", + " \"remote_job_id\": \"2167988\",\n", + " \"remote_sched_id\": null,\n", + " \"remote_queue\": \"normal\",\n", + " \"remote_submitted\": \"2025-01-25 02:59:57.232\",\n", + " \"remote_started\": \"2025-01-25 15:38:25.306\",\n", + " \"remote_ended\": \"2025-01-25 15:43:33.007\",\n", + " \"remote_outcome\": \"FINISHED\",\n", + " \"remote_submit_retries\": 0,\n", + " \"remote_status_checks\": 192,\n", + " \"failed_status_checks\": 0,\n", + " \"last_status_check\": \"2025-01-25 15:43:33.001\",\n", + " \"blocked_count\": 0,\n", + " \"visible\": 1,\n", + " \"update_token\": \"eb27e311-4a37-4aeb-b649-056704dd2711\"\n", + "}\n", + "[\n", + " {\n", + " \"id\": 357197,\n", + " \"name\": \"cellranger_SRR11528762\",\n", + " \"tenant_id\": \"vdjserver.org\",\n", + " \"tenant_queue\": \"aloe.jobq.vdjserver.org.submit.DefaultQueue\",\n", + " \"owner\": \"vdj\",\n", + " \"roles\": 
\"Internal/VDJ_vdj_keycloak_PRODUCTION,Internal/VDJ_vdj_vdj_test_PRODUCTION,Internal/VDJ_vdj_iedb_PRODUCTION,Internal/VDJ_vdj_keycloak1_PRODUCTION,Internal/VDJ_vdj__cli-vdjserver.org-vdj-8e282f4085e4_PRODUCTION,Internal/VDJ_vdj_vdj_dev_PRODUCTION,Internal/VDJ_vdj_vdj_staging_PRODUCTION,Internal/VDJ_vdj__cli-vdjserver.org-vdj-c66d52ca544d_PRODUCTION,Internal/VDJ_vdj__cli-vdjserver.org-vdj-d4184ecd0b88_PRODUCTION,Internal/VDJ_vdj__cli-vdjserver.org-vdj-341886281d15_PRODUCTION,Internal/VDJ_vdj_DefaultApplication_PRODUCTION,Internal/vdj-metav3,Internal/vdjserver-org-services-admin,Internal/VDJ_vdj__cli-vdjserver.org-vdj-9fa7cf90e11e_PRODUCTION,Internal/VDJ_vdj_keycloak2_PRODUCTION,Internal/VDJ_vdj_vdj_airr_PRODUCTION,Internal/subscriber,Internal/VDJ_vdj_aloe_beta_client_PRODUCTION,Internal/VDJ_vdj_vdj_repair_PRODUCTION,Internal/everyone,Internal/test_foo_PRODUCTION,Internal/vdjserver-org-account-manager,Internal/VDJ_vdj_vdj_PRODUCTION,Internal/VDJ_vdj_aloe_beta_client_SANDBOX,Internal/test1\",\n", + " \"system_id\": \"stampede2.tacc.utexas.edu\",\n", + " \"app_id\": \"cellranger-stampede2-6.1\",\n", + " \"app_uuid\": \"7181865981105073685-242ac118-0001-005\",\n", + " \"status\": \"FINISHED\",\n", + " \"last_message\": \"Transitioning from status ARCHIVING to FINISHED in phase ARCHIVING.\",\n", + " \"accepted\": \"2021-08-25 16:50:57.344\",\n", + " \"created\": \"2021-08-25 16:50:57.348\",\n", + " \"ended\": \"2021-08-25 19:41:23.799\",\n", + " \"last_updated\": \"2021-08-25 19:41:23.799\",\n", + " \"uuid\": \"38a47767-343d-4d2c-9374-29c3be77905c-007\",\n", + " \"work_path\": \"/scratch/01114/vdj/vdj/job-38a47767-343d-4d2c-9374-29c3be77905c-007-cellranger_srr11528762\",\n", + " \"archive\": 1,\n", + " \"archive_on_app_error\": 0,\n", + " \"archive_path\": \"/projects/1002552565004824085-242ac117-0001-012/analyses/cellranger_SRR11528762\",\n", + " \"archive_system_id\": \"data.vdjserver.org\",\n", + " \"node_count\": 1,\n", + " \"processor_count\": 48,\n", + " 
\"memory_gb\": 1,\n", + " \"max_hours\": 4,\n", + " \"inputs\": \"{\\\"ForwardPairedFile\\\":\\\"agave://data.vdjserver.org//projects/1002552565004824085-242ac117-0001-012/files/SRR11528762_1.fastq.gz\\\",\\\"ReversePairedFile\\\":\\\"agave://data.vdjserver.org//projects/1002552565004824085-242ac117-0001-012/files/SRR11528762_2.fastq.gz\\\"}\",\n", + " \"parameters\": \"{\\\"Creator\\\":\\\"vdj\\\",\\\"FilesMetadata\\\":[\\\"1234\\\"],\\\"repertoire_id\\\":\\\"SRR11528762\\\",\\\"species\\\":\\\"mouse\\\"}\",\n", + " \"remote_job_id\": \"8352680\",\n", + " \"remote_sched_id\": null,\n", + " \"remote_queue\": \"skx-normal\",\n", + " \"remote_submitted\": \"2021-08-25 17:01:27.289\",\n", + " \"remote_started\": \"2021-08-25 18:04:09.136\",\n", + " \"remote_ended\": \"2021-08-25 19:33:24.642\",\n", + " \"remote_outcome\": \"FINISHED\",\n", + " \"remote_submit_retries\": 0,\n", + " \"remote_status_checks\": 51,\n", + " \"failed_status_checks\": 0,\n", + " \"last_status_check\": \"2021-08-25 19:33:24.636\",\n", + " \"blocked_count\": 0,\n", + " \"visible\": 1,\n", + " \"update_token\": \"72574690-ddbe-41c4-a741-07f47eb21c9b\"\n", + " },\n", + " {\n", + " \"id\": 357196,\n", + " \"name\": \"cellranger_SRR11528762\",\n", + " \"tenant_id\": \"vdjserver.org\",\n", + " \"tenant_queue\": \"aloe.jobq.vdjserver.org.submit.DefaultQueue\",\n", + " \"owner\": \"vdj\",\n", + " \"roles\": 
\"Internal/VDJ_vdj_keycloak_PRODUCTION,Internal/VDJ_vdj_vdj_test_PRODUCTION,Internal/VDJ_vdj_iedb_PRODUCTION,Internal/VDJ_vdj_keycloak1_PRODUCTION,Internal/VDJ_vdj__cli-vdjserver.org-vdj-8e282f4085e4_PRODUCTION,Internal/VDJ_vdj_vdj_dev_PRODUCTION,Internal/VDJ_vdj_vdj_staging_PRODUCTION,Internal/VDJ_vdj__cli-vdjserver.org-vdj-c66d52ca544d_PRODUCTION,Internal/VDJ_vdj__cli-vdjserver.org-vdj-d4184ecd0b88_PRODUCTION,Internal/VDJ_vdj__cli-vdjserver.org-vdj-341886281d15_PRODUCTION,Internal/VDJ_vdj_DefaultApplication_PRODUCTION,Internal/vdj-metav3,Internal/vdjserver-org-services-admin,Internal/VDJ_vdj__cli-vdjserver.org-vdj-9fa7cf90e11e_PRODUCTION,Internal/VDJ_vdj_keycloak2_PRODUCTION,Internal/VDJ_vdj_vdj_airr_PRODUCTION,Internal/subscriber,Internal/VDJ_vdj_aloe_beta_client_PRODUCTION,Internal/VDJ_vdj_vdj_repair_PRODUCTION,Internal/everyone,Internal/test_foo_PRODUCTION,Internal/vdjserver-org-account-manager,Internal/VDJ_vdj_vdj_PRODUCTION,Internal/VDJ_vdj_aloe_beta_client_SANDBOX,Internal/test1\",\n", + " \"system_id\": \"stampede2.tacc.utexas.edu\",\n", + " \"app_id\": \"cellranger-stampede2-6.1\",\n", + " \"app_uuid\": \"7181865981105073685-242ac118-0001-005\",\n", + " \"status\": \"STOPPED\",\n", + " \"last_message\": \"JOBS_CMD_MSG_RECEIVED Job 0afc4677-3da4-4782-adcf-a80f66eeff3d-007 received a JOB_CANCEL command from JobActionResource-cancelCmdStatus with correlation id 0afc4677-3da4-4782-adcf-a80f66eeff3d-007. 
# %%
# Exploration: what the job table and the projectJob records look like.
print(jobs_all_df.keys())
print(jobs_all_df['uuid'])
job_id = '38a47767-343d-4d2c-9374-29c3be77905c-007'
project_id = '1002552565004824085-242ac117-0001-012'
print(jobs_all_df[jobs_all_df['uuid'] == job_id]['archive_path'])

# jsonarray_projectJob and dict_projectJob are built once in the load cell.
# They are deliberately not rebuilt here: the copy that used to live in this
# cell re-keyed dict_projectJob by record uuid instead of project uuid, so the
# dictionary's meaning depended on which cell had run last.
print(json.dumps(jsonarray_projectJob[-5:-1], indent=2))

print(json.dumps(jsonarray_jobs[0], indent=2))

#job = {}
job = [ obj for obj in jsonarray_jobs if obj.get('archive_path') is not None and project_id in obj.get('archive_path')]
print(json.dumps(job, indent=2))

# %% [markdown]
# ## Convert public_project data and output into JSONL files

# %%
# Record types that are not migrated at all.
exclusion_names = [ 'projectLoad', 'rearrangementLoad' ]
# Old record name -> new record name.
name_map = { "projectFile": "project_file" }
# Permission entry attached to every migrated public_project record.
permission = [ { "username": "vdjserver.curation@gmail.com", "permission": { "read":True, "write":True } } ]

# Top-level fields copied into each output record, in this order.
col_list = ['uuid', 'owner', 'associationIds', 'created', 'lastUpdated', 'name', 'value']
obj_list = {}
data_dir = 'Metadata_public_project/'


def belongs_to_project(item, project_uuid):
    """Return True when `item` is the project record itself or is associated with it.

    The uuid is compared for equality. A substring test would also accept a
    longer uuid that merely ends with a shorter one (uuids in this export do
    not all have the same length). A missing `uuid` or `associationIds` is
    treated as "no match" instead of raising TypeError.
    """
    if item.get('uuid') == project_uuid:
        return True
    return project_uuid in (item.get('associationIds') or [])


def migrate_record(item, project_uuid):
    """Return the migrated form of one V2 metadata record, restricted to `col_list`.

    `item` is never modified: the migration works on a deep copy so the shared
    `jsonarray` stays pristine and this cell can be re-run safely.
    """
    # JSON round trip = deep copy without a new import; the records are JSON data.
    record = json.loads(json.dumps(item))
    record['name'] = name_map.get(record['name'], record['name'])

    if record['name'] == 'public_project':
        value = record['value']
        # move old keywords
        if value.get('vdjserver_keywords') is not None:
            if value.get('vdjserver') is None:
                value['vdjserver'] = {}
            value['vdjserver']['keywords'] = value['vdjserver_keywords']
            del value['vdjserver_keywords']
        # add permissions
        record['permission'] = permission
        # old fields
        if value.get('showArchivedJobs') is not None:
            del value['showArchivedJobs']
        if value.get('owner') is not None:
            del value['owner']

    if record['name'] == 'project_file':
        # eliminate old file UUID
        record['associationIds'] = [ project_uuid ]

    obj = {col_name: record.get(col_name) for col_name in col_list}
    if record.get('permission') is not None:
        obj['permission'] = record['permission']
    return obj


def write_project_metadata(project_uuid, records, out_dir):
    """Write every migrated record of one project to `<out_dir><project_uuid>_metadata.jsonl`.

    Parameters:
        project_uuid: uuid of the public project to export.
        records: list of metadata record dicts (the loaded JSON array).
        out_dir: output directory prefix, including the trailing slash; it must exist.

    Returns:
        The number of records written.
    """
    written = 0
    with open(f'{out_dir}{project_uuid}_metadata.jsonl', 'w') as file:
        for item in records:
            if item['name'] in exclusion_names:
                continue
            if not belongs_to_project(item, project_uuid):
                continue
            json.dump(migrate_record(item, project_uuid), file)
            file.write('\n')  # Add a newline after each JSON object
            written += 1
    return written

# %%
for project_uuid in df_public_project.uuid:
    write_project_metadata(project_uuid, jsonarray, data_dir)

# %% [markdown]
# ## Convert Tapis V2 Job data into meta record

# %%
col_list = ['uuid', 'owner', 'associationIds', 'created', 'lastUpdated', 'name', 'value']
obj_list = {}
project_uuid = '2034535426280329706-242ac113-0001-012'
data_dir = 'Metadata_public_project_jobs/'  # Create/Change the directory
# One JSONL file per public project that has Tapis V2 jobs; the output
# directory named by data_dir must already exist.

def jobs_for_project(all_jobs, project_uuid):
    """Return the job records whose `archive_path` contains `project_uuid` as a path segment.

    The archive paths printed from this export have the form
    `/projects/<project uuid>/analyses/...`. Comparing whole path segments
    avoids the false positives of a substring test, where a shorter uuid
    matches the tail of a longer one. Jobs without an archive path never match.
    """
    return [
        job for job in all_jobs
        if project_uuid in (job.get('archive_path') or '').split('/')
    ]


def to_job_record(job, project_uuid):
    """Wrap one Tapis V2 job dict into a `tapis_v2_job` metadata record.

    The job's uuid becomes the record uuid and is left out of `value`.
    `job` itself is not modified: deleting the key from the shared
    `jsonarray_jobs` entry made a second run of this cell fail with KeyError.
    """
    return {
        'name': 'tapis_v2_job',
        'associationIds': [ project_uuid ],
        'value': {key: val for key, val in job.items() if key != 'uuid'},
        'uuid': job['uuid'],
        'created': job['created'],
        'lastUpdated': job['last_updated'],
    }


for project_uuid in df_public_project.uuid:
    jobs = jobs_for_project(jsonarray_jobs, project_uuid)
    print(len(jobs))
    # Projects without jobs get no file at all.
    if len(jobs) > 0:
        with open(f'{data_dir}{project_uuid}_metadata.jsonl', 'w') as file:
            for j in jobs:
                json.dump(to_job_record(j, project_uuid), file)
                file.write('\n')  # Add a newline after each JSON object
    #print(dict_projectJob.get(project_uuid))

# %%
# Peek at the first projectJob entry to check how dict_projectJob is keyed.
first_key = next(iter(dict_projectJob), None)
if first_key is not None:
    print(first_key)
    print(dict_projectJob[first_key])
"python", + "name": "msm_tensorflow_2" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.9" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/conversion/tapis_v2_to_v3/user_migration.ipynb b/conversion/tapis_v2_to_v3/user_migration.ipynb new file mode 100644 index 0000000..db3203d --- /dev/null +++ b/conversion/tapis_v2_to_v3/user_migration.ipynb @@ -0,0 +1,1326 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 28, + "id": "583622a1-6051-4e27-8003-ead9262cc186", + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "import numpy as np\n", + "import matplotlib.pyplot as plt\n", + "import seaborn as sns\n", + "import json\n", + "from collections import defaultdict" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "3aae211f-39ac-4c82-962f-56201a2d3c93", + "metadata": {}, + "outputs": [], + "source": [ + "job_events_df = pd.read_json('/mnt/data2/Projects/vdjserver/vdjserverJobEvents.json')\n", + "job_permissions_df = pd.read_json('/mnt/data2/Projects/vdjserver/vdjserverJobPermissions.json')\n", + "jobs_all_df = pd.read_json('/mnt/data2/Projects/vdjserver/vdjserverJobs_all.json')\n", + "jobs_all_df['parameters.Creator'] = jobs_all_df['parameters'].apply(lambda x: json.loads(x).get('Creator', np.nan) if x else np.nan)\n", + "metadata_perms_df = pd.read_json('/mnt/data2/Projects/vdjserver/vdjserverMetadataPermissions.json')\n", + "\n", + "with open('/mnt/data2/Projects/vdjserver/vdjserverJsonArrayFeb042025.json', 'r') as f:\n", + " jsonarray = json.load(f)" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "e21784e4-8c22-444e-8ef3-427e7a7e9d1e", + "metadata": {}, + "outputs": [], + "source": [ + "def json_print(item):\n", + " print(json.dumps(item, indent = 4))" + ] + }, + { + 
"cell_type": "code", + "execution_count": 4, + "id": "3ec05085-2c72-43a2-b980-08d8dcc8f46a", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " Email updated_email\n", + "0 12ysliu2 at stu.edu.cn 12ysliu2@stu.edu.cn\n", + "1 18982180702 at msn.cn 18982180702@msn.cn\n", + "2 2008110020 at alumni.sjtu.edu.cn 2008110020@alumni.sjtu.edu.cn\n", + "3 2383920158 at qq.com 2383920158@qq.com\n", + "4 2deepayan at gmail.com 2deepayan@gmail.com\n", + ".. ... ...\n", + "571 zhanxw at gmail.com zhanxw@gmail.com\n", + "572 zhe.sang at gmail.com zhe.sang@gmail.com\n", + "573 zicheng at utexas.edu zicheng@utexas.edu\n", + "574 zluo819 at gmail.com zluo819@gmail.com\n", + "575 zyf950619 at gmail.com zyf950619@gmail.com\n", + "\n", + "[576 rows x 2 columns]\n" + ] + } + ], + "source": [ + "mailing_list = pd.read_csv(\"/mnt/data2/Projects/vdjserver/VDJServer_mailing_list.txt\", sep = ';', skiprows = 8)\n", + "mailing_list.columns = ['Email']\n", + "mailing_list = mailing_list.iloc[:-5]\n", + "mailing_list['updated_email'] = mailing_list['Email'].apply(lambda row: row.replace(\" at \", \"@\"))\n", + "\n", + "# mailing_list['updated_email'].to_csv('email_list.txt', index = False)\n", + "print(mailing_list)" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "78a11a56-88d1-4c85-bb92-ac4923a1fb69", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "['adc_cache', 'adc_cache_repertoire', 'adc_cache_study', 'adc_system_repositories', 'archive_project', 'async_query', 'bioProcessing', 'bioProcessingColumns', 'cellProcessing', 'cellProcessingColumns', 'communityDataSRA', 'data_processing', 'deletedProject', 'diagnosis', 'diagnosisColumns', 'feedback', 'garbage', 'irplus_analysis', 'job', 'nucleicAcidProcessing', 'nucleicAcidProcessingColumns', 'passwordReset', 'private_project', 'processMetadata', 'profile', 'project', 'projectFile', 'projectJob', 'projectJobArchive', 
'projectJobFile', 'projectLoad', 'projectPublishInProcess', 'projectUnpublishInProcess', 'publicProject', 'public_project', 'rearrangementLoad', 'repertoire', 'sample', 'sampleColumns', 'sampleGroup', 'sample_processing', 'statistics_cache', 'statistics_cache_repertoire', 'statistics_cache_study', 'subject', 'subjectColumns', 'test', 'testMetadata', 'testmetadata', 'testmetadatamp', 'userVerification', 'vdjpipeWorkflow']\n", + "52\n" + ] + } + ], + "source": [ + "item_types = set()\n", + "for item in jsonarray:\n", + " item_type = item['name']\n", + " item_types.add(item_type)\n", + "print(sorted(item_types))\n", + "print(len(item_types))" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "2de5a274-f7de-4483-8d62-77aa582bbab4", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{\n", + " \"_id\": {\n", + " \"$oid\": \"52e95a8de4b057612fefcc16\"\n", + " },\n", + " \"uuid\": \"0001391024781787-5056a550b8-0001-012\",\n", + " \"owner\": \"tester_account2000\",\n", + " \"tenantId\": \"vdjserver.org\",\n", + " \"schemaId\": null,\n", + " \"internalUsername\": null,\n", + " \"associationIds\": [],\n", + " \"lastUpdated\": \"2014-01-29T13:46:21.787-06:00\",\n", + " \"name\": \"project\",\n", + " \"value\": {\n", + " \"name\": \"Hello World\"\n", + " },\n", + " \"created\": \"2014-01-29T13:46:21.787-06:00\"\n", + "}\n" + ] + } + ], + "source": [ + "for item in jsonarray:\n", + " item_type = item['name']\n", + " if item_type == 'project':\n", + " print(json.dumps(item, indent = 4))\n", + " break\n", + " " + ] + }, + { + "cell_type": "markdown", + "id": "e73f25ae-e08e-42da-b1c7-3d4a69141641", + "metadata": {}, + "source": [ + "## Look at Profile Metadata" + ] + }, + { + "cell_type": "code", + "execution_count": 47, + "id": "ea366f26-f621-4908-8fe8-e2ae89d6219e", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
usernamefirstNamelastNameemaillastUpdated
1817rgarciaRodrigoGarcía Valienter.garciavaliente@amsterdamumc.nl2025-01-02T11:11:52.894-06:00
1818rgarciavRodrigoGarcía Valienter.garciavaliente@amsterdamumc.nl2025-01-02T11:41:52.070-06:00
1819erichardsonEveRichardsonerichardson@lji.org2025-01-07T18:01:25.657-06:00
1820samwolsamuel.wollenburg@utsouthwestern.edu2025-01-07T20:24:59.390-06:00
1821chrisjames1992Chinweike ChristopherUdoyechinweikechristopher.udoye@uksh.de2025-01-17T07:54:02.133-06:00
\n", + "
" + ], + "text/plain": [ + " username firstName lastName \\\n", + "1817 rgarcia Rodrigo García Valiente \n", + "1818 rgarciav Rodrigo García Valiente \n", + "1819 erichardson Eve Richardson \n", + "1820 samwol \n", + "1821 chrisjames1992 Chinweike Christopher Udoye \n", + "\n", + " email lastUpdated \n", + "1817 r.garciavaliente@amsterdamumc.nl 2025-01-02T11:11:52.894-06:00 \n", + "1818 r.garciavaliente@amsterdamumc.nl 2025-01-02T11:41:52.070-06:00 \n", + "1819 erichardson@lji.org 2025-01-07T18:01:25.657-06:00 \n", + "1820 samuel.wollenburg@utsouthwestern.edu 2025-01-07T20:24:59.390-06:00 \n", + "1821 chinweikechristopher.udoye@uksh.de 2025-01-17T07:54:02.133-06:00 " + ] + }, + "execution_count": 47, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "profile_list = []\n", + "for item in jsonarray:\n", + " item_type = item['name']\n", + " if item_type == 'profile':\n", + " uuid = item.get('uuid', None)\n", + " owner = item.get('owner', None)\n", + " first_name = item.get('value', {}).get('firstName', None)\n", + " last_name = item.get('value', {}).get('lastName', None)\n", + " email = item.get('value', {}).get('email', None)\n", + " city = item.get('value', {}).get('city', None)\n", + " state = item.get('value', {}).get('state', None)\n", + " country = item.get('value', {}).get('country', None)\n", + " created = item.get('created', None)\n", + " last_updated = item.get('lastUpdated', None)\n", + " # Append the extracted data as a dictionary to the list\n", + " profile_list.append({\n", + " 'username': owner,\n", + " 'firstName': first_name,\n", + " 'lastName': last_name,\n", + " 'email': email,\n", + " \n", + " 'lastUpdated': last_updated\n", + " })\n", + "# Create a DataFrame from the list of extracted data\n", + "df_profile = pd.DataFrame(profile_list)\n", + "# Print the DataFrame\n", + "df_profile.tail()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "cfa35570-35db-4df1-b4a0-1e7dd5b07c19", + "metadata": {}, + 
"outputs": [], + "source": [] + }, + { + "cell_type": "markdown", + "id": "251c61d1-6db3-447d-a2cb-0fff3a20b65d", + "metadata": {}, + "source": [ + "## Look at project Metadata" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "id": "216b0ccc-30c8-4c16-b4da-0b1c7fcc7bb6", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
projectUuidownerproject_namelast_updated
00001391024781787-5056a550b8-0001-012tester_account2000Hello World2014-01-29T13:46:21.787-06:00
10001391025968832-5056a550b8-0001-012tester_account2000Project 22014-01-29T14:06:08.832-06:00
20001391628100698-5056a550b8-0001-012adminDemo12014-02-05T13:21:40.698-06:00
30001392911686649-5056a550b8-0001-012test51testProj2014-02-20T09:54:46.649-06:00
40001392912386049-5056a550b8-0001-012test51testProj22014-02-20T10:06:26.048-06:00
\n", + "
" + ], + "text/plain": [ + " projectUuid owner project_name \\\n", + "0 0001391024781787-5056a550b8-0001-012 tester_account2000 Hello World \n", + "1 0001391025968832-5056a550b8-0001-012 tester_account2000 Project 2 \n", + "2 0001391628100698-5056a550b8-0001-012 admin Demo1 \n", + "3 0001392911686649-5056a550b8-0001-012 test51 testProj \n", + "4 0001392912386049-5056a550b8-0001-012 test51 testProj2 \n", + "\n", + " last_updated \n", + "0 2014-01-29T13:46:21.787-06:00 \n", + "1 2014-01-29T14:06:08.832-06:00 \n", + "2 2014-02-05T13:21:40.698-06:00 \n", + "3 2014-02-20T09:54:46.649-06:00 \n", + "4 2014-02-20T10:06:26.048-06:00 " + ] + }, + "execution_count": 17, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "project_list = []\n", + "for item in jsonarray:\n", + " item_type = item.get('name')\n", + " if item_type == 'project': # filter for project items\n", + " uuid = item.get('uuid', None)\n", + " owner = item.get('owner', None)\n", + " last_updated = item.get('lastUpdated', None)\n", + " # Nested 'name' inside 'value'\n", + " project_name = item.get('value', {}).get('name', None)\n", + " project_list.append({\n", + " 'projectUuid': uuid,\n", + " 'owner': owner,\n", + " 'project_name': project_name,\n", + " 'last_updated': last_updated\n", + " })\n", + "\n", + "import pandas as pd\n", + "df_projects = pd.DataFrame(project_list)\n", + "df_projects.head()\n" + ] + }, + { + "cell_type": "markdown", + "id": "1b0dd91f-9072-4933-b945-eb572d2adf89", + "metadata": {}, + "source": [ + "## Look at ProjectFile data\n", + " - Contains only ProjectUUID\n", + " - Contains file upload information for the project\n" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "id": "ab5aef3c-fae5-4802-9f0d-558089c32a09", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
uuidprojectUuidassociationIds_1associationIds_2ownertask_typefile_namemimeTypelast_updated
359435338423137409494545-242ac118-0001-0125456400192359305711-242ac118-0001-0126793987554023894545-242ac112-0001-0025456400192359305711-242ac118-0001-012vdjNone4468_S24_L001_R1_001.fastq.gzNone2025-01-13T16:40:40.230-06:00
359441335427718191574545-242ac118-0001-0125456400192359305711-242ac118-0001-0122833383462017494545-242ac112-0001-0025456400192359305711-242ac118-0001-012vdjNone4468_S24_L001_R2_001.fastq.gzNone2025-01-13T16:40:40.230-06:00
359451840700597200490991-242ac118-0001-0125456400192359305711-242ac118-0001-012366925519251050991-242ac112-0001-0025456400192359305711-242ac118-0001-012vdjNone6634_S25_L001_R1_001.fastq.gzNone2025-01-13T16:40:43.277-06:00
359465023614960920170991-242ac118-0001-0125456400192359305711-242ac118-0001-0123549539235260010991-242ac112-0001-0025456400192359305711-242ac118-0001-012vdjNone6634_S25_L001_R2_001.fastq.gzNone2025-01-13T16:40:43.281-06:00
359477830832104257678865-242ac118-0001-0125456400192359305711-242ac118-0001-0128017190735231118865-242ac112-0001-0025456400192359305711-242ac118-0001-012vdjNoneprimers.fastaNone2025-01-13T16:41:49.035-06:00
\n", + "
" + ], + "text/plain": [ + " uuid \\\n", + "35943 5338423137409494545-242ac118-0001-012 \n", + "35944 1335427718191574545-242ac118-0001-012 \n", + "35945 1840700597200490991-242ac118-0001-012 \n", + "35946 5023614960920170991-242ac118-0001-012 \n", + "35947 7830832104257678865-242ac118-0001-012 \n", + "\n", + " projectUuid \\\n", + "35943 5456400192359305711-242ac118-0001-012 \n", + "35944 5456400192359305711-242ac118-0001-012 \n", + "35945 5456400192359305711-242ac118-0001-012 \n", + "35946 5456400192359305711-242ac118-0001-012 \n", + "35947 5456400192359305711-242ac118-0001-012 \n", + "\n", + " associationIds_1 \\\n", + "35943 6793987554023894545-242ac112-0001-002 \n", + "35944 2833383462017494545-242ac112-0001-002 \n", + "35945 366925519251050991-242ac112-0001-002 \n", + "35946 3549539235260010991-242ac112-0001-002 \n", + "35947 8017190735231118865-242ac112-0001-002 \n", + "\n", + " associationIds_2 owner task_type \\\n", + "35943 5456400192359305711-242ac118-0001-012 vdj None \n", + "35944 5456400192359305711-242ac118-0001-012 vdj None \n", + "35945 5456400192359305711-242ac118-0001-012 vdj None \n", + "35946 5456400192359305711-242ac118-0001-012 vdj None \n", + "35947 5456400192359305711-242ac118-0001-012 vdj None \n", + "\n", + " file_name mimeType last_updated \n", + "35943 4468_S24_L001_R1_001.fastq.gz None 2025-01-13T16:40:40.230-06:00 \n", + "35944 4468_S24_L001_R2_001.fastq.gz None 2025-01-13T16:40:40.230-06:00 \n", + "35945 6634_S25_L001_R1_001.fastq.gz None 2025-01-13T16:40:43.277-06:00 \n", + "35946 6634_S25_L001_R2_001.fastq.gz None 2025-01-13T16:40:43.281-06:00 \n", + "35947 primers.fasta None 2025-01-13T16:41:49.035-06:00 " + ] + }, + "execution_count": 15, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "projectFiles_list = []\n", + "for item in jsonarray:\n", + " item_type = item['name']\n", + " if item_type == 'projectFile':\n", + " uuid = item.get('uuid', None)\n", + " associationIds = item.get('associationIds', 
None)\n", + " projectUuid = item.get('value', {}).get('projectUuid', None)\n", + " owner = item.get('owner', None)\n", + " task_type= item.get('value', {}).get('type', None)\n", + " file_name = item.get('value', {}).get('name', None)\n", + " mimeType = item.get('value', {}).get('mimeType', None)\n", + " last_updated = item.get('lastUpdated', None)\n", + " if associationIds:\n", + " associationIds_1 = associationIds[0]\n", + " if len(associationIds)>1:\n", + " associationIds_2 = associationIds[1]\n", + " if len(associationIds) > 2:\n", + " print(\"Length associationIds: \", len(associationIds))\n", + " else:\n", + " associationIds_2 = None\n", + " else:\n", + " associationIds_1 = None\n", + " associationIds_2 = None\n", + " # Append the extracted data as a dictionary to the list\n", + " projectFiles_list.append({\n", + " 'uuid': uuid,\n", + " 'projectUuid': projectUuid,\n", + " 'associationIds_1': associationIds_1,\n", + " 'associationIds_2': associationIds_2,\n", + " 'owner': owner,\n", + " 'task_type': task_type,\n", + " 'file_name': file_name,\n", + " 'mimeType': mimeType,\n", + " 'last_updated': last_updated\n", + " })\n", + "\n", + "# Create a DataFrame from the list of extracted data\n", + "df_projectFiles = pd.DataFrame(projectFiles_list)\n", + "# Print the DataFrame\n", + "df_projectFiles.tail()" + ] + }, + { + "cell_type": "markdown", + "id": "c5b322ff-78b3-46fd-a5cf-6a449b6e821e", + "metadata": {}, + "source": [ + "## Look at projectJob data\n", + " - Contains ProjectUUID and JobUUID" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "id": "072ef231-e6d6-4812-a9e7-6a068944e8f7", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
uuidownerprojectUuidjobUuidlastUpdated
00001400192074855-5056a550b8-0001-012vdj0001399309581559-5056a550b8-0001-0120001399315558601-5056a550b8-0001-0072014-05-15T17:14:34.855-05:00
10001400254373114-5056a550b8-0001-012vdj0001400250478554-5056a550b8-0001-0120001400254372814-5056a550b8-0001-0072014-05-16T10:32:53.114-05:00
20001400273862423-5056a550b8-0001-012vdj0001400250478554-5056a550b8-0001-0120001400273862119-5056a550b8-0001-0072014-05-16T15:57:42.423-05:00
30001400274448495-5056a550b8-0001-012vdj0001400250478554-5056a550b8-0001-0120001400274448320-5056a550b8-0001-0072014-05-16T16:07:28.494-05:00
40001400274714655-5056a550b8-0001-012vdj0001400250478554-5056a550b8-0001-0120001400274714490-5056a550b8-0001-0072014-05-16T16:11:54.655-05:00
\n", + "
" + ], + "text/plain": [ + " uuid owner \\\n", + "0 0001400192074855-5056a550b8-0001-012 vdj \n", + "1 0001400254373114-5056a550b8-0001-012 vdj \n", + "2 0001400273862423-5056a550b8-0001-012 vdj \n", + "3 0001400274448495-5056a550b8-0001-012 vdj \n", + "4 0001400274714655-5056a550b8-0001-012 vdj \n", + "\n", + " projectUuid jobUuid \\\n", + "0 0001399309581559-5056a550b8-0001-012 0001399315558601-5056a550b8-0001-007 \n", + "1 0001400250478554-5056a550b8-0001-012 0001400254372814-5056a550b8-0001-007 \n", + "2 0001400250478554-5056a550b8-0001-012 0001400273862119-5056a550b8-0001-007 \n", + "3 0001400250478554-5056a550b8-0001-012 0001400274448320-5056a550b8-0001-007 \n", + "4 0001400250478554-5056a550b8-0001-012 0001400274714490-5056a550b8-0001-007 \n", + "\n", + " lastUpdated \n", + "0 2014-05-15T17:14:34.855-05:00 \n", + "1 2014-05-16T10:32:53.114-05:00 \n", + "2 2014-05-16T15:57:42.423-05:00 \n", + "3 2014-05-16T16:07:28.494-05:00 \n", + "4 2014-05-16T16:11:54.655-05:00 " + ] + }, + "execution_count": 18, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "projectJob_list = []\n", + "for item in jsonarray:\n", + " item_type = item['name']\n", + " if item_type == 'projectJob':\n", + " # json_print(item)\n", + " uuid = item.get('uuid', None)\n", + " owner = item.get('owner', None)\n", + " projectUuid = item.get('value', {}).get('projectUuid', None)\n", + " jobUuid = item.get('value', {}).get('jobUuid', None)\n", + " last_updated = item.get('lastUpdated', None)\n", + " projectJob_list.append({\n", + " 'uuid': uuid,\n", + " 'owner': owner,\n", + " 'projectUuid': projectUuid,\n", + " 'jobUuid': jobUuid,\n", + " 'lastUpdated': last_updated\n", + " })\n", + "\n", + "# Create a DataFrame from the list of extracted data\n", + "df_projectJob = pd.DataFrame(projectJob_list)\n", + "df_projectJob.head()" + ] + }, + { + "cell_type": "markdown", + "id": "0a3633c8-1be8-4070-8d22-4919f21b0601", + "metadata": {}, + "source": [ + "## Filter Metadata file by 
removing users with only READ access and usernames that contain test" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "id": "2d42d186-b64f-49d0-a680-05ed516a62b1", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
idlast_updatedpermissionusernameuuidtenant_id
3142014-01-29 10:28:16READ_WRITEjfonner0001389977207738-5056a550b8-0001-012vdjserver.org
4172014-01-29 14:06:38READ_WRITEadshkl;dasfhkdf0001391025968832-5056a550b8-0001-012vdjserver.org
5182014-02-20 10:07:51READ_WRITEVDJAuth0001392912471365-5056a550b8-0001-012vdjserver.org
6192014-02-20 10:14:20READ_WRITEVDJAuth0001392912860303-5056a550b8-0001-012vdjserver.org
7212014-02-20 11:10:54READ_WRITEwscarbor0001392914178983-5056a550b8-0001-012vdjserver.org
\n", + "
" + ], + "text/plain": [ + " id last_updated permission username \\\n", + "3 14 2014-01-29 10:28:16 READ_WRITE jfonner \n", + "4 17 2014-01-29 14:06:38 READ_WRITE adshkl;dasfhkdf \n", + "5 18 2014-02-20 10:07:51 READ_WRITE VDJAuth \n", + "6 19 2014-02-20 10:14:20 READ_WRITE VDJAuth \n", + "7 21 2014-02-20 11:10:54 READ_WRITE wscarbor \n", + "\n", + " uuid tenant_id \n", + "3 0001389977207738-5056a550b8-0001-012 vdjserver.org \n", + "4 0001391025968832-5056a550b8-0001-012 vdjserver.org \n", + "5 0001392912471365-5056a550b8-0001-012 vdjserver.org \n", + "6 0001392912860303-5056a550b8-0001-012 vdjserver.org \n", + "7 0001392914178983-5056a550b8-0001-012 vdjserver.org " + ] + }, + "execution_count": 19, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "metadata_perms_df\n", + "#filter metadata keeping only items that has permission for both READ_WRITE and ALL\n", + "filtered_metadata_perms_df = metadata_perms_df[~(metadata_perms_df.permission == 'READ')]\n", + "# Filter out usernames containing 'test'\n", + "filtered_metadata_perms_df = filtered_metadata_perms_df[~filtered_metadata_perms_df['username'].str.contains('test', case=False)]\n", + "filtered_metadata_perms_df.head()" + ] + }, + { + "cell_type": "markdown", + "id": "e27e1396-6dcb-4f63-94e8-c371da07417f", + "metadata": {}, + "source": [ + "## Look at all the Jobs " + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "id": "b961c52a-4d34-4e37-9e9a-97ae0899d47b", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
system_idownerapp_idstatuslast_updateduuidarchive_pathremote_outcomeupdate_tokenparameters.Creator
0ls6.tacc.utexas.eduvdjrepcalc-ls6-2.0u8FINISHED2025-01-25 15:43:51.678c7cd08ad-a560-4574-a363-b9cc4c5e051d-007/projects/5456400192359305711-242ac118-0001-01...FINISHEDeb27e311-4a37-4aeb-b649-056704dd2711schristley
1ls6.tacc.utexas.eduvdjigblast-ls6-1.20u6FINISHED2025-01-24 04:20:37.8919188bf80-e868-4e05-a6b4-308c044108d7-007/projects/5456400192359305711-242ac118-0001-01...FINISHED5e2528fd-25d6-4473-9287-6a67a8de8391schristley
2ls6.tacc.utexas.eduvdjigblast-ls6-1.20u6FAILED2025-01-22 15:04:46.891773a5cb7-b369-4517-a221-83d57e3899e5-007/projects/5199144433477554666-242ac116-0001-01...FAILED_SKIP_ARCHIVE78b89c14-3dec-4aa8-acf8-d2592064e3a4scott_public
3ls6.tacc.utexas.eduvdjvdj_pipe-ls6-0.1.7u2FINISHED2025-01-14 22:31:02.980c0ab5f4a-97b0-4dc3-93e8-0908c95cb3a4-007/projects/5456400192359305711-242ac118-0001-01...FINISHED1e2f122d-5e5b-4f14-931f-ca55803115ffschristley
4ls6.tacc.utexas.eduvdjvdj_pipe-ls6-0.1.7u2FINISHED2025-01-09 04:21:12.476ad02cb34-250e-48cb-a06e-973e431b62ee-007/projects/6589143665654501871-242ac118-0001-01...FINISHED1069949d-1d9a-453f-80b8-7372019aba31schristley
\n", + "
" + ], + "text/plain": [ + " system_id owner app_id status \\\n", + "0 ls6.tacc.utexas.edu vdj repcalc-ls6-2.0u8 FINISHED \n", + "1 ls6.tacc.utexas.edu vdj igblast-ls6-1.20u6 FINISHED \n", + "2 ls6.tacc.utexas.edu vdj igblast-ls6-1.20u6 FAILED \n", + "3 ls6.tacc.utexas.edu vdj vdj_pipe-ls6-0.1.7u2 FINISHED \n", + "4 ls6.tacc.utexas.edu vdj vdj_pipe-ls6-0.1.7u2 FINISHED \n", + "\n", + " last_updated uuid \\\n", + "0 2025-01-25 15:43:51.678 c7cd08ad-a560-4574-a363-b9cc4c5e051d-007 \n", + "1 2025-01-24 04:20:37.891 9188bf80-e868-4e05-a6b4-308c044108d7-007 \n", + "2 2025-01-22 15:04:46.891 773a5cb7-b369-4517-a221-83d57e3899e5-007 \n", + "3 2025-01-14 22:31:02.980 c0ab5f4a-97b0-4dc3-93e8-0908c95cb3a4-007 \n", + "4 2025-01-09 04:21:12.476 ad02cb34-250e-48cb-a06e-973e431b62ee-007 \n", + "\n", + " archive_path remote_outcome \\\n", + "0 /projects/5456400192359305711-242ac118-0001-01... FINISHED \n", + "1 /projects/5456400192359305711-242ac118-0001-01... FINISHED \n", + "2 /projects/5199144433477554666-242ac116-0001-01... FAILED_SKIP_ARCHIVE \n", + "3 /projects/5456400192359305711-242ac118-0001-01... FINISHED \n", + "4 /projects/6589143665654501871-242ac118-0001-01... 
FINISHED \n", + "\n", + " update_token parameters.Creator \n", + "0 eb27e311-4a37-4aeb-b649-056704dd2711 schristley \n", + "1 5e2528fd-25d6-4473-9287-6a67a8de8391 schristley \n", + "2 78b89c14-3dec-4aa8-acf8-d2592064e3a4 scott_public \n", + "3 1e2f122d-5e5b-4f14-931f-ca55803115ff schristley \n", + "4 1069949d-1d9a-453f-80b8-7372019aba31 schristley " + ] + }, + "execution_count": 21, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "jobs_all_df.head()\n", + "## Filter AllJobs columns\n", + "#### Keeping only important ones\n", + "keep_columns = ['system_id', 'owner', 'app_id', 'status', 'last_updated', 'uuid', 'archive_path', 'remote_outcome', 'update_token', 'parameters.Creator']\n", + "jobs_all_df = jobs_all_df[keep_columns]\n", + "jobs_all_df.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 45, + "id": "7abe1c74-90f1-4172-a964-5fd247f914d7", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
uuidusernamefirstNamelastNameemaillastUpdated
00001389976523746-5056a550b8-0001-012wscarborWalterScarboroughwscarbor@tacc.utexas.edu2016-04-27T15:07:26.261-05:00
10001391029872321-5056a550b8-0001-012test19Test19test19@test.com2014-01-29T15:12:33.955-06:00
20001391717057917-5056a550b8-0001-012test31test31@test.com2014-02-06T14:04:17.917-06:00
30001391719926131-5056a550b8-0001-012test33NoneNonetest33@test.com2014-02-06T14:52:06.131-06:00
40001391720404124-5056a550b8-0001-012test34NedFlanderstest34@test.com2014-02-06T15:00:46.376-06:00
.....................
18176242932598575984145-242ac118-0001-012rgarciaRodrigoGarcía Valienter.garciavaliente@amsterdamumc.nl2025-01-02T11:11:52.894-06:00
18182755888095932968465-242ac118-0001-012rgarciavRodrigoGarcía Valienter.garciavaliente@amsterdamumc.nl2025-01-02T11:41:52.070-06:00
18195481029658171207185-242ac118-0001-012erichardsonEveRichardsonerichardson@lji.org2025-01-07T18:01:25.657-06:00
18204458895817601248785-242ac118-0001-012samwolsamuel.wollenburg@utsouthwestern.edu2025-01-07T20:24:59.390-06:00
18219076859566261923345-242ac118-0001-012chrisjames1992Chinweike ChristopherUdoyechinweikechristopher.udoye@uksh.de2025-01-17T07:54:02.133-06:00
\n", + "

1822 rows × 6 columns

\n", + "
" + ], + "text/plain": [ + " uuid username \\\n", + "0 0001389976523746-5056a550b8-0001-012 wscarbor \n", + "1 0001391029872321-5056a550b8-0001-012 test19 \n", + "2 0001391717057917-5056a550b8-0001-012 test31 \n", + "3 0001391719926131-5056a550b8-0001-012 test33 \n", + "4 0001391720404124-5056a550b8-0001-012 test34 \n", + "... ... ... \n", + "1817 6242932598575984145-242ac118-0001-012 rgarcia \n", + "1818 2755888095932968465-242ac118-0001-012 rgarciav \n", + "1819 5481029658171207185-242ac118-0001-012 erichardson \n", + "1820 4458895817601248785-242ac118-0001-012 samwol \n", + "1821 9076859566261923345-242ac118-0001-012 chrisjames1992 \n", + "\n", + " firstName lastName \\\n", + "0 Walter Scarborough \n", + "1 Test 19 \n", + "2 \n", + "3 None None \n", + "4 Ned Flanders \n", + "... ... ... \n", + "1817 Rodrigo García Valiente \n", + "1818 Rodrigo García Valiente \n", + "1819 Eve Richardson \n", + "1820 \n", + "1821 Chinweike Christopher Udoye \n", + "\n", + " email lastUpdated \n", + "0 wscarbor@tacc.utexas.edu 2016-04-27T15:07:26.261-05:00 \n", + "1 test19@test.com 2014-01-29T15:12:33.955-06:00 \n", + "2 test31@test.com 2014-02-06T14:04:17.917-06:00 \n", + "3 test33@test.com 2014-02-06T14:52:06.131-06:00 \n", + "4 test34@test.com 2014-02-06T15:00:46.376-06:00 \n", + "... ... ... 
\n", + "1817 r.garciavaliente@amsterdamumc.nl 2025-01-02T11:11:52.894-06:00 \n", + "1818 r.garciavaliente@amsterdamumc.nl 2025-01-02T11:41:52.070-06:00 \n", + "1819 erichardson@lji.org 2025-01-07T18:01:25.657-06:00 \n", + "1820 samuel.wollenburg@utsouthwestern.edu 2025-01-07T20:24:59.390-06:00 \n", + "1821 chinweikechristopher.udoye@uksh.de 2025-01-17T07:54:02.133-06:00 \n", + "\n", + "[1822 rows x 6 columns]" + ] + }, + "execution_count": 45, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df_profile" + ] + }, + { + "cell_type": "code", + "execution_count": 85, + "id": "035be43b-2e9c-4dde-b045-6d0e14c018ef", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Email ID: leyu.liu@staidsonbio.com\n", + "\n", + "\n", + "Dear lliu, \n", + "\n", + "You have total 2 projects in VDJ server. For My 1st NGS you have 0 project files and 5 job files, For Rabbit IG you have 0 project files and 0 job files, avalibale on our database. 
def _count_project_records(project_uuid, records):
    """Count metadata records associated with a project, grouped by record name.

    Args:
        project_uuid: UUID of the project to look up.
        records: Iterable of metadata dicts; each is read for its 'name' and
            its (optional) 'associationIds' entries.

    Returns:
        dict mapping record name (e.g. 'projectFile', 'projectJob') to the
        number of records whose 'associationIds' contains ``project_uuid``.
    """
    counts = {}
    for record in records:
        # 'associationIds' may be missing or null; treat that as "no associations"
        # rather than failing with TypeError on `in None`.
        if project_uuid in (record.get('associationIds') or []):
            # The first match counts as 1. The earlier version initialised the
            # counter to 0, so every reported count was one too low.
            counts[record['name']] = counts.get(record['name'], 0) + 1
    return counts


# Summarise one user's projects and draft the notification email for them.
user = 'lliu'

# Permission rows whose uuid is a project uuid, then only this user's rows.
filtered_job_metadata_perms_df = filtered_metadata_perms_df[filtered_metadata_perms_df.uuid.isin(df_projects.projectUuid)]
user_info = filtered_job_metadata_perms_df[filtered_job_metadata_perms_df.username == user]
total_projects = user_info.shape[0]  # taken before the profile join below

# Join only the profile columns that are needed. NOTE(review): df_profile as
# displayed earlier in this notebook carries its own `uuid` column; merging it
# whole would yield uuid_x / uuid_y and break `user_info.uuid` below.
user_info = pd.merge(user_info, df_profile[['username', 'email']], on='username', how='inner')
if user_info.empty:
    # Fail with a readable message instead of an IndexError from .iloc[0].
    raise ValueError(f"No projects with a matching profile found for user {user!r}")
email_id = user_info.email.iloc[0]
print(f"Email ID: {email_id}\n\n")

# project uuid -> {'project_name': ..., <record name>: <count>, ...}
all_project_info = {}
for project_uuid in user_info.uuid:
    project_name = df_projects[df_projects.projectUuid == project_uuid].project_name.iloc[0]
    all_project_info[project_uuid] = {
        'project_name': project_name,
        **_count_project_records(project_uuid, jsonarray),
    }

text = f'Dear {user}, \n\nYou have total {total_projects} projects in VDJ server.'
for project in all_project_info.values():
    project_name = project.get('project_name', 'Not Available')
    n_project_file = project.get('projectFile', 0)
    n_job_file = project.get('projectJob', 0)  # reported to the user as "job files"
    text += f' For {project_name} you have {n_project_file} project files and {n_job_file} job files,'
# Spelling of "available" corrected in the outgoing message.
text += ' available on our database. If you want them to be transferred over to our VDJ server V2 then please let us know.\n\nThanks\nVDJServer Teams.'

print(text)
all_project_info