Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
26 commits
Select commit Hold shift + click to select a range
59b44ba
I sure hope this doesn't break everything C:
EmySanEsc Sep 29, 2021
8c64964
CoRa_Main_v2.5.jl! Ahora con más legibilidad
EmySanEsc Oct 23, 2021
f893c7c
I'm a clutz, and forgot to add these changes @.@
EmySanEsc Oct 23, 2021
3759fab
a
EmySanEsc Oct 23, 2021
225847c
Typo :/
EmySanEsc Oct 23, 2021
f6141a7
Progress (?
EmySanEsc Oct 29, 2021
b314f9c
It works!
EmySanEsc Oct 29, 2021
ce75dd9
Mass Cora is up and running! Allows for many parameters to be sequent…
EmySanEsc Nov 11, 2021
c33b023
This should work :3c
EmySanEsc Apr 7, 2022
fcbfcbc
I can't believe I actually fixed this
EmySanEsc May 5, 2022
704df2c
La última cena
EmySanEsc Jun 2, 2022
59eafe8
UwU!
EmySanEsc Jul 28, 2022
874ca27
UwU!
EmySanEsc Sep 26, 2022
e86ea5b
This should work!
EmySanEsc Oct 7, 2022
1d9050d
Long long overdue
EmySanEsc Feb 20, 2023
0713876
FN_CoRa_v2.jl, now it doesn't try 8M times for a single dot c:
EmySanEsc Feb 21, 2023
10a2a18
Continuacion_De_Corridas_ATF now with checking for too many NaNs in a…
EmySanEsc Feb 22, 2023
77a2559
Small additions to make more user friendly
EmySanEsc Feb 22, 2023
885c4a8
All 8 models with 1250Set2 ran
EmySanEsc Feb 25, 2023
3dc99a3
This should work :crying_cat_face:
EmySanEsc Jan 26, 2024
a31db84
pamRDS files
EmySanEsc Jan 26, 2024
f72a5ae
Create grouping groups.Rmd
EmySanEsc Jan 29, 2024
3d4b708
Fixing declared bug
EmySanEsc Aug 5, 2024
74ebcae
Tutorial files
EmySanEsc Aug 5, 2024
8d75f92
Update grouping groups.Rmd
EmySanEsc Sep 17, 2024
2ea5409
Final Update
EmySanEsc Sep 17, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Binary file added .RData
Binary file not shown.
512 changes: 512 additions & 0 deletions .Rhistory

Large diffs are not rendered by default.

17 changes: 17 additions & 0 deletions .vscode/launch.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
{
// Use IntelliSense to learn about possible attributes.
// Hover to view descriptions of existing attributes.
// For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
"version": "0.2.0",
"configurations": [
{
"type": "julia",
"request": "launch",
"name": "Run active Julia file",
"program": "${file}",
"stopOnEntry": false,
"cwd": "${workspaceFolder}",
"juliaEnv": "${command:activeJuliaEnvironment}"
}
]
}
3 changes: 3 additions & 0 deletions .vscode/settings.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
{
"julia.execution.codeInREPL": true
}
44 changes: 44 additions & 0 deletions 1250Set2.r
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
#' Pointwise penalty between two curve values.
#'
#' @param a,b Single numeric values, one point from each of two curves.
#' @return 0 when both values are NaN, 1 when exactly one of them is NaN,
#'   otherwise the absolute difference `abs(a - b)`.
Comparison <- function(a, b) {
  a_missing <- is.nan(a)
  b_missing <- is.nan(b)
  # Scalar conditions: use the short-circuit operators, not elementwise & / |.
  if (a_missing && b_missing) {
    return(0)
  }
  if (a_missing || b_missing) {
    return(1)
  }
  abs(a - b)
}

#' Pairwise "area between two curves" dissimilarity matrix.
#'
#' Each column of `Data` is treated as one curve and each row as one sample
#' point. The dissimilarity of two columns is the sum over rows of the same
#' pointwise penalty that `Comparison()` defines: 0 when both values are NaN,
#' 1 when exactly one is NaN, and the absolute difference otherwise.
#'
#' @param Data Numeric data frame (or matrix) with one curve per column.
#' @return A symmetric numeric matrix with `ncol(Data)` rows and columns and
#'   no dimnames; entry `[i, j]` is the dissimilarity of columns i and j.
ABTC <- function(Data) {
  Curves <- as.matrix(Data)
  nCurves <- ncol(Curves)
  MissingPoint <- is.nan(Curves)
  AreaMatrix <- matrix(0, nrow = nCurves, ncol = nCurves)
  # seq_len() keeps the loop empty for zero columns and, unlike the previous
  # 1:(ncol - 1) range, also visits the last column so every cell of the
  # lower triangle (diagonal included) is computed explicitly.
  for (a in seq_len(nCurves)) {
    Others <- a:nCurves
    # Column a is recycled down each of the compared columns.
    Gap <- abs(Curves[, Others, drop = FALSE] - Curves[, a])
    OneMissing <- MissingPoint[, Others, drop = FALSE] | MissingPoint[, a]
    BothMissing <- MissingPoint[, Others, drop = FALSE] & MissingPoint[, a]
    # Order matters: "both missing" is a subset of "one missing" and must win.
    Gap[OneMissing] <- 1
    Gap[BothMissing] <- 0
    Area <- colSums(Gap)
    AreaMatrix[Others, a] <- Area
    AreaMatrix[a, Others] <- Area
  }
  AreaMatrix
}

# ---- Build the CoRa curve table for 1250Set2 and write its ABTC matrix ----

# Directory holding one OUT_ExSSs_<model>_1250Set2_mY_mY.txt table per model.
TablesDir <- "/mnt/Adenina/mgomez/esanchez/AUTC_Cluster/CoRaTables"
# The order here fixes the column order of CoRas and therefore of CalcABTC.
ModelNames <- c("ATFv1", "ATFv2", "BMFv1", "BMFv2", "BNFv1", "BNFv2", "FADv1", "FADv2")

ModelTables <- lapply(ModelNames, function(Model) {
  read.table(
    file = file.path(TablesDir, paste0("OUT_ExSSs_", Model, "_1250Set2_mY_mY.txt")),
    header = TRUE
  )
})

# One label per table row, "<model>_<row number>"; seq_len() yields no labels
# for an empty table instead of the bogus "<model>_1", "<model>_0".
CurveNames <- unlist(lapply(seq_along(ModelNames), function(m) {
  paste0(ModelNames[m], "_", seq_len(nrow(ModelTables[[m]])))
}))

# Drop the first column of every table, stack all rows, then transpose so
# that each original table row becomes one column of CoRas.
StackedRows <- do.call(rbind, lapply(ModelTables, function(Table) Table[, -1, drop = FALSE]))
CoRas <- as.data.frame(t(StackedRows))
# Values above 1 are treated as invalid and replaced by NaN.
CoRas[CoRas > 1] <- NaN
colnames(CoRas) <- CurveNames
# Discard columns that are entirely NA/NaN; drop = FALSE keeps a data frame
# even when a single column survives.
CoRas <- CoRas[, colSums(is.na(CoRas)) != nrow(CoRas), drop = FALSE]

CalcABTC <- ABTC(CoRas)
colnames(CalcABTC) <- colnames(CoRas)

write.csv(CalcABTC, "/mnt/Adenina/mgomez/esanchez/AUTC_Cluster/CalcABTC1250Set2.csv", row.names = FALSE)
73 changes: 73 additions & 0 deletions 1250Set2Part2-Repeats1-30.r
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
library(stringr)
library(cluster)
library(ggplot2)
library(ggdendro, lib.loc = "/mnt/Adenina/mgomez/esanchez/R/x86_64-pc-linux-gnu-library/4.2/")



# ---- Rebuild the curve labels and load the precomputed ABTC matrix ----

# Directory holding one OUT_ExSSs_<model>_1250Set2_mY_mY.txt table per model.
TablesDir <- "/mnt/Adenina/mgomez/esanchez/AUTC_Cluster/CoRaTables"
# The order here fixes the column order of CoRas.
ModelNames <- c("ATFv1", "ATFv2", "BMFv1", "BMFv2", "BNFv1", "BNFv2", "FADv1", "FADv2")

ModelTables <- lapply(ModelNames, function(Model) {
  read.table(
    file = file.path(TablesDir, paste0("OUT_ExSSs_", Model, "_1250Set2_mY_mY.txt")),
    header = TRUE
  )
})

# One label per table row, "<model>_<row number>"; seq_len() yields no labels
# for an empty table instead of the bogus "<model>_1", "<model>_0".
CurveNames <- unlist(lapply(seq_along(ModelNames), function(m) {
  paste0(ModelNames[m], "_", seq_len(nrow(ModelTables[[m]])))
}))

# Drop the first column of every table, stack all rows, then transpose so
# that each original table row becomes one column of CoRas.
StackedRows <- do.call(rbind, lapply(ModelTables, function(Table) Table[, -1, drop = FALSE]))
CoRas <- as.data.frame(t(StackedRows))
# Values above 1 are treated as invalid and replaced by NaN.
CoRas[CoRas > 1] <- NaN
colnames(CoRas) <- CurveNames
# Discard columns that are entirely NA/NaN; drop = FALSE keeps a data frame
# even when a single column survives.
CoRas <- CoRas[, colSums(is.na(CoRas)) != nrow(CoRas), drop = FALSE]
# Remove the temporaries so they do not end up in the workspace image that
# this script saves at its end.
rm(TablesDir, ModelNames, ModelTables, CurveNames, StackedRows)

print("I am now going to read the CalcABTC1250Set2.csv file")

CalcABTC <- read.csv("/mnt/Adenina/mgomez/esanchez/AUTC_Cluster/CalcABTC1250Set2.csv", header = TRUE, sep = ",")
# The CSV stores no row names; label both dimensions with the curve names.
colnames(CalcABTC) <- colnames(CoRas)
rownames(CalcABTC) <- colnames(CalcABTC)

print("I have finished reading the CalcABTC1250Set2.csv file")

#' Randomly subsample a square matrix / data frame on both dimensions.
#'
#' Draws `round(nrow(data) / 100 * percentage)` distinct row positions without
#' replacement and returns the rows AND columns at those positions, in
#' ascending order. Uses R's random number generator; call `set.seed()` first
#' for reproducible draws.
#'
#' @param data Square matrix or data frame (rows and columns in the same order).
#' @param percentage Share of rows to keep, expressed in percent (0-100).
#' @return `data[indexes, indexes]`, always two-dimensional (never dropped to
#'   a vector or scalar, even when a single position is drawn).
subsample <- function(data, percentage) {
  nKeep <- round(nrow(data) / 100 * percentage)
  # sample.int(n, k) consumes the RNG exactly like sample(1:n, k) but is also
  # correct for n == 0, where 1:n would be c(1, 0).
  indexes <- sort(sample.int(nrow(data), nKeep, replace = FALSE))
  data[indexes, indexes, drop = FALSE]
}
# ---- Estimate the number of clusters on a 30% subsample ----
subMaxK <- c()
subWholeK <- c()
set.seed(42)
i <- 30
print(paste0("I will start the process with the subsample of", i, "%"))
subABTC <- subsample(CalcABTC, i)
# Model name = label text before the first "_" (e.g. "ATFv1" in "ATFv1_12").
subModels <- unique(str_split_fixed(colnames(subABTC), "_", 2)[, 1])
# Logical membership matrix: one row per subsampled curve, one column per model.
subIndexes <- sapply(subModels, function(M) grepl(paste0(M, "_"), colnames(subABTC)))

# Per-model gap statistic; subMaxK30 is the sum of the per-model cluster counts.
# The value of tryCatch() is assigned directly: an assignment made inside the
# error handler would only create a variable local to that handler function,
# leaving subMaxK30 undefined after a failure.
subMaxK30 <- tryCatch(
  {
    subModelsKCalc <- lapply(subModels, function(Model) {
      Members <- subIndexes[, Model]
      clusGap(subABTC[Members, Members], FUN = kmeans, nstart = 15, K.max = 60, B = 100)
    })
    sum(unlist(lapply(subModelsKCalc, function(Model) {
      maxSE(Model$Tab[, "gap"], Model$Tab[, "SE.sim"], method = "firstSEmax", SE.factor = 1)
    })))
  },
  error = function(e) {
    # Report the failure instead of swallowing it, then fall back to NaN.
    message("Per-model cluster estimation failed: ", conditionMessage(e))
    NaN
  }
)
print(paste0("I finished the calculation for the individual models' clusters with a subsample of ", i))

# Gap statistic on the whole subsample, searching up to subMaxK30 clusters.
# If subMaxK30 is NaN the clusGap() call fails and subWholeK30 becomes NaN too.
subWholeK30 <- tryCatch(
  {
    temp <- clusGap(subABTC, FUN = kmeans, nstart = 15, K.max = subMaxK30, B = 150)
    maxSE(temp$Tab[, "gap"], temp$Tab[, "SE.sim"], method = "firstSEmax", SE.factor = 1)
  },
  error = function(e) {
    message("Whole-sample cluster estimation failed: ", conditionMessage(e))
    NaN
  }
)
print(paste0("I finished the calculation for the clusters of the entire subsample of ", i))

save.image(file = "/mnt/Adenina/mgomez/esanchez/AUTC_Cluster/ClusteringCalcs2-30.RData")
48 changes: 48 additions & 0 deletions 1250Set2Part3-Repeats-10.r
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
library(stringr)
library(cluster)
library(ggplot2)
library(ggdendro, lib.loc = "/mnt/Adenina/mgomez/esanchez/R/x86_64-pc-linux-gnu-library/")
library(seriation, lib.loc = "/mnt/Adenina/mgomez/esanchez/R/x86_64-pc-linux-gnu-library/")
library(stringr)
library(dplyr)

# Restores the saved workspace; CalcABTC, subMaxK30 and subWholeK30 used below
# are not defined in this script, so they presumably come from this image --
# TODO confirm against the script that produced it.
load("/mnt/Adenina/mgomez/esanchez/AUTC_Cluster/ClusteringCalcs2-30-r10.RData")

# Output folder for every plot and RDS file of this repeat (keeps its trailing "/").
PlotDir <- "/mnt/Adenina/mgomez/esanchez/AUTC_Cluster/30_Plots/r10/"

# ---- PAM clustering with k = subMaxK30 ----
DissimMat <- CalcABTC

pamAUTCMaxK <- pam(DissimMat, diss = TRUE, k = subMaxK30)
ClusterMaxK <- pamAUTCMaxK$clustering
# Reorder rows/columns so members of the same cluster are adjacent.
SortedMaxK <- sort(ClusterMaxK)
DissimMatMaxK <- DissimMat[names(SortedMaxK), names(SortedMaxK)]
MaxKdissplot <- ggdissplot(as.dist(DissimMatMaxK), labels = SortedMaxK, method = NA, cluster_labels = TRUE, cluster_lines = TRUE, diag = FALSE) +
  scale_fill_gradient(low = "midnightblue", high = "white", na.value = "grey75") +
  ggtitle("Individual Model Clustering, 30% of sample analized for clustering")

# Pass the plot object explicitly rather than relying on last_plot().
ggsave(filename = "MaxKdissplot.png", path = PlotDir, plot = MaxKdissplot)

# ---- PAM clustering with k = subWholeK30 ----
DissimMat <- CalcABTC
pamAUTCWholeK <- pam(DissimMat, diss = TRUE, k = subWholeK30)
ClusterWholeK <- pamAUTCWholeK$clustering
SortedWholeK <- sort(ClusterWholeK)
DissimMatWholeK <- DissimMat[names(SortedWholeK), names(SortedWholeK)]
WholeKdissplot <- ggdissplot(as.dist(DissimMatWholeK), labels = SortedWholeK, method = NA, cluster_labels = TRUE, cluster_lines = TRUE, diag = FALSE) +
  scale_fill_gradient(low = "midnightblue", high = "white", na.value = "grey75") +
  ggtitle("AllxAll Clustering, 30% of sample analized for clustering")

ggsave(filename = "WholeKdissplot.png", path = PlotDir, plot = WholeKdissplot)

# ---- Dendrogram of the medoids of the subMaxK30 clustering ----
# Medoid-by-medoid dissimilarities, relabelled with each medoid's cluster number.
clustermedoids <- CalcABTC[pamAUTCMaxK$medoids, pamAUTCMaxK$medoids]
colnames(clustermedoids) <- paste0("Cluster ", pamAUTCMaxK$clustering[pamAUTCMaxK$medoids])
rownames(clustermedoids) <- colnames(clustermedoids)
dendro <- hclust(as.dist(clustermedoids), method = "average")
MaxKDendrogram <- ggdendrogram(dendro) +
  ggtitle("Signature Behaviours according to Intra-Motif Clustering, 30% subsample for Clustering") +
  theme(plot.title = element_text(size = 32))

ggsave(filename = "MaxKDendrogram.png", path = PlotDir, plot = MaxKDendrogram)

# ---- Dendrogram of the medoids of the subWholeK30 clustering ----
clustermedoids <- CalcABTC[pamAUTCWholeK$medoids, pamAUTCWholeK$medoids]
colnames(clustermedoids) <- paste0("Cluster ", pamAUTCWholeK$clustering[pamAUTCWholeK$medoids])
rownames(clustermedoids) <- colnames(clustermedoids)
dendro <- hclust(as.dist(clustermedoids), method = "average")
WholeKDendrogram <- ggdendrogram(dendro) +
  ggtitle("Signature Behaviours according to AllxAll Clustering, 30% subsample for Clustering") +
  theme(plot.title = element_text(size = 32))

ggsave(filename = "WholeKDendrogram.png", path = PlotDir, plot = WholeKDendrogram)

# ---- Persist both PAM fits under repeat-specific names ----
pamAUTCMaxK_10 <- pamAUTCMaxK
pamAUTCWholeK_10 <- pamAUTCWholeK

saveRDS(pamAUTCMaxK_10, file = paste0(PlotDir, "pamAUTCMaxK_10.RDS"))
saveRDS(pamAUTCWholeK_10, file = paste0(PlotDir, "pamAUTCWholeK_10.RDS"))
26 changes: 26 additions & 0 deletions ARGS_ATFv2_Mass_Par_Fig2B.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
"g","mY","gY","mU","gU","mW","gW","e0","eP","eM","mUs"
4.374262,0.125,240.4963,104.3550875,0.08475673,38.55807,0.04315675,0.0347173,3.92317875,446.67445,NA
4.439187,0.125,316.5751,34.6034,0.06448492,26.63861,0.04585885,0.08009755,2.52508875,319.0758,NA
4.20302,0.125,44.5206,9.1463375,0.05853416,24.25154,0.038968,0.02377785,13.3902,2.05505,NA
0.502344,0.125,253.1273,34.86275,0.08797378,82.73957,0.0377635,0.027542,9.30354,93.88545,NA
5.811915,0.125,447.2133,19.5748,0.06852534,73.84067,0.041248,0.01174935,5.42341125,283.1947,NA
3.199536,0.125,356.9257,5.242175,0.05798523,70.70673,0.09399695,0.09751705,29.40470625,482.9976,NA
0.924562,0.125,982.1279,67.71995,0.06403935,56.75868,0.0215646,0.0101163,18.51168,181.96465,NA
2.531954,0.125,130.5546,55.6405875,0.07759177,96.8361,0.05991965,0.0493312,23.91616125,136.4136,NA
0.333814,0.125,788.0254,24.9816625,0.08742756,70.95543,0.02825665,0.07897025,33.38528625,329.51345,NA
7.139948,0.125,577.8157,53.1071625,0.00057135,37.36001,0.0893851,0.06316115,7.52194875,209.5313,NA
5.663944,0.125,243.0415,90.3403875,0.0234109,86.53185,0.0221448,0.02509665,15.94825875,292.1487,NA
0.126489,0.125,630.2047,104.8545,0.08060145,35.39489,0.0160718,0.0993884,12.429075,196.0187,NA
6.814784,0.125,898.0867,111.752275,0.00899461,66.88286,0.05912405,0.04600645,1.6740825,238.43805,NA
7.131739,0.125,337.5527,11.7688875,0.04739663,56.04681,0.0682055,0.0230094,33.84166125,433.184,NA
9.736591,0.125,708.0543,22.4482,0.02812814,68.62534,0.06795265,0.0058608,23.59035,412.0414,NA
3.098781,0.125,461.9717,115.322275,0.09122547,70.43955,0.08840335,0.03175715,14.2336575,127.8691,NA
5.715657,0.125,640.3921,26.123125,0.07733903,15.70768,0.0461511,0.07627765,32.73836625,484.67725,NA
0.062508,0.125,710.7849,96.608475,0.03532782,57.91278,0.05533,0.08662505,0.95145,397.875,NA
3.759732,0.125,577.4332,92.3736875,0.02374509,66.57312,0.0212703,0.00962875,11.550225,321.00555,NA
9.207923,0.125,629.3464,3.7453,0.03559966,89.17623,0.0396337,0.06440355,30.28381125,378.2148,NA
3.250526,0.125,617.5827,98.0250375,0.08033664,75.06272,0.0629033,0.0650924,19.182285,409.7009,NA
8.514362,0.125,139.9739,99.4551,0.03371781,56.4644,0.07647985,0.00489435,36.20981625,481.0114,NA
8.203469,0.125,463.4682,30.2735125,0.0779367,56.95735,0.07451955,0.02009555,13.48105875,233.8335,NA
5.56174,0.125,280.9068,106.4765,0.00947487,23.04777,0.08030445,0.07048315,7.88084625,252.6994,NA
8.984655,0.125,289.2868,5.0697125,0.04006831,95.85524,0.09862445,0.0430645,0.5589675,128.22615,NA
Loading