Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 12 additions & 5 deletions R/generateHypotheses.R
Original file line number Diff line number Diff line change
Expand Up @@ -97,7 +97,8 @@ calculatePathlength <- function(binaryInteractionTable){
#' @param redundancy_cutoff Numeric, maximum overlap distance between two hypotheses
#' (0=identical,1=subset,between 1 and 2=some shared subunits, 2=no shared subunits).
#' Defaults to 1.
#'
#' @param clust_method character string, Which method to use for clustering (see function hclust for more info).
#' Defaults to "complete".
#' @return data.table in the format of complex hypotheses.
#' Has the following columns:
#' \itemize{
Expand All @@ -124,7 +125,8 @@ calculatePathlength <- function(binaryInteractionTable){

generateComplexTargets <- function(dist_info,
max_distance=1,
redundancy_cutoff=1){
redundancy_cutoff=1,
clust_method = "complete"){

all_proteins <- unique(c(dist_info$x,dist_info$y))

Expand All @@ -142,14 +144,17 @@ generateComplexTargets <- function(dist_info,

## Remove redundant hypotheses
complex_table <- .collapseWideHypothesis(hypothesis = initial_complexes,
redundancy_cutoff = redundancy_cutoff)
redundancy_cutoff = redundancy_cutoff,
clust_method = clust_method)
return(complex_table)
}

#' Collapse redundant hypotheses
#' @description Remove redundancy in existing complex hypotheses
#' @import data.table
#' @param clust_method character string, which method to use for clustering (see function hclust for more info).
#' Defaults to "complete".
#' @param hypothesis data.table with complex hypotheses.
#' Must have the following columns:
#' \itemize{
#' \item complex_id: character strings, a unique id for every complex
Expand Down Expand Up @@ -180,14 +185,16 @@ generateComplexTargets <- function(dist_info,
#'
#'
collapseHypothesis <- function(hypothesis,
                               redundancy_cutoff = 1,
                               clust_method = "complete") {
  # Collapse a long-format hypothesis table (one row per complex_id/protein_id
  # pair) to one row per complex, then hand it to .collapseWideHypothesis().
  #
  # hypothesis:        data.table with at least complex_id and protein_id.
  # redundancy_cutoff: forwarded unchanged to .collapseWideHypothesis().
  # clust_method:      forwarded unchanged to .collapseWideHypothesis().
  #
  # Returns whatever .collapseWideHypothesis() returns.

  # Work on a copy: the := assignments below modify by reference and would
  # otherwise alter the caller's table.
  hyp <- copy(hypothesis)

  # Per complex: number of subunits and a ";"-separated list of its proteins.
  hyp[, n_subunits := length(protein_id), by = "complex_id"]
  hyp[, subunits_detected := paste(protein_id, collapse = ";"), by = "complex_id"]

  # Keep a single row per complex and drop the per-protein column.
  hyp <- unique(hyp, by = "complex_id")
  hyp[, protein_id := NULL]

  hypothesis_unique <- .collapseWideHypothesis(hyp,
                                               redundancy_cutoff = redundancy_cutoff,
                                               clust_method = clust_method)
  return(hypothesis_unique)
}

Expand Down
13 changes: 9 additions & 4 deletions R/generateHypothesesHelpers.R
Original file line number Diff line number Diff line change
Expand Up @@ -59,10 +59,12 @@ getDistanceMatrix <- function(complexFeatures){
#' @description complexClustering.
#' @param complexFeatures data.table containing filtered complex feature results.
#' @param dist_mat distance matrix from getDistanceMatrix
#' @param clust_method character string, Which method to use for clustering (see function hclust for more info).
#' Defaults to "complete".
#' @return cluster object
#' @export
complexClustering <- function(complexFeatures, dist_mat, clust_method = "complete") {
  # Hierarchically cluster the entries described by dist_mat.
  #
  # complexFeatures: table providing a consecutive_feature_identifier column,
  #                  used to label the leaves of the tree.
  # dist_mat:        dissimilarity object accepted by hclust().
  # clust_method:    passed to hclust() as its `method` argument.
  #
  # Returns the hclust object with its labels replaced.
  hc <- hclust(dist_mat, method = clust_method)

  # Label leaves with the feature identifiers instead of the labels that
  # hclust() derived from dist_mat.
  hc$labels <- complexFeatures$consecutive_feature_identifier
  hc
}
Expand All @@ -73,13 +75,16 @@ complexClustering <- function(complexFeatures,dist_mat){
#' @import data.table
#' @param hypothesis data.table with complex hypotheses
#' @param redundancy_cutoff numeric maximum overlap distance between two hypotheses (0=identical,1=subset,between 1 and 2=some shared subunits, 2=no shared subunits), default=1
#' @param clust_method character string, Which method to use for clustering (see function hclust for more info).
#' Defaults to "complete".
#' @return data.table in the format of complex hypotheses

.collapseWideHypothesis <- function(hypothesis,
redundancy_cutoff=1){
redundancy_cutoff=1,
clust_method = "complete"){

dist_hyp <- getDistanceMatrix(hypothesis)
clust_hyp <- complexClustering(hypothesis,dist_hyp)
clust_hyp <- complexClustering(hypothesis,dist_hyp, clust_method)
tree_cut=cutree(clust_hyp,h=redundancy_cutoff)
hypothesis[,consecutive_feature_identifier := .I]
hypothesis[,unique_feature_identifier := 0]
Expand Down