diff --git a/DESCRIPTION b/DESCRIPTION index f90b3a7..2a9dc20 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -9,9 +9,10 @@ Description: A ggplot2 based biplot. It provides a drop-in replacement for biplot.princomp(). It implements a biplot and scree plot using ggplot2. Depends: - ggplot2, plyr, scales, grid + ggplot2, plyr, scales, grid, ggrepel License: GPL-2 URL: http://github.com/vqv/ggbiplot Collate: 'ggbiplot.r' 'ggscreeplot.r' +RoxygenNote: 6.1.1 diff --git a/R/ggbiplot.r b/R/ggbiplot.r index e0e06e6..b891492 100644 --- a/R/ggbiplot.r +++ b/R/ggbiplot.r @@ -31,12 +31,16 @@ #' @param ellipse.prob size of the ellipse in Normal probability #' @param labels optional vector of labels for the observations #' @param labels.size size of the text used for the labels +#' @param arrows.col color of the arrows #' @param alpha alpha transparency value for the points (0 = transparent, 1 = opaque) #' @param circle draw a correlation circle? (only applies when prcomp was called with scale = TRUE and when var.scale = 1) +#' @param var.subset vector of labels to show on biplot (NULL = show all labels) +#' @param var.repel whether or not to repel geom_text #' @param var.axes draw arrows for the variables? #' @param varname.size size of the text for variable names #' @param varname.adjust adjustment factor the placement of the variable names, >= 1 means farther from the arrow #' @param varname.abbrev whether or not to abbreviate the variable names +#' @param varname.col color of the labels (text only) #' #' @return a ggplot2 plot #' @export @@ -45,24 +49,26 @@ #' wine.pca <- prcomp(wine, scale. = TRUE) #' print(ggbiplot(wine.pca, obs.scale = 1, var.scale = 1, groups = wine.class, ellipse = TRUE, circle = TRUE)) #' -ggbiplot <- function(pcobj, choices = 1:2, scale = 1, pc.biplot = TRUE, - obs.scale = 1 - scale, var.scale = scale, - groups = NULL, ellipse = FALSE, ellipse.prob = 0.68, - labels = NULL, labels.size = 3, alpha = 1, - var.axes = TRUE, - circle = FALSE, circle.prob = 0.69, - varname.size = 3, varname.adjust = 1.5, - varname.abbrev = FALSE, ...) +ggbiplot <- function(pcobj, choices = 1:2, scale = 1, pc.biplot = TRUE, + obs.scale = 1 - scale, var.scale = scale, + groups = NULL, ellipse = FALSE, ellipse.prob = 0.68, + labels = NULL, labels.size = 3, + alpha = 1, var.axes = TRUE, + circle = FALSE, circle.prob = 0.69, + var.repel = TRUE, var.subset = NULL, + varname.col = "darkred", arrows.col = varname.col, + varname.size = 3, varname.adjust = 1.5, + varname.abbrev = FALSE, ...) { library(ggplot2) library(plyr) library(scales) library(grid) - + stopifnot(length(choices) == 2) - + # Recover the SVD - if(inherits(pcobj, 'prcomp')){ + if(inherits(pcobj, 'prcomp')){ nobs.factor <- sqrt(nrow(pcobj$x) - 1) d <- pcobj$sdev u <- sweep(pcobj$x, 2, 1 / (d * nobs.factor), FUN = '*') @@ -78,75 +84,79 @@ ggbiplot <- function(pcobj, choices = 1:2, scale = 1, pc.biplot = TRUE, u <- sweep(pcobj$ind$coord, 2, 1 / (d * nobs.factor), FUN = '*') v <- sweep(pcobj$var$coord,2,sqrt(pcobj$eig[1:ncol(pcobj$var$coord),1]),FUN="/") } else if(inherits(pcobj, "lda")) { - nobs.factor <- sqrt(pcobj$N) - d <- pcobj$svd - u <- predict(pcobj)$x/nobs.factor - v <- pcobj$scaling - d.total <- sum(d^2) + nobs.factor <- sqrt(pcobj$N) + d <- pcobj$svd + u <- predict(pcobj)$x/nobs.factor + v <- pcobj$scaling + d.total <- sum(d^2) } else { stop('Expected a object of class prcomp, princomp, PCA, or lda') } - + # Scores choices <- pmin(choices, ncol(u)) df.u <- as.data.frame(sweep(u[,choices], 2, d[choices]^obs.scale, FUN='*')) - + # Directions v <- sweep(v, 2, d^var.scale, FUN='*') df.v <- as.data.frame(v[, choices]) - + names(df.u) <- c('xvar', 'yvar') names(df.v) <- names(df.u) - + if(pc.biplot) { df.u <- df.u * nobs.factor } - + # Scale the radius of the correlation circle so that it corresponds to # a data ellipse for the standardized PC scores r <- sqrt(qchisq(circle.prob, df = 2)) * prod(colMeans(df.u^2))^(1/4) - + # Scale directions v.scale <- rowSums(v^2) df.v <- r * df.v / sqrt(max(v.scale)) - + # Change the labels for the axes if(obs.scale == 0) { u.axis.labs <- paste('standardized PC', choices, sep='') } else { u.axis.labs <- paste('PC', choices, sep='') } - + # Append the proportion of explained variance to the axis labels u.axis.labs <- paste(u.axis.labs, sprintf('(%0.1f%% explained var.)', 100 * pcobj$sdev[choices]^2/sum(pcobj$sdev^2))) - + # Score Labels if(!is.null(labels)) { df.u$labels <- labels } - + # Grouping variable if(!is.null(groups)) { df.u$groups <- groups } - + # Variable Names if(varname.abbrev) { df.v$varname <- abbreviate(rownames(v)) } else { df.v$varname <- rownames(v) } - + # Variables for text label placement df.v$angle <- with(df.v, (180/pi) * atan(yvar / xvar)) df.v$hjust = with(df.v, (1 - varname.adjust * sign(xvar)) / 2) - + # Base plot g <- ggplot(data = df.u, aes(x = xvar, y = yvar)) + - xlab(u.axis.labs[1]) + ylab(u.axis.labs[2]) + coord_equal() - + xlab(u.axis.labs[1]) + ylab(u.axis.labs[2]) + coord_equal() + + if(!is.null(var.subset)){ + df.v <- df.v[df.v$varname %in% var.subset, ] + } + if(var.axes) { # Draw circle if(circle) @@ -156,22 +166,22 @@ ggbiplot <- function(pcobj, choices = 1:2, scale = 1, pc.biplot = TRUE, g <- g + geom_path(data = circle, color = muted('white'), size = 1/2, alpha = 1/3) } - + # Draw directions g <- g + geom_segment(data = df.v, aes(x = 0, y = 0, xend = xvar, yend = yvar), arrow = arrow(length = unit(1/2, 'picas')), - color = muted('red')) + color = arrows.col) } - + # Draw either labels or points if(!is.null(df.u$labels)) { if(!is.null(df.u$groups)) { g <- g + geom_text(aes(label = labels, color = groups), size = labels.size) } else { - g <- g + geom_text(aes(label = labels), size = labels.size) + g <- g + geom_text(aes(label = labels), size = labels.size) } } else { if(!is.null(df.u$groups)) { @@ -180,12 +190,12 @@ ggbiplot <- function(pcobj, choices = 1:2, scale = 1, pc.biplot = TRUE, g <- g + geom_point(alpha = alpha) } } - + # Overlay a concentration ellipse if there are groups if(!is.null(df.u$groups) && ellipse) { theta <- c(seq(-pi, pi, length = 50), seq(pi, -pi, length = 50)) circle <- cbind(cos(theta), sin(theta)) - + ell <- ddply(df.u, 'groups', function(x) { if(nrow(x) <= 2) { return(NULL) @@ -199,22 +209,39 @@ ggbiplot <- function(pcobj, choices = 1:2, scale = 1, pc.biplot = TRUE, names(ell)[1:2] <- c('xvar', 'yvar') g <- g + geom_path(data = ell, aes(color = groups, group = groups)) } - + # Label the variable axes if(var.axes) { - g <- g + - geom_text(data = df.v, - aes(label = varname, x = xvar, y = yvar, - angle = angle, hjust = hjust), - color = 'darkred', size = varname.size) + if(var.repel){ + + g <- g + + geom_text_repel(data = df.v, + aes(label = varname, x = xvar, y = yvar, + #angle = angle, + hjust = hjust), + segment.alpha = .5, + segment.color = varname.col, + colour = varname.col, + size = varname.size) + + }else{ + + g <- g + + geom_text_repel(data = df.v, + aes(label = varname, x = xvar, y = yvar, + angle = angle, hjust = hjust), + colour = varname.col, + size = varname.size) + } + } # Change the name of the legend for groups # if(!is.null(groups)) { # g <- g + scale_color_brewer(name = deparse(substitute(groups)), # palette = 'Dark2') # } - + # TODO: Add a second set of axes - + return(g) } diff --git a/ggbiplot.Rproj b/ggbiplot.Rproj index dead601..cc1983d 100644 --- a/ggbiplot.Rproj +++ b/ggbiplot.Rproj @@ -15,3 +15,4 @@ LaTeX: XeLaTeX BuildType: Package PackageUseDevtools: Yes PackageInstallArgs: --no-multiarch --with-keep.source +PackageRoxygenize: rd,collate,namespace diff --git a/man/ggbiplot.Rd b/man/ggbiplot.Rd index 82e2070..e4a4c14 100644 --- a/man/ggbiplot.Rd +++ b/man/ggbiplot.Rd @@ -1,74 +1,70 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/ggbiplot.r \name{ggbiplot} \alias{ggbiplot} \title{Biplot for Principal Components using ggplot2} \usage{ - ggbiplot(pcobj, choices = 1:2, scale = 1, pc.biplot = - TRUE, obs.scale = 1 - scale, var.scale = scale, groups = - NULL, ellipse = FALSE, ellipse.prob = 0.68, labels = - NULL, labels.size = 3, alpha = 1, var.axes = TRUE, circle - = FALSE, circle.prob = 0.69, varname.size = 3, - varname.adjust = 1.5, varname.abbrev = FALSE, ...) +ggbiplot(pcobj, choices = 1:2, scale = 1, pc.biplot = TRUE, + obs.scale = 1 - scale, var.scale = scale, groups = NULL, + ellipse = FALSE, ellipse.prob = 0.68, labels = NULL, + labels.size = 3, alpha = 1, var.axes = TRUE, circle = FALSE, + circle.prob = 0.69, var.repel = TRUE, var.subset = NULL, + varname.col = "darkred", arrows.col = varname.col, + varname.size = 3, varname.adjust = 1.5, varname.abbrev = FALSE, + ...) } \arguments{ - \item{pcobj}{an object returned by prcomp() or - princomp()} +\item{pcobj}{an object returned by prcomp() or princomp()} - \item{choices}{which PCs to plot} +\item{choices}{which PCs to plot} - \item{scale}{covariance biplot (scale = 1), form biplot - (scale = 0). When scale = 1, the inner product between - the variables approximates the covariance and the - distance between the points approximates the Mahalanobis - distance.} +\item{scale}{covariance biplot (scale = 1), form biplot (scale = 0). When scale = 1, the inner product between the variables approximates the covariance and the distance between the points approximates the Mahalanobis distance.} - \item{obs.scale}{scale factor to apply to observations} +\item{pc.biplot}{for compatibility with biplot.princomp()} - \item{var.scale}{scale factor to apply to variables} +\item{obs.scale}{scale factor to apply to observations} - \item{pc.biplot}{for compatibility with - biplot.princomp()} +\item{var.scale}{scale factor to apply to variables} - \item{groups}{optional factor variable indicating the - groups that the observations belong to. If provided the - points will be colored according to groups} +\item{groups}{optional factor variable indicating the groups that the observations belong to. If provided the points will be colored according to groups} - \item{ellipse}{draw a normal data ellipse for each - group?} +\item{ellipse}{draw a normal data ellipse for each group?} - \item{ellipse.prob}{size of the ellipse in Normal - probability} +\item{ellipse.prob}{size of the ellipse in Normal probability} - \item{labels}{optional vector of labels for the - observations} +\item{labels}{optional vector of labels for the observations} - \item{labels.size}{size of the text used for the labels} +\item{labels.size}{size of the text used for the labels} - \item{alpha}{alpha transparency value for the points (0 = - TRUEransparent, 1 = opaque)} +\item{alpha}{alpha transparency value for the points (0 = transparent, 1 = opaque)} - \item{circle}{draw a correlation circle? (only applies - when prcomp was called with scale = TRUE and when - var.scale = 1)} +\item{var.axes}{draw arrows for the variables?} - \item{var.axes}{draw arrows for the variables?} +\item{circle}{draw a correlation circle? (only applies when prcomp was called with scale = TRUE and when var.scale = 1)} - \item{varname.size}{size of the text for variable names} +\item{var.repel}{whether or not to repel geom_text} - \item{varname.adjust}{adjustment factor the placement of - the variable names, >= 1 means farther from the arrow} +\item{var.subset}{vector of labels to show on biplot (NULL = show all labels)} - \item{varname.abbrev}{whether or not to abbreviate the - variable names} +\item{varname.col}{color of the labels (text only)} + +\item{arrows.col}{color of the arrows} + +\item{varname.size}{size of the text for variable names} + +\item{varname.adjust}{adjustment factor the placement of the variable names, >= 1 means farther from the arrow} + +\item{varname.abbrev}{whether or not to abbreviate the variable names} } \value{ - a ggplot2 plot +a ggplot2 plot } \description{ - Biplot for Principal Components using ggplot2 +Biplot for Principal Components using ggplot2 } \examples{ -data(wine) -wine.pca <- prcomp(wine, scale. = TRUE) -print(ggbiplot(wine.pca, obs.scale = 1, var.scale = 1, groups = wine.class, ellipse = TRUE, circle = TRUE)) -} + data(wine) + wine.pca <- prcomp(wine, scale. = TRUE) + print(ggbiplot(wine.pca, obs.scale = 1, var.scale = 1, groups = wine.class, ellipse = TRUE, circle = TRUE)) +}