diff --git a/.Rbuildignore b/.Rbuildignore index 91114bf..8453a6f 100644 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -1,2 +1,4 @@ ^.*\.Rproj$ ^\.Rproj\.user$ +^README\.Rmd$ +^README-.*\.png$ diff --git a/R/ggbiplot.r b/R/ggbiplot.r index 2111924..b8cb99f 100644 --- a/R/ggbiplot.r +++ b/R/ggbiplot.r @@ -31,7 +31,7 @@ #' @param ellipse.prob size of the ellipse in Normal probability #' @param labels optional vector of labels for the observations #' @param labels.size size of the text used for the labels -#' @param alpha alpha transparency value for the points (0 = TRUEransparent, 1 = opaque) +#' @param alpha alpha transparency value for the points (0 = transparent, 1 = opaque) #' @param circle draw a correlation circle? (only applies when prcomp was called with scale = TRUE and when var.scale = 1) #' @param var.axes draw arrows for the variables? #' @param varname.size size of the text for variable names @@ -45,179 +45,217 @@ #' wine.pca <- prcomp(wine, scale. = TRUE) #' print(ggbiplot(wine.pca, obs.scale = 1, var.scale = 1, groups = wine.class, ellipse = TRUE, circle = TRUE)) #' -ggbiplot <- function(pcobj, choices = 1:2, scale = 1, pc.biplot = TRUE, - obs.scale = 1 - scale, var.scale = scale, - groups = NULL, ellipse = FALSE, ellipse.prob = 0.68, - labels = NULL, labels.size = 3, alpha = 1, - var.axes = TRUE, - circle = FALSE, circle.prob = 0.69, - varname.size = 3, varname.adjust = 1.5, - varname.abbrev = FALSE, ...) +ggbiplot2 <- function (pcobj, choices = 1:2, coi = "all", scale = 1, pc.biplot = TRUE, # incl. new arg + obs.scale = 1 - scale, var.scale = scale, groups = NULL, + ellipse = FALSE, ellipse.prob = 0.68, ellipse.alpha = 1, labels = NULL, labels.size = 3, + alpha = 1, var.axes = TRUE, circle = FALSE, circle.prob = 0.69, + varname.size = 3, varname.adjust = 1.5, varname.abbrev = FALSE, + arrow.color = muted("red"), arrow.linetype = "solid", arrow.alpha = 1, # incl. new args + ...) { library(ggplot2) library(plyr) library(scales) library(grid) - - stopifnot(length(choices) == 2) - + # Recover the SVD - if(inherits(pcobj, 'prcomp')){ + stopifnot(length(choices) == 2) + switch1 <- FALSE + if (inherits(pcobj, "prcomp")) { nobs.factor <- sqrt(nrow(pcobj$x) - 1) d <- pcobj$sdev - u <- sweep(pcobj$x, 2, 1 / (d * nobs.factor), FUN = '*') + u <- sweep(pcobj$x, 2, 1/(d * nobs.factor), FUN = "*") v <- pcobj$rotation - } else if(inherits(pcobj, 'princomp')) { + } else if (inherits(pcobj, "princomp")) { + # incl. switch to use later on for determination of package used for ordination. + switch1 <- TRUE nobs.factor <- sqrt(pcobj$n.obs) d <- pcobj$sdev - u <- sweep(pcobj$scores, 2, 1 / (d * nobs.factor), FUN = '*') + u <- sweep(pcobj$scores, 2, 1/(d * nobs.factor), FUN = "*") v <- pcobj$loadings - } else if(inherits(pcobj, 'PCA')) { + ln <- length(names(pcobj$scale)) + } else if (inherits(pcobj, "PCA")) { nobs.factor <- sqrt(nrow(pcobj$call$X)) d <- unlist(sqrt(pcobj$eig)[1]) - u <- sweep(pcobj$ind$coord, 2, 1 / (d * nobs.factor), FUN = '*') - v <- sweep(pcobj$var$coord,2,sqrt(pcobj$eig[1:ncol(pcobj$var$coord),1]),FUN="/") - } else if(inherits(pcobj, "lda")) { - nobs.factor <- sqrt(pcobj$N) - d <- pcobj$svd - u <- predict(pcobj)$x/nobs.factor - v <- pcobj$scaling - d.total <- sum(d^2) + u <- sweep(pcobj$ind$coord, 2, 1/(d * nobs.factor), FUN = "*") + v <- sweep(pcobj$var$coord, 2, sqrt(pcobj$eig[1:ncol(pcobj$var$coord), + 1]), FUN = "/") + } else if (inherits(pcobj, "lda")) { + nobs.factor <- sqrt(pcobj$N) + d <- pcobj$svd + u <- predict(pcobj)$x/nobs.factor + v <- pcobj$scaling + d.total <- sum(d^2) } else { - stop('Expected a object of class prcomp, princomp, PCA, or lda') + stop("Expected a object of class prcomp, princomp, PCA, or lda") } - + + # manage columns of interest (coi) + if(switch1){ + if(coi=="all"){ + coi = list(c(names(pcobj$scale))) + cnoi = NULL + } else { + if(!is.list(coi)){ + stop("'coi' must be of type list") + } + cnoi <- names(pcobj$scale)[!names(pcobj$scale) %in% unlist(coi)] + } + lcoi <- length(coi) + lcnoi <- length(cnoi) + } + # Scores choices <- pmin(choices, ncol(u)) - df.u <- as.data.frame(sweep(u[,choices], 2, d[choices]^obs.scale, FUN='*')) - + df.u <- as.data.frame(sweep(u[, choices], 2, d[choices]^obs.scale, + FUN = "*")) + # Directions - v <- sweep(v, 2, d^var.scale, FUN='*') + v <- sweep(v, 2, d^var.scale, FUN = "*") df.v <- as.data.frame(v[, choices]) - - names(df.u) <- c('xvar', 'yvar') + + names(df.u) <- c("xvar", "yvar") names(df.v) <- names(df.u) - - if(pc.biplot) { + if (pc.biplot) { df.u <- df.u * nobs.factor } - + # Scale the radius of the correlation circle so that it corresponds to # a data ellipse for the standardized PC scores r <- sqrt(qchisq(circle.prob, df = 2)) * prod(colMeans(df.u^2))^(1/4) - + # Scale directions v.scale <- rowSums(v^2) - df.v <- r * df.v / sqrt(max(v.scale)) - + df.v <- r * df.v/sqrt(max(v.scale)) + # Change the labels for the axes - if(obs.scale == 0) { - u.axis.labs <- paste('standardized PC', choices, sep='') + if (obs.scale == 0) { + u.axis.labs <- paste("standardized PC", choices, sep = "") } else { - u.axis.labs <- paste('PC', choices, sep='') + u.axis.labs <- paste("PC", choices, sep = "") } - + # Append the proportion of explained variance to the axis labels - u.axis.labs <- paste(u.axis.labs, - sprintf('(%0.1f%% explained var.)', - 100 * pcobj$sdev[choices]^2/sum(pcobj$sdev^2))) - + u.axis.labs <- paste(u.axis.labs, sprintf("(%0.1f%% explained var.)", + 100 * pcobj$sdev[choices]^2/sum(pcobj$sdev^2))) + # Score Labels - if(!is.null(labels)) { + if (!is.null(labels)) { df.u$labels <- labels } - + # Grouping variable - if(!is.null(groups)) { + if (!is.null(groups)) { df.u$groups <- groups } - + # Variable Names - if(varname.abbrev) { + if (varname.abbrev) { df.v$varname <- abbreviate(rownames(v)) } else { df.v$varname <- rownames(v) } - + # Variables for text label placement - df.v$angle <- with(df.v, (180/pi) * atan(yvar / xvar)) - df.v$hjust = with(df.v, (1 - varname.adjust * sign(xvar)) / 2) - + df.v$angle <- with(df.v, (180/pi) * atan(yvar/xvar)) + df.v$hjust = with(df.v, (1 - varname.adjust * sign(xvar))/2) + + # colour management + if(switch1){ + alphas <- rep(1, ln) + colours <- rep(muted("red"), ln) + if(lcoi+1 != length(arrow.color) || lcoi+1!= length(arrow.linetype) || lcoi+1!= length(arrow.alpha)){ + warning("styles can maybe not be changed meaningfully when the respective arguments don't have same lenghts.") + } + for(i in 1:(lcoi+1)){ + if(i==1){ + alphas[ names(pcobj$scale) %in% cnoi] <- arrow.alpha[i] + colours[ names(pcobj$scale) %in% cnoi] <- arrow.color[i] + linetypes[ names(pcobj$scale) %in% cnoi] <- arrow.linetype[i] + } else { + alphas[ names(pcobj$scale) %in% coi[[i-1]] ] <- arrow.alpha[i] + colours[ names(pcobj$scale) %in% coi[[i-1]] ] <- arrow.color[i] + linetypes[ names(pcobj$scale) %in% coi[[i-1]] ] <- arrow.linetype[i] + } + } + } + # Base plot - g <- ggplot(data = df.u, aes(x = xvar, y = yvar)) + - xlab(u.axis.labs[1]) + ylab(u.axis.labs[2]) + coord_equal() - - if(var.axes) { + g <- ggplot(data = df.u, aes(x = xvar, y = yvar)) + xlab(u.axis.labs[1]) + + ylab(u.axis.labs[2]) + coord_equal() + + if (var.axes) { # Draw circle - if(circle) - { - theta <- c(seq(-pi, pi, length = 50), seq(pi, -pi, length = 50)) - circle <- data.frame(xvar = r * cos(theta), yvar = r * sin(theta)) - g <- g + geom_path(data = circle, color = muted('white'), + if (circle) { + theta <- c(seq(-pi, pi, length = 50), seq(pi, -pi, + length = 50)) + circle <- data.frame(xvar = r * cos(theta), yvar = r * + sin(theta)) + g <- g + geom_path(data = circle, color = muted("white"), size = 1/2, alpha = 1/3) } - + # Draw directions - g <- g + - geom_segment(data = df.v, - aes(x = 0, y = 0, xend = xvar, yend = yvar), - arrow = arrow(length = unit(1/2, 'picas')), - color = muted('red')) + g <- g + + geom_segment(data = df.v, + aes(x = 0, y = 0, xend = xvar, yend = yvar), + arrow = arrow(length = unit(1/2, "picas")), + color = colours, + linetype = linetypes, + alpha = alphas) } - + # Draw either labels or points - if(!is.null(df.u$labels)) { - if(!is.null(df.u$groups)) { + if (!is.null(df.u$labels)) { + if (!is.null(df.u$groups)) { g <- g + geom_text(aes(label = labels, color = groups), size = labels.size) } else { - g <- g + geom_text(aes(label = labels), size = labels.size) + g <- g + geom_text(aes(label = labels), size = labels.size) } } else { - if(!is.null(df.u$groups)) { + if (!is.null(df.u$groups)) { g <- g + geom_point(aes(color = groups), alpha = alpha) } else { - g <- g + geom_point(alpha = alpha) + g <- g + geom_point(alpha = alpha) } } - + # Overlay a concentration ellipse if there are groups - if(!is.null(df.u$groups) && ellipse) { + if (!is.null(df.u$groups) && ellipse) { theta <- c(seq(-pi, pi, length = 50), seq(pi, -pi, length = 50)) circle <- cbind(cos(theta), sin(theta)) - - ell <- ddply(df.u, 'groups', function(x) { - if(nrow(x) < 2) { + + ell <- ddply(df.u, "groups", function(x) { + if (nrow(x) <= 2) { return(NULL) - } else if(nrow(x) == 2) { - sigma <- var(cbind(x$xvar, x$yvar)) - } else { - sigma <- diag(c(var(x$xvar), var(x$yvar))) } + sigma <- var(cbind(x$xvar, x$yvar)) mu <- c(mean(x$xvar), mean(x$yvar)) ed <- sqrt(qchisq(ellipse.prob, df = 2)) - data.frame(sweep(circle %*% chol(sigma) * ed, 2, mu, FUN = '+'), - groups = x$groups[1]) + data.frame(sweep(circle %*% chol(sigma) * ed, 2, + mu, FUN = "+"), groups = x$groups[1]) }) - names(ell)[1:2] <- c('xvar', 'yvar') - g <- g + geom_path(data = ell, aes(color = groups, group = groups)) + names(ell)[1:2] <- c("xvar", "yvar") + g <- g + geom_path(data = ell, aes(color = groups, group = groups), alpha = ellipse.alpha) } - + # Label the variable axes - if(var.axes) { + if (var.axes) { g <- g + - geom_text(data = df.v, - aes(label = varname, x = xvar, y = yvar, - angle = angle, hjust = hjust), - color = 'darkred', size = varname.size) + geom_text(data = df.v, + aes(label = varname, x = xvar, y = yvar, angle = angle, hjust = hjust), + color = colours, + size = varname.size, + alpha = alphas) } # Change the name of the legend for groups # if(!is.null(groups)) { # g <- g + scale_color_brewer(name = deparse(substitute(groups)), # palette = 'Dark2') # } - + # TODO: Add a second set of axes - + return(g) -} +} \ No newline at end of file diff --git a/README-wine-example-1.png b/README-wine-example-1.png new file mode 100644 index 0000000..3e0873e Binary files /dev/null and b/README-wine-example-1.png differ diff --git a/README.Rmd b/README.Rmd new file mode 100644 index 0000000..9e9424c --- /dev/null +++ b/README.Rmd @@ -0,0 +1,47 @@ +--- +output: + md_document: + variant: markdown_github +--- + + + +**NEWS**: Active development of ggbiplot has moved to the [experimental branch](https://github.com/vqv/ggbiplot/tree/experimental) + +```{r, echo = FALSE} +knitr::opts_chunk$set( + collapse = TRUE, + comment = "#>", + fig.path = "README-" +) +``` + +ggbiplot +======== + +An implementation of the biplot using ggplot2. The package provides two functions: `ggscreeplot()` and `ggbiplot()`. +`ggbiplot` aims to be a drop-in replacement for the built-in R function `biplot.princomp()` with extended functionality +for labeling groups, drawing a correlation circle, and adding Normal probability ellipsoids. + +*The development of this software was supported in part by NSF Postdoctoral Fellowship DMS-0903120* + +Installation +------------ + +```r +library(devtools) +install_github("vqv/ggbiplot") +``` + +Example Usage +------------- + +```{r wine-example, message = FALSE, warning = FALSE} +library(ggbiplot) +data(wine) +wine.pca <- prcomp(wine, scale. = TRUE) +ggbiplot(wine.pca, obs.scale = 1, var.scale = 1, + groups = wine.class, ellipse = TRUE, circle = TRUE) + + scale_color_discrete(name = '') + + theme(legend.direction = 'horizontal', legend.position = 'top') +``` diff --git a/README.markdown b/README.markdown deleted file mode 100644 index 0e94128..0000000 --- a/README.markdown +++ /dev/null @@ -1,27 +0,0 @@ -ggbiplot -======== - -An implementation of the biplot using ggplot2. The package provides two functions: `ggscreeplot()` and `ggbiplot()`. -`ggbiplot` aims to be a drop-in replacement for the built-in R function `biplot.princomp()` with extended functionality -for labeling groups, drawing a correlation circle, and adding Normal probability ellipsoids. - -*The development of this software was supported in part by NSF Postdoctoral Fellowship DMS-0903120* - -Installation ------------- - - library(devtools) - install_github("ggbiplot", "vqv") - -Example Usage -------------- - - library(ggbiplot) - data(wine) - wine.pca <- prcomp(wine, scale. = TRUE) - g <- ggbiplot(wine.pca, obs.scale = 1, var.scale = 1, - groups = wine.class, ellipse = TRUE, circle = TRUE) - g <- g + scale_color_discrete(name = '') - g <- g + opts(legend.direction = 'horizontal', - legend.position = 'top') - print(g) \ No newline at end of file diff --git a/README.md b/README.md new file mode 100644 index 0000000..f4e3aeb --- /dev/null +++ b/README.md @@ -0,0 +1,33 @@ + + +**NEWS**: Active development of ggbiplot has moved to the [experimental branch](https://github.com/vqv/ggbiplot/tree/experimental) + +ggbiplot +======== + +An implementation of the biplot using ggplot2. The package provides two functions: `ggscreeplot()` and `ggbiplot()`. `ggbiplot` aims to be a drop-in replacement for the built-in R function `biplot.princomp()` with extended functionality for labeling groups, drawing a correlation circle, and adding Normal probability ellipsoids. + +*The development of this software was supported in part by NSF Postdoctoral Fellowship DMS-0903120* + +Installation +------------ + +``` r +library(devtools) +install_github("vqv/ggbiplot") +``` + +Example Usage +------------- + +``` r +library(ggbiplot) +data(wine) +wine.pca <- prcomp(wine, scale. = TRUE) +ggbiplot(wine.pca, obs.scale = 1, var.scale = 1, + groups = wine.class, ellipse = TRUE, circle = TRUE) + + scale_color_discrete(name = '') + + theme(legend.direction = 'horizontal', legend.position = 'top') +``` + +![](README-wine-example-1.png) diff --git a/ggbiplot2.Rproj b/ggbiplot2.Rproj new file mode 100644 index 0000000..21a4da0 --- /dev/null +++ b/ggbiplot2.Rproj @@ -0,0 +1,17 @@ +Version: 1.0 + +RestoreWorkspace: Default +SaveWorkspace: Default +AlwaysSaveHistory: Default + +EnableCodeIndexing: Yes +UseSpacesForTab: Yes +NumSpacesForTab: 2 +Encoding: UTF-8 + +RnwWeave: Sweave +LaTeX: pdfLaTeX + +BuildType: Package +PackageUseDevtools: Yes +PackageInstallArgs: --no-multiarch --with-keep.source diff --git a/man/ggbiplot.Rd b/man/ggbiplot.Rd index 82e2070..8f2bb4f 100644 --- a/man/ggbiplot.Rd +++ b/man/ggbiplot.Rd @@ -15,6 +15,14 @@ \item{choices}{which PCs to plot} + \item{coi}{define a set of columns of the original + data-frame (variables) which should be coloured in a + certain way. This has to be a list with as many list + elements as there should be different colours in the + final plot. Each list element should contain these + variable names, that should be coloured in the specified + colours.} + \item{scale}{covariance biplot (scale = 1), form biplot (scale = 0). When scale = 1, the inner product between the variables approximates the covariance and the @@ -59,6 +67,18 @@ \item{varname.abbrev}{whether or not to abbreviate the variable names} + + \item{arrow.color}{define a vector of colours which + should be applied to the names specified in 'coi'. The + length of this vector needs to be the lenght of coi + (which is a list) + 1. The first element should be the + colour for all unspecified elements (default colour).} + + \item{arrow.linetype}{same as arrow.colour, only for + linetype of the arrows.} + + \item{arrow.alpha}{same as arrow.colour, only for + linetype of the arrows.} } \value{ a ggplot2 plot