Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions .Rbuildignore
Original file line number Diff line number Diff line change
@@ -1,2 +1,4 @@
^.*\.Rproj$
^\.Rproj\.user$
^README\.Rmd$
^README-.*\.png$
234 changes: 136 additions & 98 deletions R/ggbiplot.r
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@
#' @param ellipse.prob size of the ellipse in Normal probability
#' @param labels optional vector of labels for the observations
#' @param labels.size size of the text used for the labels
#' @param alpha alpha transparency value for the points (0 = TRUEransparent, 1 = opaque)
#' @param alpha alpha transparency value for the points (0 = transparent, 1 = opaque)
#' @param circle draw a correlation circle? (only applies when prcomp was called with scale = TRUE and when var.scale = 1)
#' @param var.axes draw arrows for the variables?
#' @param varname.size size of the text for variable names
Expand All @@ -45,179 +45,217 @@
#' wine.pca <- prcomp(wine, scale. = TRUE)
#' print(ggbiplot(wine.pca, obs.scale = 1, var.scale = 1, groups = wine.class, ellipse = TRUE, circle = TRUE))
#'
ggbiplot <- function(pcobj, choices = 1:2, scale = 1, pc.biplot = TRUE,
obs.scale = 1 - scale, var.scale = scale,
groups = NULL, ellipse = FALSE, ellipse.prob = 0.68,
labels = NULL, labels.size = 3, alpha = 1,
var.axes = TRUE,
circle = FALSE, circle.prob = 0.69,
varname.size = 3, varname.adjust = 1.5,
varname.abbrev = FALSE, ...)
ggbiplot2 <- function (pcobj, choices = 1:2, coi = "all", scale = 1, pc.biplot = TRUE, # incl. new arg
obs.scale = 1 - scale, var.scale = scale, groups = NULL,
ellipse = FALSE, ellipse.prob = 0.68, ellipse.alpha = 1, labels = NULL, labels.size = 3,
alpha = 1, var.axes = TRUE, circle = FALSE, circle.prob = 0.69,
varname.size = 3, varname.adjust = 1.5, varname.abbrev = FALSE,
arrow.color = muted("red"), arrow.linetype = "solid", arrow.alpha = 1, # incl. new args
...)
{
library(ggplot2)
library(plyr)
library(scales)
library(grid)

stopifnot(length(choices) == 2)


# Recover the SVD
if(inherits(pcobj, 'prcomp')){
stopifnot(length(choices) == 2)
switch1 <- FALSE
if (inherits(pcobj, "prcomp")) {
nobs.factor <- sqrt(nrow(pcobj$x) - 1)
d <- pcobj$sdev
u <- sweep(pcobj$x, 2, 1 / (d * nobs.factor), FUN = '*')
u <- sweep(pcobj$x, 2, 1/(d * nobs.factor), FUN = "*")
v <- pcobj$rotation
} else if(inherits(pcobj, 'princomp')) {
} else if (inherits(pcobj, "princomp")) {
# incl. switch to use later on for determination of package used for ordination.
switch1 <- TRUE
nobs.factor <- sqrt(pcobj$n.obs)
d <- pcobj$sdev
u <- sweep(pcobj$scores, 2, 1 / (d * nobs.factor), FUN = '*')
u <- sweep(pcobj$scores, 2, 1/(d * nobs.factor), FUN = "*")
v <- pcobj$loadings
} else if(inherits(pcobj, 'PCA')) {
ln <- length(names(pcobj$scale))
} else if (inherits(pcobj, "PCA")) {
nobs.factor <- sqrt(nrow(pcobj$call$X))
d <- unlist(sqrt(pcobj$eig)[1])
u <- sweep(pcobj$ind$coord, 2, 1 / (d * nobs.factor), FUN = '*')
v <- sweep(pcobj$var$coord,2,sqrt(pcobj$eig[1:ncol(pcobj$var$coord),1]),FUN="/")
} else if(inherits(pcobj, "lda")) {
nobs.factor <- sqrt(pcobj$N)
d <- pcobj$svd
u <- predict(pcobj)$x/nobs.factor
v <- pcobj$scaling
d.total <- sum(d^2)
u <- sweep(pcobj$ind$coord, 2, 1/(d * nobs.factor), FUN = "*")
v <- sweep(pcobj$var$coord, 2, sqrt(pcobj$eig[1:ncol(pcobj$var$coord),
1]), FUN = "/")
} else if (inherits(pcobj, "lda")) {
nobs.factor <- sqrt(pcobj$N)
d <- pcobj$svd
u <- predict(pcobj)$x/nobs.factor
v <- pcobj$scaling
d.total <- sum(d^2)
} else {
stop('Expected a object of class prcomp, princomp, PCA, or lda')
stop("Expected a object of class prcomp, princomp, PCA, or lda")
}


# manage columns of interest (coi)
if(switch1){
if(coi=="all"){
coi = list(c(names(pcobj$scale)))
cnoi = NULL
} else {
if(!is.list(coi)){
stop("'coi' must be of type list")
}
cnoi <- names(pcobj$scale)[!names(pcobj$scale) %in% unlist(coi)]
}
lcoi <- length(coi)
lcnoi <- length(cnoi)
}

# Scores
choices <- pmin(choices, ncol(u))
df.u <- as.data.frame(sweep(u[,choices], 2, d[choices]^obs.scale, FUN='*'))

df.u <- as.data.frame(sweep(u[, choices], 2, d[choices]^obs.scale,
FUN = "*"))

# Directions
v <- sweep(v, 2, d^var.scale, FUN='*')
v <- sweep(v, 2, d^var.scale, FUN = "*")
df.v <- as.data.frame(v[, choices])

names(df.u) <- c('xvar', 'yvar')
names(df.u) <- c("xvar", "yvar")
names(df.v) <- names(df.u)

if(pc.biplot) {
if (pc.biplot) {
df.u <- df.u * nobs.factor
}

# Scale the radius of the correlation circle so that it corresponds to
# a data ellipse for the standardized PC scores
r <- sqrt(qchisq(circle.prob, df = 2)) * prod(colMeans(df.u^2))^(1/4)

# Scale directions
v.scale <- rowSums(v^2)
df.v <- r * df.v / sqrt(max(v.scale))

df.v <- r * df.v/sqrt(max(v.scale))
# Change the labels for the axes
if(obs.scale == 0) {
u.axis.labs <- paste('standardized PC', choices, sep='')
if (obs.scale == 0) {
u.axis.labs <- paste("standardized PC", choices, sep = "")
} else {
u.axis.labs <- paste('PC', choices, sep='')
u.axis.labs <- paste("PC", choices, sep = "")
}

# Append the proportion of explained variance to the axis labels
u.axis.labs <- paste(u.axis.labs,
sprintf('(%0.1f%% explained var.)',
100 * pcobj$sdev[choices]^2/sum(pcobj$sdev^2)))

u.axis.labs <- paste(u.axis.labs, sprintf("(%0.1f%% explained var.)",
100 * pcobj$sdev[choices]^2/sum(pcobj$sdev^2)))

# Score Labels
if(!is.null(labels)) {
if (!is.null(labels)) {
df.u$labels <- labels
}

# Grouping variable
if(!is.null(groups)) {
if (!is.null(groups)) {
df.u$groups <- groups
}

# Variable Names
if(varname.abbrev) {
if (varname.abbrev) {
df.v$varname <- abbreviate(rownames(v))
} else {
df.v$varname <- rownames(v)
}

# Variables for text label placement
df.v$angle <- with(df.v, (180/pi) * atan(yvar / xvar))
df.v$hjust = with(df.v, (1 - varname.adjust * sign(xvar)) / 2)

df.v$angle <- with(df.v, (180/pi) * atan(yvar/xvar))
df.v$hjust = with(df.v, (1 - varname.adjust * sign(xvar))/2)

# colour management
if(switch1){
alphas <- rep(1, ln)
colours <- rep(muted("red"), ln)
if(lcoi+1 != length(arrow.color) || lcoi+1!= length(arrow.linetype) || lcoi+1!= length(arrow.alpha)){
warning("styles can maybe not be changed meaningfully when the respective arguments don't have same lenghts.")
}
for(i in 1:(lcoi+1)){
if(i==1){
alphas[ names(pcobj$scale) %in% cnoi] <- arrow.alpha[i]
colours[ names(pcobj$scale) %in% cnoi] <- arrow.color[i]
linetypes[ names(pcobj$scale) %in% cnoi] <- arrow.linetype[i]
} else {
alphas[ names(pcobj$scale) %in% coi[[i-1]] ] <- arrow.alpha[i]
colours[ names(pcobj$scale) %in% coi[[i-1]] ] <- arrow.color[i]
linetypes[ names(pcobj$scale) %in% coi[[i-1]] ] <- arrow.linetype[i]
}
}
}

# Base plot
g <- ggplot(data = df.u, aes(x = xvar, y = yvar)) +
xlab(u.axis.labs[1]) + ylab(u.axis.labs[2]) + coord_equal()

if(var.axes) {
g <- ggplot(data = df.u, aes(x = xvar, y = yvar)) + xlab(u.axis.labs[1]) +
ylab(u.axis.labs[2]) + coord_equal()
if (var.axes) {
# Draw circle
if(circle)
{
theta <- c(seq(-pi, pi, length = 50), seq(pi, -pi, length = 50))
circle <- data.frame(xvar = r * cos(theta), yvar = r * sin(theta))
g <- g + geom_path(data = circle, color = muted('white'),
if (circle) {
theta <- c(seq(-pi, pi, length = 50), seq(pi, -pi,
length = 50))
circle <- data.frame(xvar = r * cos(theta), yvar = r *
sin(theta))
g <- g + geom_path(data = circle, color = muted("white"),
size = 1/2, alpha = 1/3)
}

# Draw directions
g <- g +
geom_segment(data = df.v,
aes(x = 0, y = 0, xend = xvar, yend = yvar),
arrow = arrow(length = unit(1/2, 'picas')),
color = muted('red'))
g <- g +
geom_segment(data = df.v,
aes(x = 0, y = 0, xend = xvar, yend = yvar),
arrow = arrow(length = unit(1/2, "picas")),
color = colours,
linetype = linetypes,
alpha = alphas)
}

# Draw either labels or points
if(!is.null(df.u$labels)) {
if(!is.null(df.u$groups)) {
if (!is.null(df.u$labels)) {
if (!is.null(df.u$groups)) {
g <- g + geom_text(aes(label = labels, color = groups),
size = labels.size)
} else {
g <- g + geom_text(aes(label = labels), size = labels.size)
g <- g + geom_text(aes(label = labels), size = labels.size)
}
} else {
if(!is.null(df.u$groups)) {
if (!is.null(df.u$groups)) {
g <- g + geom_point(aes(color = groups), alpha = alpha)
} else {
g <- g + geom_point(alpha = alpha)
g <- g + geom_point(alpha = alpha)
}
}

# Overlay a concentration ellipse if there are groups
if(!is.null(df.u$groups) && ellipse) {
if (!is.null(df.u$groups) && ellipse) {
theta <- c(seq(-pi, pi, length = 50), seq(pi, -pi, length = 50))
circle <- cbind(cos(theta), sin(theta))

ell <- ddply(df.u, 'groups', function(x) {
if(nrow(x) < 2) {
ell <- ddply(df.u, "groups", function(x) {
if (nrow(x) <= 2) {
return(NULL)
} else if(nrow(x) == 2) {
sigma <- var(cbind(x$xvar, x$yvar))
} else {
sigma <- diag(c(var(x$xvar), var(x$yvar)))
}
sigma <- var(cbind(x$xvar, x$yvar))
mu <- c(mean(x$xvar), mean(x$yvar))
ed <- sqrt(qchisq(ellipse.prob, df = 2))
data.frame(sweep(circle %*% chol(sigma) * ed, 2, mu, FUN = '+'),
groups = x$groups[1])
data.frame(sweep(circle %*% chol(sigma) * ed, 2,
mu, FUN = "+"), groups = x$groups[1])
})
names(ell)[1:2] <- c('xvar', 'yvar')
g <- g + geom_path(data = ell, aes(color = groups, group = groups))
names(ell)[1:2] <- c("xvar", "yvar")
g <- g + geom_path(data = ell, aes(color = groups, group = groups), alpha = ellipse.alpha)
}

# Label the variable axes
if(var.axes) {
if (var.axes) {
g <- g +
geom_text(data = df.v,
aes(label = varname, x = xvar, y = yvar,
angle = angle, hjust = hjust),
color = 'darkred', size = varname.size)
geom_text(data = df.v,
aes(label = varname, x = xvar, y = yvar, angle = angle, hjust = hjust),
color = colours,
size = varname.size,
alpha = alphas)
}
# Change the name of the legend for groups
# if(!is.null(groups)) {
# g <- g + scale_color_brewer(name = deparse(substitute(groups)),
# palette = 'Dark2')
# }

# TODO: Add a second set of axes

return(g)
}
}
Binary file added README-wine-example-1.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
47 changes: 47 additions & 0 deletions README.Rmd
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
---
output:
md_document:
variant: markdown_github
---

<!-- README.md is generated from README.Rmd. Please edit that file -->

**NEWS**: Active development of ggbiplot has moved to the [experimental branch](https://github.com/vqv/ggbiplot/tree/experimental)

```{r, echo = FALSE}
knitr::opts_chunk$set(
collapse = TRUE,
comment = "#>",
fig.path = "README-"
)
```

ggbiplot
========

An implementation of the biplot using ggplot2. The package provides two functions: `ggscreeplot()` and `ggbiplot()`.
`ggbiplot` aims to be a drop-in replacement for the built-in R function `biplot.princomp()` with extended functionality
for labeling groups, drawing a correlation circle, and adding Normal probability ellipsoids.

*The development of this software was supported in part by NSF Postdoctoral Fellowship DMS-0903120*

Installation
------------

```r
library(devtools)
install_github("vqv/ggbiplot")
```

Example Usage
-------------

```{r wine-example, message = FALSE, warning = FALSE}
library(ggbiplot)
data(wine)
wine.pca <- prcomp(wine, scale. = TRUE)
ggbiplot(wine.pca, obs.scale = 1, var.scale = 1,
groups = wine.class, ellipse = TRUE, circle = TRUE) +
scale_color_discrete(name = '') +
theme(legend.direction = 'horizontal', legend.position = 'top')
```
Loading