summarise
# Load plyr; library() stops with an error if the package is missing,
# whereas require() would only return FALSE and fail later at ddply().
library(plyr)

# Mean of `symmetry` per `gender`, ignoring missing values.
# NOTE(review): FAfinal is not defined in this snippet; it must be a data
# frame that has `gender` and `symmetry` columns.
ddply(FAfinal, "gender", summarise,
  mean = mean(symmetry, na.rm = TRUE)
)
#   gender        mean
# 1      1 -0.10718687
# 2      2  0.09220376
# Build a 29-row example data frame: three groups (8, 15 and 6 rows), a
# randomly drawn sex, and a uniformly drawn age between 18 and 54.
group <- c(rep("A", 8), rep("B", 15), rep("C", 6))
sex <- sample(c("M", "F"), size = 29, replace = TRUE)
age <- runif(n = 29, min = 18, max = 54)
dfx <- data.frame(group, sex, age)

# Note the use of the '.' function to allow
# group and sex to be used without quoting
ddply(dfx, .(group, sex), summarize,
  mean = round(mean(age), 2),
  sd = round(sd(age), 2)
)
# Output from one run; the values differ between runs because sex and age
# are random. sd is NA for a group/sex combination that has a single row.
#   group sex  mean    sd
# 1     A   F 35.89  8.53
# 2     A   M 38.01 15.09
# 3     B   F 39.08 10.79
# 4     B   M 37.38  9.55
# 5     C   F 30.01    NA
# 6     C   M 34.78 11.50
An example using a formula for .variables
# Count the rows per year in the first 100 rows of `baseball`, giving the
# grouping variable as a one-sided formula rather than with .() or a string.
ddply(
  .data = baseball[1:100, ],
  .variables = ~year,
  .fun = nrow
)
Applying two functions: nrow and ncol
# Apply both nrow and ncol to each league's piece of `baseball`; a character
# vector of function names yields one output column per function.
# (The previous "mean" did not match the description and returns NA with a
# warning when applied to a whole data frame.)
ddply(baseball, .(lg), c("nrow", "ncol"))
Calculate mean runs batted in for each year
# One row per year holding the mean of the `rbi` column, with missing
# values dropped before averaging.
rbi <- ddply(
  .data = baseball,
  .variables = .(year),
  .fun = summarise,
  mean_rbi = mean(rbi, na.rm = TRUE)
)
Plot a line chart of the result
# Draw mean_rbi against year as a line (type = "l"), taking both columns
# from the `rbi` data frame. The stray trailing ")" that made this line a
# syntax error has been removed.
plot(mean_rbi ~ year, type = "l", data = rbi)
page revision: 6, last edited: 12 Nov 2012 12:40