title: "table analysis" author: "hungerzs" date: "2016年8月1日"
# Set the default chunk option so that chunk source code is echoed.
knitr::opts_chunk$set(echo = TRUE)
使用 table 进行一般的描述性统计
# Load the lake data frame from its serialized RDS file.
lake.df <- readRDS("data/lake.df.rds")

# Build a year x yday contingency table; each cell holds the summed EVI.
lake.table <- xtabs(EVI ~ year + yday, data = lake.df, sparse = TRUE)

# Append marginal means: margin 1 adds a final row (the mean of each column
# taken over the years) and margin 2 adds a final column (the mean of each
# row taken over the ydays).
# NOTE(review): addmargins() is documented for tables/arrays; confirm it
# handles the sparse Matrix that xtabs(sparse = TRUE) returns.
addmargins(lake.table, margin = c(1, 2), FUN = mean)

# Per-column summary of the raw data frame.
summary(lake.df)
sapply: simplified lapply (list apply)
#sapply(lake.df, sd)
# Basic descriptive statistics for a vector.
#
# x       : vector to summarise.
# na.omit : when TRUE (the default), NA entries are dropped before any
#           statistic is computed.
#
# Returns a named vector with elements n (number of values used), mean and
# stdev (sample standard deviation).
descstats <- function(x, na.omit = TRUE) {
  values <- if (na.omit) x[!is.na(x)] else x
  avg <- mean(values)
  spread <- sd(values)
  c(n = length(values), mean = avg, stdev = spread)
}
# Apply descstats to every column of lake.df, simplifying the result when
# possible.
sapply(lake.df, descstats)

# Both Hmisc and psych export describe(); the calls below are qualified with
# their namespace so the intended version runs regardless of which package
# was attached last.
library(Hmisc)
Hmisc::describe(lake.df)

# pastecs: basic and descriptive statistics, without normality statistics.
library(pastecs)
stat.desc(lake.df, basic = TRUE, desc = TRUE, norm = FALSE, p = 0.95)

library(psych)
psych::describe(lake.df)

# Column means grouped by year, by yday, and by every year/yday combination.
aggregate(lake.df, by = list(y = lake.df$year), FUN = mean, na.rm = TRUE)
aggregate(lake.df, by = list(y = lake.df$yday), FUN = mean, na.rm = TRUE)
aggregate(
  lake.df,
  by = list(y = lake.df$year, d = lake.df$yday),
  FUN = mean,
  na.rm = TRUE
)
可返回多个统计数
# Return the mean and standard deviation of x as a named vector.
dstats <- function(x) {
  c(mean = mean(x), sd = sd(x))
}
# Apply dstats to EVI separately within each year.
by(lake.df$EVI, lake.df$year, dstats)

# Grouped descriptive statistics from psych. describeBy() replaces the
# deprecated describe.by() name.
library(psych)
psych::describeBy(lake.df$EVI, group = lake.df$year)
library(reshape)
# Return the count, mean and standard deviation of x as a named vector.
dstats <- function(x) {
  c(n = length(x), mean = mean(x), sd = sd(x))
}
# Reshape mtcars to long format: am and cyl identify each row, while mpg, hp
# and wt are stacked as the measured variables.
dfm <- melt(
  mtcars,
  id.vars = c("am", "cyl"),
  measure.vars = c("mpg", "hp", "wt")
)
dfm

# Summarise every am x cyl x variable combination with dstats.
cast(dfm, am + cyl + variable ~ ., fun.aggregate = dstats)
library(gmodels)
# Cross-tabulate treatment against improvement.
# NOTE(review): Arthritis is not created anywhere in the visible file;
# presumably it is the dataset shipped with the vcd package. Confirm that
# package is loaded before this line runs.
CrossTable(Arthritis$Treatment, Arthritis$Improved)
# Cross-tabulate year against yday for the lake data.
CrossTable(lake.df$year, lake.df$yday)
# Apply dstats to EVI separately for each year.
tapply(lake.df$EVI, lake.df$year, dstats)