---
title: "table analysis"
author: "hungerzs"
date: "2016年8月1日"
output: html_document
---

```{r setup, include=FALSE}
knitr::opts_chunk$set(echo = TRUE)
```

## 交叉表统计 cross tabulation

使用table进行一般的描述性统计

```{r}
# Load the data frame.
lake.df <- readRDS("data/lake.df.rds")

# Build the contingency table: EVI summed for every year x yday cell.
# NOTE(review): sparse = TRUE makes xtabs() return a sparse Matrix rather than
# a "table" object -- confirm that addmargins() below accepts it.
lake.table <- xtabs(EVI ~ year + yday, lake.df, sparse = TRUE)

# Append marginal means: margin 1 collapses dimension 1 (year),
# margin 2 collapses dimension 2 (yday).
addmargins(lake.table, c(1, 2), mean)
```

## summary

```{r}
summary(lake.df)
```

## sapply simple lapply(list apply)

```{r}
# sapply(lake.df, sd)

# Basic descriptive statistics for one vector.
#
# x:       vector to summarise.
# na.omit: when TRUE (default), drop NA values before computing anything.
#
# Returns a named numeric vector c(n, mean, stdev).
descstats <- function(x, na.omit = TRUE) {
  if (na.omit) {
    x <- x[!is.na(x)]
  }
  m <- mean(x)
  n <- length(x)
  s <- sd(x)
  # skew <- sum((x - m)^3 / s^3) / n
  # kurt <- sum((x - m)^4 / s^4) / n - 3
  c(n = n, mean = m, stdev = s)
}

# One column of statistics per column of lake.df.
sapply(lake.df, descstats)
```

```{r}
library(Hmisc)
# Qualified on purpose: psych (attached further down) also exports describe().
Hmisc::describe(lake.df)
```

# 描述性统计分析

```{r}
library(pastecs)
stat.desc(lake.df, basic = TRUE, desc = TRUE, norm = FALSE, p = 0.95)
```

```{r}
library(psych)
# Qualified on purpose: Hmisc (attached above) also exports describe().
psych::describe(lake.df)
```

## 分组统计(Descriptive statistics by group)

```{r}
# Column means grouped by year, by day of year, and by both.
aggregate(lake.df, by = list(y = lake.df$year), mean, na.rm = TRUE)
aggregate(lake.df, by = list(y = lake.df$yday), mean, na.rm = TRUE)
aggregate(
  lake.df,
  by = list(y = lake.df$year, d = lake.df$yday),
  mean,
  na.rm = TRUE
)
```

## using by 可返回多个统计数

```{r}
# Mean and standard deviation of one vector, as a named numeric vector.
dstats <- function(x) {
  c(mean = mean(x), sd = sd(x))
}
by(lake.df$EVI, lake.df$year, dstats)
```

```{r}
library(psych)
# describeBy() is the current name of the deprecated describe.by().
psych::describeBy(lake.df$EVI, group = lake.df$year)
```

## reshape

```{r}
library(reshape)

# Redefines dstats() to also report the number of observations; the tapply()
# chunk at the end of the document uses this version.
dstats <- function(x) {
  c(n = length(x), mean = mean(x), sd = sd(x))
}

# Long format: one row per (am, cyl, variable) measurement.
dfm <- melt(
  mtcars,
  measure.vars = c("mpg", "hp", "wt"),
  id.vars = c("am", "cyl")
)
dfm

# Aggregate every am / cyl / variable combination with dstats().
cast(dfm, am + cyl + variable ~ ., dstats)
```

## crosstable

```{r}
library(gmodels)

# Arthritis ships with the vcd package, which this document never attaches.
# Load it explicitly and skip the example when vcd is not installed.
if (requireNamespace("vcd", quietly = TRUE)) {
  data("Arthritis", package = "vcd")
  CrossTable(Arthritis$Treatment, Arthritis$Improved)
}

CrossTable(lake.df$year, lake.df$yday)
```

## tapply

```{r}
# Uses the dstats() defined in the reshape chunk (n, mean, sd).
tapply(lake.df$EVI, lake.df$year, dstats)
```