table.md 1.9 KB


title: "table analysis"
author: "hungerzs"
date: "2016年8月1日"

output: html_document

# Document-wide knitr chunk default: echo = TRUE includes each chunk's source
# code in the rendered output alongside its results.
knitr::opts_chunk$set(echo = TRUE)

交叉表统计 cross tabulation

使用table进行一般的描述性统计

# Load the data frame
lake.df <- readRDS("data/lake.df.rds")
# Build the contingency table: with a left-hand side in the formula, xtabs()
# sums EVI within each year x yday cell. sparse = TRUE requests a sparse matrix.
lake.table <- xtabs(EVI ~ year + yday, lake.df, sparse = TRUE)
# Append marginal means: margin 1 summarises over dim 1 (year), i.e. one value
# per column; margin 2 summarises over dim 2 (yday), i.e. one value per row.
# NOTE(review): addmargins() is documented for tables/arrays; confirm that it
# accepts the sparse matrix returned by xtabs(sparse = TRUE).
addmargins(lake.table, c(1, 2), mean)

summary

# Print a per-column summary of the lake.df data frame.
summary(lake.df)

sapply

sapply: a simplified lapply (lapply = "list apply"); it simplifies the result to a vector or matrix when possible.

#sapply(lake.df, sd)

# Basic descriptive statistics for a single vector.
#
# x        vector to summarise.
# na.omit  when TRUE (default), missing values are removed before the
#          statistics are computed.
#
# Returns a named vector with elements n (number of values used), mean and
# stdev (sample standard deviation).
descstats <- function(x, na.omit = TRUE) {
  vals <- if (na.omit) x[!is.na(x)] else x
  # Skewness and kurtosis are intentionally left out; for reference:
  #   skew <- sum((x - m)^3 / s^3) / n
  #   kurt <- sum((x - m)^4 / s^4) / n - 3
  c(n = length(vals), mean = mean(vals), stdev = sd(vals))
}

# Apply descstats to every column of lake.df; sapply() simplifies the
# per-column results (n, mean, stdev) where it can.
sapply(lake.df, descstats)
library(Hmisc)
# describe() resolves to the most recently attached package that defines it,
# which is Hmisc at this point provided psych has not been attached earlier.
describe(lake.df)

描述性统计分析

library(pastecs)
# stat.desc(): basic = TRUE adds the basic statistics, desc = TRUE adds the
# descriptive statistics, norm = FALSE skips the normal-distribution
# statistics, and p sets the probability level for the confidence interval
# on the mean.
stat.desc(lake.df, basic = TRUE, desc = TRUE, norm = FALSE, p = 0.95)
library(psych)
# psych is attached after Hmisc, so this call resolves to psych's describe().
describe(lake.df)

分组统计(Descriptive statistics by group)

# Group means of every column in lake.df; na.rm = TRUE is forwarded to mean()
# so missing values are ignored within each group.
# Grouped by year:
aggregate(lake.df, by = list(y = lake.df$year), mean, na.rm = TRUE)
# Grouped by day of year (the grouping column is also labelled `y` here):
aggregate(lake.df, by = list(y = lake.df$yday), mean, na.rm = TRUE)
# Grouped by every year x day-of-year combination:
aggregate(lake.df, by = list(y = lake.df$year, d = lake.df$yday), mean,
          na.rm = TRUE)

using by

可返回多个统计数

# Return the mean and sample standard deviation of x as a named vector.
# Missing values are not removed.
dstats <- function(x) {
  c(mean = mean(x), sd = sd(x))
}
# Apply dstats to EVI separately for each year; by() returns one result per
# group, so a function returning several statistics works here.
by(lake.df$EVI, lake.df$year, dstats)
library(psych)
# describeBy() is the current name for psych's grouped describe();
# describe.by() is deprecated in favour of it.
describeBy(lake.df$EVI, group = lake.df$year)

reshape

library(reshape)
# Return the count, mean and sample standard deviation of x as a named
# vector. Missing values are not removed. This replaces any earlier
# definition of dstats.
dstats <- function(x) {
  c(n = length(x), mean = mean(x), sd = sd(x))
}
# Melt mtcars into long format: am and cyl identify each row, while mpg, hp
# and wt are stacked into variable/value pairs.
dfm <- melt(
  mtcars,
  id.vars = c("am", "cyl"),
  measure.vars = c("mpg", "hp", "wt")
)
dfm
# Summarise each am x cyl x variable combination with dstats.
cast(dfm, am + cyl + variable ~ ., dstats)

crosstable

library(gmodels)
# Cross-tabulate treatment against improvement.
# NOTE(review): Arthritis is not created anywhere in this file; presumably it
# is the example data set shipped with the vcd package. Confirm it is loaded
# before this call.
CrossTable(Arthritis$Treatment, Arthritis$Improved)
# Cross-tabulate year against day of year for the lake data.
CrossTable(lake.df$year, lake.df$yday)

tapply

# Apply dstats to EVI within each year. dstats is whichever definition was
# assigned last; in this file that is the version returning n, mean and sd.
tapply(lake.df$EVI, lake.df$year, dstats)