|
@@ -0,0 +1,108 @@
|
|
|
+---
|
|
|
+title: "table analysis"
|
|
|
+author: "hungerzs"
|
|
|
+date: "2016年8月1日"
|
|
|
+output: html_document
|
|
|
+---
|
|
|
+
|
|
|
+```{r setup, include=FALSE}
|
|
|
+# Show each chunk's source code in the rendered output by default.
+knitr::opts_chunk$set(echo = TRUE)
|
|
|
+```
|
|
|
+
|
|
|
+## 交叉表统计 cross tabulation
|
|
|
+
|
|
|
+使用 table 进行一般的描述性统计
|
|
|
+
|
|
|
+```{r}
|
|
|
+# Load the data frame.
+lake.df <- readRDS("data/lake.df.rds")
+# Build the year-by-yday contingency table; because the formula has a
+# left-hand side, xtabs() fills each cell with the sum of EVI for that
+# combination rather than a count.
+# NOTE(review): sparse = TRUE makes xtabs() return a sparse Matrix instead of
+# a "table" -- confirm addmargins() below accepts it.
+lake.table <- xtabs(EVI ~ year + yday, lake.df, sparse = TRUE)
+# Append marginal means: margin 1 collapses dimension 1 (year) and adds a row
+# of per-column means; margin 2 collapses dimension 2 (yday) and adds a column
+# of per-row means.
+addmargins(lake.table, c(1, 2), mean)
|
|
|
+```
|
|
|
+
|
|
|
+## summary
|
|
|
+```{r}
|
|
|
+# Per-column summary of lake.df (the output depends on each column's type).
+summary(lake.df)
|
|
|
+```
|
|
|
+
|
|
|
+## sapply
|
|
|
+
|
|
|
+`sapply` is a simplified version of `lapply` (list apply): it returns a vector or matrix instead of a list when possible.
|
|
|
+```{r}
|
|
|
+#sapply(lake.df, sd)
|
|
|
+
|
|
|
+# Basic descriptive statistics for a vector.
+#
+# x:       vector to summarise.
+# na.omit: if TRUE (default), drop NA values before computing anything, so
+#          `n` counts only the non-missing observations.
+#
+# Returns a named vector c(n, mean, stdev).
+descstats <- function(x, na.omit = TRUE) {
+  if (na.omit) {
+    x <- x[!is.na(x)]
+  }
+  m <- mean(x)
+  n <- length(x)
+  s <- sd(x)
+  # Skewness and excess kurtosis, currently disabled:
+  # skew <- sum((x - m)^3 / s^3) / n
+  # kurt <- sum((x - m)^4 / s^4) / n - 3
+  c(n = n, mean = m, stdev = s)
+}
|
|
|
+
|
|
|
+# Apply descstats() to every column of lake.df; sapply() simplifies the
+# per-column results into a matrix when they all have the same length.
+sapply(lake.df, descstats)
|
|
|
+```
|
|
|
+
|
|
|
+```{r}
|
|
|
+library(Hmisc)
+# Qualify the call: psych (attached further down) also exports describe(),
+# so the unqualified name depends on which package was attached last.
+Hmisc::describe(lake.df)
|
|
|
+```
|
|
|
+
|
|
|
+# 描述性统计分析
|
|
|
+```{r}
|
|
|
+library(pastecs)
+# Basic and descriptive statistics only (no normality tests); p is the
+# probability level used for the confidence interval on the mean.
+stat.desc(lake.df, basic = TRUE, desc = TRUE, norm = FALSE, p = 0.95)
|
|
|
+```
|
|
|
+
|
|
|
+```{r}
|
|
|
+library(psych)
+# Qualify the call: Hmisc (attached earlier) also exports describe().
+psych::describe(lake.df)
|
|
|
+```
|
|
|
+
|
|
|
+## 分组统计(Descriptive statistics by group)
|
|
|
+```{r}
|
|
|
+# Column means of lake.df within each group; na.rm = TRUE is forwarded to
+# mean(). The grouping columns come first in the result, under the names
+# given in `by`.
+aggregate(lake.df, by = list(y = lake.df$year), mean, na.rm = TRUE)
+aggregate(lake.df, by = list(y = lake.df$yday), mean, na.rm = TRUE)
+aggregate(
+  lake.df,
+  by = list(y = lake.df$year, d = lake.df$yday),
+  mean,
+  na.rm = TRUE
+)
|
|
|
+```
|
|
|
+
|
|
|
+## using by
|
|
|
+可返回多个统计数
|
|
|
+```{r}
|
|
|
+# Mean and standard deviation of `x`, returned as a named vector.
+dstats <- function(x) {
+  c(mean = mean(x), sd = sd(x))
+}
|
|
|
+# Apply dstats() to EVI separately for each year.
+by(lake.df$EVI, lake.df$year, dstats)
|
|
|
+```
|
|
|
+
|
|
|
+
|
|
|
+```{r}
|
|
|
+library(psych)
+# describeBy() is the current name of the deprecated describe.by().
+describeBy(lake.df$EVI, lake.df$year)
|
|
|
+
|
|
|
+```
|
|
|
+
|
|
|
+## reshape
|
|
|
+
|
|
|
+```{r}
|
|
|
+library(reshape)
|
|
|
+# Count, mean and standard deviation of `x`, returned as a named vector.
+dstats <- function(x) {
+  c(n = length(x), mean = mean(x), sd = sd(x))
+}
|
|
|
+# Reshape mtcars to long format, keeping am and cyl as identifiers and
+# stacking mpg, hp and wt as measured variables.
+dfm <- melt(
+  mtcars,
+  measure.vars = c("mpg", "hp", "wt"),
+  id.vars = c("am", "cyl")
+)
+dfm
+# Summarise every am / cyl / variable combination with dstats().
+cast(dfm, am + cyl + variable ~ ., dstats)
|
|
|
+```
|
|
|
+
|
|
|
+## crosstable
|
|
|
+```{r}
|
|
|
+library(gmodels)
+# Arthritis ships with the vcd package; load just the dataset so the next
+# call does not fail with "object 'Arthritis' not found".
+data(Arthritis, package = "vcd")
+CrossTable(Arthritis$Treatment, Arthritis$Improved)
+CrossTable(lake.df$year, lake.df$yday)
|
|
|
+```
|
|
|
+
|
|
|
+## tapply
|
|
|
+```{r}
|
|
|
+# Apply dstats() to EVI within each year. `dstats` is defined twice in this
+# document, so this uses whichever definition was evaluated last.
+tapply(lake.df$EVI, lake.df$year, dstats)
|
|
|
+
|
|
|
+```
|