浏览文件内容

添加 'chap3/table.md'

hungerzs 6 年之前
父节点
当前提交
06328b2a0d
共有 1 个文件被更改: 108 次插入, 0 次删除
  1. 108 0
      chap3/table.md

+ 108 - 0
chap3/table.md

@@ -0,0 +1,108 @@
+---
+title: "table analysis"
+author: "hungerzs"
+date: "2016年8月1日"
+output: html_document
+---
+
+```{r setup, include=FALSE}
+knitr::opts_chunk$set(echo = TRUE)  # document-wide chunk default: echo = TRUE
+```
+
+## 交叉表统计 cross tabulation
+
+使用 table 进行一般的描述性统计
+
+```{r}
+# Load the lake data frame from its serialized RDS file.
+lake.df <- readRDS("data/lake.df.rds")
+# Build a contingency table: EVI summed within each year x yday cell.
+# TRUE is spelled out because T is an ordinary variable that can be reassigned.
+lake.table <- xtabs(EVI ~ year + yday, data = lake.df, sparse = TRUE)
+# Append marginal means: margin 1 adds a summary row (one mean per column),
+# margin 2 adds a summary column (one mean per row).
+addmargins(lake.table, margin = c(1, 2), FUN = mean)
+```
+
+## summary
+```{r}
+summary(lake.df)  # per-column summary of the lake data frame loaded in the first chunk
+```
+
+## sapply
+
+sapply 即 simplified lapply(list apply):对每个元素应用函数,并尽量把结果简化为向量或矩阵
+```{r}
+#sapply(lake.df, sd)
+
+# Basic descriptive statistics for a single vector.
+#   x       : vector to summarise.
+#   na.omit : when TRUE (the default), NA entries are removed first.
+# Returns a named vector with elements n (count), mean and stdev.
+# Skewness and kurtosis remain disabled; their formulas are kept for reference:
+#   skew <- sum((x - mean(x))^3 / sd(x)^3) / length(x)
+#   kurt <- sum((x - mean(x))^4 / sd(x)^4) / length(x) - 3
+descstats <- function(x, na.omit = TRUE) {
+  if (na.omit) {
+    x <- x[!is.na(x)]
+  }
+  c(n = length(x), mean = mean(x), stdev = sd(x))
+}
+
+sapply(lake.df, descstats)  # run descstats on every column of lake.df; sapply combines the per-column n/mean/stdev results
+```
+
+```{r}
+library(Hmisc)  # describe() below is Hmisc's version; psych, which also provides describe(), is only attached in a later chunk
+describe(lake.df)
+```
+
+# 描述性统计分析
+```{r}
+library(pastecs)
+# Descriptive statistics for every column of lake.df: the basic and descriptive
+# blocks are on, the normality block is off, and p = 0.95 sets the confidence
+# level used for the mean. TRUE/FALSE are spelled out because T and F are
+# ordinary variables that can be reassigned.
+stat.desc(lake.df, basic = TRUE, desc = TRUE, norm = FALSE, p = 0.95)
+```
+
+```{r}
+library(psych)  # once psych is attached, describe() resolves to psych's version instead of Hmisc's
+describe(lake.df)
+```
+
+## 分组统计(Descriptive statistics by group)
+```{r}
+# Group means of every column of lake.df. na.rm = TRUE is forwarded to mean()
+# so missing values are skipped; TRUE is spelled out because T can be reassigned.
+# Grouped by year:
+aggregate(lake.df, by = list(y = lake.df$year), FUN = mean, na.rm = TRUE)
+# Grouped by yday:
+aggregate(lake.df, by = list(y = lake.df$yday), FUN = mean, na.rm = TRUE)
+# Grouped by every year/yday combination:
+aggregate(lake.df, by = list(y = lake.df$year, d = lake.df$yday), FUN = mean, na.rm = TRUE)
+```
+
+## using by
+by() 可对每个分组同时返回多个统计量
+```{r}
+# Mean and standard deviation of x, returned as a named vector.
+dstats <- function(x) c(mean = mean(x), sd = sd(x))
+# Apply dstats to EVI separately within each year.
+by(lake.df$EVI, lake.df$year, dstats)
+```
+
+
+```{r}
+library(psych)
+# Descriptive statistics of EVI for each year. describeBy() is the current name;
+# the old describe.by() alias was deprecated and later dropped from psych.
+describeBy(lake.df$EVI, group = lake.df$year)
+
+```
+
+## reshape
+
+```{r}
+library(reshape)
+# Count, mean and standard deviation of x, returned as a named vector.
+dstats <- function(x) c(n = length(x), mean = mean(x), sd = sd(x))
+# Reshape mtcars to long form: am and cyl identify each row, while mpg, hp and
+# wt are stacked as the measured variables.
+dfm <- melt(mtcars, id.vars = c("am", "cyl"), measure.vars = c("mpg", "hp", "wt"))
+dfm
+# Summarise with dstats for every am x cyl x variable combination.
+cast(dfm, am + cyl + variable ~ ., dstats)
+```
+
+## crosstable
+```{r}
+library(gmodels)
+CrossTable(Arthritis$Treatment, Arthritis$Improved)  # NOTE(review): Arthritis is not created or loaded by any chunk in this document — presumably the vcd dataset; confirm and load it before this call
+CrossTable(lake.df$year, lake.df$yday)  # cross-tabulate year against yday for the lake data
+```
+
+## tapply
+```{r}
+tapply(lake.df$EVI, lake.df$year, dstats)  # dstats applied to EVI within each year; in document order the dstats in effect is the n/mean/sd version from the reshape section
+
+```