数据探索
数据探索一般是数据分析的第一步,可以大致了解数据的分布情况,如均值、标准差、最小值、最大值、四分位数等。本文采用R自带的iris数据集,尝试用R语言自定义函数实现类似SAS或SPSS的输出风格。
R自定义函数:
#' Descriptive statistics for a single variable
#'
#' Builds a one-row summary of `x`: missing-value count, mean, median,
#' standard deviation, minimum, the 25%/50%/75% quantiles, maximum, range
#' (max - min), skewness and kurtosis. Missing values are removed before every
#' statistic is computed.
#'
#' @param x A vector to summarise. Factors are not summarised.
#' @return A one-row data.frame with columns `Nmiss`, `mean`, `median`, `sd`,
#'   `min`, `Q3` (25% quantile), `Q5` (50% quantile), `Q7` (75% quantile),
#'   `max`, `range`, `skewness`, `kurtosis`. Returns `NULL` invisibly when `x`
#'   is a factor.
my.stat <- function(x) {
  # Factors are skipped; the function yields nothing for them.
  if (is.factor(x)) {
    return(invisible(NULL))
  }

  # Fail fast with a clear message: require() would only return FALSE and let
  # the function die later with "could not find function".
  if (!requireNamespace("e1071", quietly = TRUE)) {
    stop("Package 'e1071' is required to compute skewness and kurtosis.",
         call. = FALSE)
  }

  # names = FALSE keeps the "25%" label from becoming the row name of the
  # returned data.frame.
  quartiles <- quantile(x, probs = c(0.25, 0.5, 0.75), na.rm = TRUE,
                        names = FALSE)
  lowest <- min(x, na.rm = TRUE)
  highest <- max(x, na.rm = TRUE)

  # Column names (including Q3/Q5/Q7 for the 25/50/75% quantiles) are part of
  # the output contract and are kept as-is.
  data.frame(
    Nmiss = sum(is.na(x)),
    mean = mean(x, na.rm = TRUE),
    median = median(x, na.rm = TRUE),
    sd = sd(x, na.rm = TRUE),
    min = lowest,
    Q3 = quartiles[1],
    Q5 = quartiles[2],
    Q7 = quartiles[3],
    max = highest,
    range = highest - lowest,
    skewness = e1071::skewness(x, na.rm = TRUE),
    kurtosis = e1071::kurtosis(x, na.rm = TRUE)
  )
}
# Apply my.stat to every column of iris except the 5th, then transpose so each
# variable becomes a row and each statistic a column.
res <- t(sapply(X = iris[-5], FUN = my.stat))
# Display the summary table.
res