R语言常用代码集
推荐几本看过的R语言书:
下面都是以前写的一些代码,配合以前文章使用效果更佳:
R语言安装部署基础
数据输入转换
文本批量读取
读取一个文件夹下的txt文件
链接:http://bbs.pinggu.org/forum.php?mod=viewthread&tid=6105793
setwd("G:/项目资料/研究生项目/莱州湾冲淤变化/数据计算201710/ggplot2 data/points data/")
filename <- list.files(pattern = ".txt")
for (i in 1:length(filename)){
var_name <- gsub('?.txt','',filename[i])
assign(var_name,read.table(filename[i],sep=",",header=TRUE))
}
潮汐数据读取与转换
library(plyr)
library(dplyr)
library(stringr)
a1 <- read.csv(file = "20180506-20180518.csv", header = T)
a1 <- apply(a1,2,function(x){
s<-str_replace_all(x,"\\[|\\]|'","")
s <- str_split(s,',')
s <- unlist(s)
s <- subset(s,nchar(s)>0)
s <- matrix(s,ncol=2,byrow=T)
s <- data.frame(Time=s[,1],cnt=s[,2])})
a1 <- ldply(a1)
地学数据转换应用
土地利用转移矩阵
帖子提问:http://bbs.pinggu.org/thread-5014884-1-1.html
library(dplyr)
library(tidyr)
library(stringr)
#在excel表中选取全部数据区域,Ctr+C
data <- tbl_df(read.table("clipboard", header = TRUE))
data %>% mutate(GRID05 = str_sub(GRIDCODE, start = 1, end = 1), GRID10 = str_sub(GRIDCODE_1, start = 1, end = 1)) %>%
filter(GRID05 != 0 & GRID10 != 0) %>%
mutate(GRID05 = factor(as.numeric(GRID05), levels = 1:6, labels = c("林地", "草地", "湿地", "耕地", "人工表面", "其他"))) %>%
mutate(GRID10 = factor(as.numeric(GRID10), levels = 1:6, labels = c("林地", "草地", "湿地", "耕地", "人工表面", "其他"))) %>%
group_by(GRID05, GRID10) %>% summarise(Shape_Area = sum(Shape_Area)) %>%
spread(key = GRID10, value = Shape_Area)
聚类分析
method表示类的合并方法,有:
single 最短距离法
complete 最长距离法
median 中间距离法
mcquitty 相似法
average 类平均法
centroid 重心法
ward 离差平方和法
min-max标准化(Min-maxnormalization)
也叫离差标准化,是对原始数据的线性变换,使结果落到[0,1]区间,转换函数如下:
其中max为样本数据的最大值,min为样本数据的最小值。这种方法有一个缺陷就是当有新数据加入时,可能导致max和min的变化,需要重新定义。
log函数转换
通过以10为底的log函数转换的方法同样可以实现归一下,具体方法如下:
看了下网上很多介绍都是x*=log10(x),其实是有问题的,这个结果并非一定落到[0,1]区间上,应该还要除以log10(max),max为样本数据最大值,并且所有的数据都要大于等于1。
atan函数转换
用反正切函数也可以实现数据的归一化:
使用这个方法需要注意的是如果想映射的区间为[0,1],则数据都应该大于等于0,小于0的数据将被映射到[-1,0]区间上。
而并非所有数据标准化的结果都映射到[0,1]区间上,其中最常见的标准化方法就是Z标准化,也是SPSS中最为常用的标准化方法:
z-score 标准化(zero-meannormalization)
也叫标准差标准化,经过处理的数据符合标准正态分布,即均值为0,标准差为1,其转化函数为:
其中μ为所有样本数据的均值,σ为所有样本数据的标准差。
对新疆地区进行聚类分析:
place | height | waterfall | icesoildepth | windday |
哈巴河 | 532.6 | 173.8 | 150 | 61.8 |
阿勒泰 | 735.1 | 191.5 | 146 | 37.7 |
克拉玛依 | 427 | 114.4 | 197 | 75.4 |
巴楚 | 1116.5 | 41.6 | 64 | 7.6 |
莎车 | 1231.2 | 42.5 | 93 | 11 |
于田 | 1427 | 46.4 | 81 | 1.4 |
将上述表格保存为xinjiang.csv文件放置于R语言工作目录下即可
工作目录可以使用getwd()函数获取
xinj<-read.csv("xinjiang.csv",header = TRUE)
fun <- function(x) (x-min(x))/(max(x)-min(x))
xj3 <- apply(xinj[,2:5], 2, FUN=fun) # use method "min-max"
xj3<-data.frame(xinj[,1],xj3)
hc.single=hclust(dist(xj3[2:5]),method = "single") #最短距离法聚类
plot(hc.single,main = "Single Linkage",xlab="",labels=xj3$xinj...1.,ylab="",sub = "place",cex=.9) #制作聚类图
黑松植物
library(readxl)
testdata <- read_excel(path = "trees.xlsx", sheet = 3, col_names = T)
NO1 <- testdata$NO
testdata[is.na(testdata)] <- 0 #缺失值替换为0
testdata <- testdata[1:57,]
library(tidyverse)
library(stringr)
result <- testdata %>%
mutate(group = str_sub(NO, -1, -1)) %>%
group_by(group) %>%
summarise_at(vars(NUM, knum), sum)
library(ggplot2)
ggplot(result, aes(group, NUM))+
labs(x="黑松数量", y="距坡顶距离", title="小钦岛黑松分布")+ #设置坐标轴和标题标签
geom_bar(stat = "identity")
#整体制图
library(readxl)
testdata <- read_excel(path = "treeall.xlsx", sheet = 1, col_names = T)
NO1 <- testdata$NO
testdata[is.na(testdata)] <- 0 #缺失值替换为0
testdata <- testdata[1:180,]
library(tidyverse)
library(stringr)
result <- testdata %>%
mutate(group = str_sub(NO, -1, -1)) %>%
group_by(group, TYPE) %>%
summarise_at(vars(NUM, knum), mean) #define Calculate method
library(ggplot2)
ggplot(result, aes(group, NUM))+
labs(x="距坡顶距离", y="黑松数量", title="小钦岛黑松分布")+ #设置坐标轴和标题标签
geom_bar(aes(fill=TYPE), stat = "identity",position="dodge")
#中文输出图
library(showtext) #引用包,输出中文字体
showtext.auto(enable = TRUE)
font.add('SimSun', 'simsun.ttc') #添加中文字体
pdf('test2.pdf',width = 11.69,height = 8.27) #输出PDF,指定长宽和文件名
library(ggplot2)
ggplot(result, aes(group, NUM))+
labs(x="距坡顶距离", y="黑松数量", title="小钦岛黑松分布")+ #设置坐标轴和标题标签
geom_bar(aes(fill=TYPE), stat = "identity",position="dodge")
dev.off()
坡向重分类
#发帖地址:
#http://bbs.pinggu.org/thread-6063456-1-1.html
#方法一:
library(dplyr)
cdaspect <- read.table("cdaspect.txt", header = T,sep=",")
cdaspect3 <-cdaspect %>%
mutate(ASPECT=ifelse(
((RASTERVALU>=0 & RASTERVALU<22.5) |(RASTERVALU>=337.5 & RASTERVALU<360)),1,
ifelse((RASTERVALU>=22.5 & RASTERVALU<67.5),2,
ifelse((RASTERVALU>=292.5 & RASTERVALU<337.5), 3,
ifelse((RASTERVALU>=67.5 & RASTERVALU <112.5), 4,
ifelse((RASTERVALU >=247.5 & RASTERVALU < 292.5), 5,
ifelse((RASTERVALU>=112.5 & RASTERVALU<157.5), 6,
ifelse((RASTERVALU>=202.5 & RASTERVALU <247.5), 7,
8)
)
)
)
)
)
)
)
print(cdaspect3)
#方法二:
library(tidyverse)
cdaspect2 <- cdaspect %>%
mutate(ASPECT = cut(RASTERVALU,
breaks = c(0, seq(1, 15, 2), 16) * 22.5,
labels = c(1L, 2L, 4L, 6L, 8L, 7L, 5L, 3L, 9L),
ASPECT = ifelse(ASPECT == 9, 1, ASPECT,)))
cdaspect2$ASPECT[which(cdaspect2$ASPECT==9)] <-1
print(cdaspect2)
计算物种多样性
library(readxl)
hdata <- read_excel(path = "bcdherb.xlsx", sheet = 1, col_names = T)
BCD11 <- hdata$BCD11
BCD11 <- na.omit(BCD11)
H <- -1*sum((BCD11)*log(BCD11))
library(readxl)
hdata <- read_excel(path = "ncc.xlsx", sheet = 1, col_names = T)
hdata <- read.csv(file = "ncdherb.csv", header = T)
hdata <- hdata[,2:46]
hdata <- hdata/3
#循环计算H值
i <- 1
totalcol <- ncol(hdata)
h <- vector(length = totalcol)
D <- vector(length = totalcol)
J <- vector(length = totalcol)
N <- vector(length = totalcol)
for(i in i:totalcol){
a <- names(hdata)[i] #提取数据框表头
tempdata <- hdata[[a]] #提取数据框中表头为a的数据Pi
tempdata <- na.omit(tempdata)
h[i] <- -1*sum(tempdata*log(tempdata)) #H值计算Shannon-Wiener多样性指数
D[i] <- 1-sum(tempdata^2) #Simpson指数
J[i] <- h[i]/log(length(tempdata)) #Pielou均匀度指数
N[i] <- length(tempdata) #有效值个数
}
h
D
J
N
hdataexport <- rbind(hdata,h,D,J,N)
write.csv(hdataexport, file = "nccExport.csv")
library(readxl)
hdata <- read_excel(path = "bcdherb.xlsx", sheet = 1, col_names = T)
hdata <- hdata[,2:169]
#循环计算H值
i <- 1
totalcol <- ncol(hdata)
h <- vector(length = totalcol)
for(i in i:totalcol){
a <- names(hdata)[i] #提取数据框表头
tempdata <- hdata[[a]] #提取数据框中表头为a的数据Pi
tempdata <- na.omit(tempdata)
h[i] <- length(tempdata)
}
h
hdataexport <- rbind(hdata,h)
write.csv(hdataexport, file = "ncdtreeexport.csv")
R语言plot函数绘图
library(readxl)
#FIG4
fig4 <- read_excel(path = "FIG4.xls", sheet = 1, col_names = T)
x <- fig4$RRS
y <- fig4$SSC
fit4 <- lm(y~I(x^3)+I(x^2)+x)
plot(x,y, pch=4)
lines(x, fitted(fit4))
#FIG5
fig5 <- read_excel(path = "FIG5.xlsx", sheet = 1, col_names = T)
x <- fig5$shice
y <- fig5$moni
fit5 <- lm(y~x)
plot(x,y, pch=4)
lines(x, fitted(fit5))
#FIG6
fig6a <- read_excel(path = "FIG6.xlsx", sheet = 1, col_names = T)
fig6b <- read_excel(path = "FIG6.xlsx", sheet = 2, col_names = T)
fig6c <- read_excel(path = "FIG6.xlsx", sheet = 3, col_names = T)
fig6d <- read_excel(path = "FIG6.xlsx", sheet = 4, col_names = T)
par(mfrow=c(2,2))
plot(fig6a$shiceSSSC, fig6a$OriMod, pch=4)
plot(fig6b$shiceSSSC, fig6b$OriMod, pch=4)
plot(fig6c$SSSC, fig6c$moni, pch=4)
plot(fig6d$shiceSSSC, fig6d$OriMod, pch=4)
#FIG7
fig7a <- read_excel(path = "FIG7.xls", sheet = 1, col_names = T)
fig7b <- read_excel(path = "FIG7.xls", sheet = 2, col_names = T)
fig7c <- read_excel(path = "FIG7.xls", sheet = 3, col_names = T)
fig7d <- read_excel(path = "FIG7.xls", sheet = 4, col_names = T)
fig7e <- read_excel(path = "FIG7.xls", sheet = 5, col_names = T)
fig7f <- read_excel(path = "FIG7.xls", sheet = 6, col_names = T)
fig7g <- read_excel(path = "FIG7.xls", sheet = 7, col_names = T)
fig7h <- read_excel(path = "FIG7.xls", sheet = 8, col_names = T)
fig7i <- read_excel(path = "FIG7.xls", sheet = 9, col_names = T)
fig7j <- read_excel(path = "FIG7.xls", sheet = 10, col_names = T)
par(mfrow=c(5,2), mar=c(2,2,2,2)) #设置行列个数,每个图间距
plot(fig7a$shiceSSC, fig7a$moniSSC, pch=4)
plot(fig7b$shice, fig7b$moni, pch=4)
plot(fig7c$shice, fig7c$moni, pch=4)
plot(fig7d$shice, fig7d$moni, pch=4)
plot(fig7e$shice, fig7e$moni, pch=4)
plot(fig7f$shice, fig7f$moni, pch=4)
plot(fig7g$shice, fig7g$moni, pch=4)
plot(fig7h$shice, fig7h$moni, pch=4)
plot(fig7i$shice, fig7i$moni, pch=4)
plot(fig7j$shice, fig7j$moni, pch=4)
ggplot2绘图
制作光谱曲线
ggplot(fig3, aes(Wavelength, Core))+
labs(x="Wavelength (nm)", y="Correlation coefficients")+ #设置坐标轴和标题标签
theme_bw() + #删去阴影
theme(panel.grid =element_blank())+ ## 删去网格线
geom_line()
回归分析结果标注
library(readxl)
testdata <-read_excel("testdata.xlsx", sheet = 1, col_names = FALSE)
as.numeric(testdata$X0)
shapiro.test(testdata$X0)
qqnorm(testdata$X0)
cor(x=testdata$X0, y=testdata$X1, use="complete", method = "kendall")
y <- testdata$X1
x <- testdata$X0
huigui <-lm(y~x)
library(ggplot2)
ggplot(testdata,aes(x=X0,y=X1))+
labs(x="x", y="y", title="分辨率随凝视时间变化图")+ #设置坐标轴和标题标签
geom_point()+
geom_smooth(method = "lm", se=FALSE, color="black", formula = y ~ x)+
geom_text(x=0,y=17.5, #设置标注位置坐标
label = paste("y=",format(huigui$coefficients[2],digits = 5),
"x+",format(huigui$coefficients[1],digits = 5),
"R-squared:", format(summary(huigui)$r.squared,digits = 4)), #label=标注内容
size=8) #字号大小
冲淤精度图
#制作准确度图表
library(ggplot2)
library(grid)
library(readxl)
accuracydata <- read_excel(path =
"G:/项目资料/研究生项目/莱州湾冲淤变化/数据计算201710/ggplot2 data/合并数据.xls", sheet=1, col_names=T)
p2 <- ggplot(accuracydata, aes(x=Resolution, y=RMSPE, col= Method))+geom_line(size=1)
p1 <- ggplot(accuracydata, aes(x=Resolution, y=RMS, col=Method)) +geom_line(size=1)
p5 <- ggplot(accuracydata, aes(x=Resolution, y=MAX, col=Method)) +geom_line(size=1)
p4 <- ggplot(accuracydata, aes(x=Resolution, y=MIN, col=Method)) +geom_line(size=1)
p3 <- ggplot(accuracydata, aes(x=Resolution, y=MEAN, col=Method)) +geom_line(size=1)
########新建画图页面###########
grid.newpage() ##新建页面
pushViewport(viewport(layout = grid.layout(5,1))) ####将页面分成2*2矩阵
vplayout <- function(x,y){
viewport(layout.pos.row = x, layout.pos.col = y)
}
print(p1, vp = vplayout(1,1)) ###将1的位置画图p1
print(p2, vp = vplayout(2,1)) ###将(2,1)的位置画图p2
print(p3, vp = vplayout(3,1)) ###将(3,1)的位置画图p3
print(p4, vp = vplayout(4,1)) ###将(4,1)的位置画图p4
print(p5, vp = vplayout(5,1)) ###将(5,1)的位置画图p5
固定间隔坐标轴制图
library(ggplot2)
ggplot(data = temp, aes(Mon, Temp, colour=Class, hjust=1))+
theme_bw() + #remove gray background
geom_line()+
geom_point()+
scale_x_continuous(breaks = seq(1, 12, 1))+ #from 1 to 12, 1 separate
theme(panel.grid =element_blank())