2017年的第一周,你吸了多少雾霾?
最近学了些revst包的基础知识,勉强能爬到一些有用的数据,刚好趁着周末,捂着脸跟大家分享。
这一篇使用revst包爬取了中国环保部环境监测中心公布367个主要城市的日度AQI指数信息(2017年1~7日),由于个别城市数据有缺失,可视化过程可能会遗漏部分城市信息。
以下是本篇需要加载的环境包:
library(rvest)
library(stringr)
library(dplyr)
library(ggplot2)
library(plyr)
library(maptools)
library(ggmap)
library(Hmisc)
library(leafletCN)
library(ggthemes)
---------------------------------------------------------------------------------------------------------------
使用revst包爬取了2017年1日至7日的367各主要城市AQI指数数据:
url<-"http://datacenter.mep.gov.cn/report/air_daily/air_dairy.jsp?city=&startdate=2017-01-01&enddate=2017-01-07&page="
final <- data.frame()
for (m in 1:86){
fun<-function(m){
url<-paste(url,m,sep='')
web<-read_html(url,encoding="UTF-8")
Num<-web %>% html_nodes("tr>td:nth-child(1)") %>% html_text()
City<-web %>% html_nodes("tr>td:nth-child(2)") %>% html_text()
Date<-web %>% html_nodes("tr>td:nth-child(3)") %>% html_text()
AQI<-web %>% html_nodes("tr>td:nth-child(4)") %>% html_text()
Level<-web %>% html_nodes("tr>td:nth-child(5)") %>% html_text()
Mainpo<-web %>% html_nodes("tr>td:nth-child(6)") %>% html_text()
final<-data.frame(Num=Num[6:35],City=City[6:35],Date=Date[4:33],
AQI=AQI[4:33],Level=Level[3:32],Mainpo=Mainpo[2:31],
stringsAsFactors =FALSE)
}
final<-rbind(final,fun(m))
}
数据预处理:
final<-final[1:2569,]
final$AQI<-as.numeric(final$AQI)
final$Level<-factor(final$Level,levels=c("重度污染","严重污染","轻度污染","中度污染","良","优"),order=TRUE)
address<-unique(final$City)
add<-get_geo_position(address)
final1<-merge(final,add, by.x = "City", by.y = "city",all.x=TRUE)
final1$day<-substr(final1$Date,10,10)
names(final1)
final1<-final1[,c("City","Num","Date","day","AQI","Level","Mainpo","lon","lat")]
newdata1<-final1[,c("City","lon","lat","day","AQI","Level","Mainpo")]
地图素材导入:
china_map<-readShapePoly("c:/rstudy/bou2_4p.shp")
x <- china_map@data
xs <- data.frame(id=row.names(x),x)
china_map1 <- fortify(china_map)
china_map_data <- join(china_map1, xs, type = "full")
mydata <- read.csv("c:/rstudy/geshengzhibiao.csv")
china_data <- join(china_map_data, mydata, type="full")
首先查看下所选取的367个主要城市在全国的分布情况:
ggplot()+
geom_polygon(data=china_data,aes(x=long,y=lat,group=group),fill="white",colour="grey60")+
geom_point(data=newdata1,aes(x=lon,y=lat),colour="red")+
coord_map("polyconic") +
theme_nothing()
用气泡图展示主要城市AQI指数相对高低(气泡图大小及颜色深浅均表示AQI指数强弱)
(以下数据基于2017年1日~7日367个城市的平均AQI指数数据)
newdata2<-newdata1[,c("City","day","AQI")];newdata2$day<-as.factor(newdata2$day)
newdata2<-tapply(newdata2$AQI,list(newdata2$City),mean,na.rm=TRUE)
newdata2<-as.data.frame(newdata2)
newdata2$Address<-rownames(newdata2)
names(newdata2)<-c("AQIM","Address");newdata2<-newdata2[,c("Address","AQIM")]
newdata2<-na.omit(newdata2)
mynewdata<-merge(newdata2,add, by.x = "Address", by.y = "city",all.x=TRUE)
ggplot()+
geom_polygon(data=china_data,aes(x=long,y=lat,group=group),fill="white",colour="grey60")+
geom_point(data=mynewdata,aes(x=lon,y=lat,size=AQIM,fill=AQIM),shape=21,colour="black")+
scale_size_area(max_size=5)+
scale_fill_gradient(low="white",high="#D73434")+
coord_map("polyconic") +
theme_nothing()
使用中心辐射热度图及散点图叠加可以在宏观上洞察全国各省各地区的空气质量级别及集中分布趋势:
ggplot()+
geom_polygon(data=china_map,aes(x=long,y=lat,group=group),fill="#005A32",col="white")+
geom_polygon(data=mynewdata,aes(x=lon,y=lat,fill = ..level..), stat="density_2d", alpha = .5, color = NA)+
coord_map("polyconic") +
geom_point(data=mynewdata,aes(x=lon,y=lat),col="red",size=1)+
scale_fill_gradient2( low = "white",mid="yellow", high = "red")+
theme_nothing()
使用热力地图查看整体城市空气质量的地域分布特征:
geojsonMap(mynewdata,"city",popup = paste0(mynewdata$Address,":",dat$AQIM),palette = "Reds", legendTitle = "AQI Index")
AQI指数最高的10个城市:
mynewdata3<-newdata2[order(-newdata2$AQIM),][1:10,]
ggplot(mynewdata3,aes(reorder(Address,AQIM),AQIM))+
geom_bar(stat="identity",position="dodge",fill="#D6B869")+
theme_wsj()+
coord_flip()+
scale_fill_wsj("rgby", "")+
theme(axis.ticks.length=unit(0.5,'cm'))+
geom_text(aes(label=round(AQIM+0.05,1)), position = position_dodge(0.9),hjust=1.1,colour="white",size=5)+
guides(fill=guide_legend(title=NULL))+
ggtitle("十大污染最严重城市")+
theme(
axis.title = element_blank(),
legend.position='none',
panel.grid.major.x=element_line(linetype="dashed",colour="grey60"),
panel.grid.major.y=element_blank(),
axis.ticks.x=element_blank(),
axis.ticks.y=element_line(),
axis.ticks.length=unit(0.3,'cm'),
axis.line.x=element_blank(),
axis.line.y=element_line(),
axis.text.x=element_text(size=10),
)
因为所收集的数据中,行政区划名称与现有地图素材有出入,鉴于城市较多,匹配比较麻烦,暂时没有制作基于空气质量水平的离散填充地图,但是方法之前已经多有介绍,感兴趣的小伙伴儿可以借此自己练习。
我是分割线~
欢迎关注魔方学院QQ群