其他
R可视化18|ggstatsplot几行code终结SCI级图表统计+画图 (下)
"pythonic生物人"的第117篇分享
grouped_ggpiestats
# Fix the RNG seed so the output is reproducible
set.seed(123)

# Pie charts of movie genre, one per level of `mpaa`, combined into one figure
ggstatsplot::grouped_ggpiestats(
  data = ggstatsplot::movies_long,
  x = genre,
  grouping.var = mpaa, # variable used to split the data into panels
  title.prefix = "Movie genre", # prefix for each panel title
  title.text = "Composition of MPAA ratings for different genres",
  label.repel = TRUE, # repel labels (helps when labels overlap)
  palette = "dust",
  package = "ggthemr"
)
ggbarstats
功能类似于 ggpiestats,只是用柱状图代替饼图来展示分类变量的构成,图形非常好看。
# Fix the RNG seed so the output is reproducible
set.seed(123)

# Attached because scale_x_discrete() and guide_axis() are called unqualified
library(ggplot2)

# Bar chart of `mpaa` against `genre`
ggstatsplot::ggbarstats(
  data = ggstatsplot::movies_long,
  x = mpaa,
  y = genre,
  title = "MPAA Ratings by Genre",
  xlab = "movie genre",
  legend.title = "MPAA rating",
  sampling.plan = "jointMulti",
  messages = FALSE,
  palette = "dust",
  package = "ggthemr",
  ggtheme = hrbrthemes::theme_ipsum_pub(),
  # spread the x-axis labels over two rows
  ggplot.component = list(
    scale_x_discrete(guide = guide_axis(n.dodge = 2))
  )
)
grouped_ggbarstats
# Fix the RNG seed so the output is reproducible
set.seed(123)

# Subset forcats::gss_cat: keep two races and three religions, and drop the
# party-affiliation answers listed in the last condition
df <- dplyr::filter(
  .data = forcats::gss_cat,
  race %in% c("Black", "White"),
  relig %in% c("Protestant", "Catholic", "None"),
  !partyid %in% c("No answer", "Don't know", "Other party")
)

# Bar charts of `relig` against `partyid`, one per level of `race`
ggstatsplot::grouped_ggbarstats(
  data = df,
  x = relig,
  y = partyid,
  grouping.var = race,
  title.prefix = "Race",
  title.text = "Race, religion, and political affiliation",
  xlab = "Party affiliation",
  palette = "dust",
  package = "ggthemr",
  ggstatsplot.layer = FALSE,
  ggtheme = ggthemes::theme_tufte(base_size = 12),
  plotgrid.args = list(nrow = 2) # arrange the panels in two rows
)
# NOTE(review): the lines below look like the tail of a plotting call whose
# opening lines are not present here (possibly lost when the article was
# extracted). They will not parse on their own; TODO restore the missing
# beginning from the original article or remove this fragment.
package = "ggthemr",
palette = "dust",
k = 3, # decimal places in result
perc.k = 1 # decimal places in percentage labels
) + # further modification with `ggplot2` commands
ggplot2::theme(
plot.title = ggplot2::element_text(
color = "black",
size = 14,
hjust = 0
)
)
gghistostats
可视化单一变量的分布,并检验该变量的均值与指定值(下例中为 5)之间是否存在统计学差异。
# Fix the RNG seed so the output is reproducible
set.seed(123)

# Histogram of iris sepal length with the test value 5 marked on the plot.
# The call previously ended with a stray trailing comma, which passes an
# empty argument to the function; it has been removed.
ggstatsplot::gghistostats(
  data = iris, # dataframe from which variable is to be taken
  x = Sepal.Length, # numeric variable whose distribution is of interest
  title = "Distribution of Iris sepal length", # title for the plot
  caption = substitute(
    paste(italic("Source:"), "Ronald Fisher's Iris data set")
  ),
  bar.measure = "both",
  test.value = 5, # default value is 0
  test.value.line = TRUE, # display a vertical line at test value
  centrality.parameter = "mean", # measure of central tendency to plot
  centrality.line.args = list(color = "darkred"), # central tendency line
  binwidth = 0.10, # binwidth value (experiment)
  ggtheme = hrbrthemes::theme_ipsum_tw(), # choosing a different theme
  ggstatsplot.layer = FALSE, # turn off ggstatsplot theme layer
  package = "ggthemr",
  palette = "dust"
)
grouped_gghistostats
# Fix the RNG seed so the output is reproducible
set.seed(123)

# Budget histograms for four selected genres, one panel per genre
ggstatsplot::grouped_gghistostats(
  data = dplyr::filter(
    .data = ggstatsplot::movies_long,
    genre %in% c("Action", "Action Comedy", "Action Drama", "Comedy")
  ),
  x = budget,
  grouping.var = genre, # variable used to split the data into panels
  type = "robust", # use robust location measure
  xlab = "Movies budget (in million US$)",
  title.prefix = "Movie genre",
  title.text = "Movies budgets for different genres",
  normal.curve = TRUE, # superimpose a normal distribution curve
  normal.curve.args = list(color = "red", size = 1),
  ggtheme = ggthemes::theme_economist(),
  # applied to every panel: override the default x-axis breaks and limits
  ggplot.component = list(
    ggplot2::scale_x_continuous(
      breaks = seq(0, 200, 50),
      limits = c(0, 200)
    )
  ),
  plotgrid.args = list(nrow = 2) # arrange the panels in two rows
)
ggcorrmat
轻松绘制相关系数矩阵图 (a matrix of correlation coefficients)。Python 中也可以轻松绘制该图:Python可视化matplotlib&seaborn16-相关性heatmap
# Fix the RNG seed so the output is reproducible
set.seed(123)
library(ggstatsplot)

# Keep only the gapminder rows for the year 2007
gapminder_2007 <- dplyr::filter(.data = gapminder::gapminder, year == 2007)

# Correlation matrix with explicitly supplied color codes.
# The call previously ended with a stray trailing comma, which passes an
# empty argument to the function; it has been removed.
ggstatsplot::ggcorrmat(
  data = gapminder_2007, # data from which variable is to be taken
  cor.vars = lifeExp:gdpPercap, # specifying correlation matrix variables
  colors = c("#E69F00", "white", "#d5695d") # custom color codes
)
# Spearman correlation matrix (lower triangle) colored from a palette
# instead of explicit color codes
ggstatsplot::ggcorrmat(
  data = gapminder_2007, # data from which variable is to be taken
  cor.vars = lifeExp:gdpPercap, # specifying correlation matrix variables
  cor.vars.names = c(
    "Life Expectancy",
    "population",
    "GDP (per capita)"
  ),
  type = "spearman", # which correlation coefficient is to be computed
  matrix.type = "lower", # correlation matrix structure
  title = "Gapminder correlation matrix", # custom title
  subtitle = "Source: Gapminder Foundation", # custom subtitle
  lab.col = "red", # label color
  colors = NULL, # do not pass explicit color codes
  package = "ggthemr", # take the colors from a palette instead
  palette = "grape",
  ggtheme = ggplot2::theme_light(), # selected ggplot2 theme
  ggstatsplot.layer = FALSE # turn off default ggstatsplot theme overlay
)
grouped_ggcorrmat
# Fix the RNG seed so the output is reproducible
set.seed(123)

# Set the repr.plot.* options (plot width, height, and resolution)
options(
  repr.plot.width = 13.5,
  repr.plot.height = 20,
  repr.plot.res = 400
)

# Correlation matrices of diamond attributes, one per level of `cut`;
# only 5% of the data is sampled to speed things up
ggstatsplot::grouped_ggcorrmat(
  # arguments relevant for ggstatsplot::ggcorrmat
  data = dplyr::sample_frac(tbl = ggplot2::diamonds, size = 0.05),
  grouping.var = cut,
  cor.vars = c(carat, depth:z),
  cor.vars.names = c(
    "carat",
    "total depth",
    "table",
    "price",
    "length (in mm)",
    "width (in mm)",
    "depth (in mm)"
  ),
  type = "robust", # percentage bend correlation coefficient
  beta = 0.2, # bending constant
  p.adjust.method = "holm", # p-value adjustment for multiple comparisons
  lab.size = 3.5,
  title.prefix = "Quality of cut",
  # arguments relevant for ggstatsplot::combine_plots
  title.text = "Relationship between diamond attributes and price across cut",
  title.args = list(size = 16, color = "red"),
  caption.text = "Dataset: Diamonds from ggplot2 package",
  caption.args = list(size = 14, color = "blue"),
  plotgrid.args = list(
    nrow = 3,
    ncol = 2,
    labels = c("(a)", "(b)", "(c)", "(d)", "(e)")
  )
)
ggcoefstats
类似下面这种图
本文结束,更多好文:
有意见请移步到QQ群629562529反馈,一起进步哈!