第 1 章 直方图
1.1 基本直方图
1.1.1 控制bin宽度
# Read the input table; the pipeline below relies on a `price` column.
data1 <- read.table("Datas/1_OneNum.csv", header = TRUE)

# Bin widths to compare against each other.
binwidths <- c(3, 15, 30, 100)

# Build one histogram per bin width, stored under the bin width as a name.
plots <- list()
for (bin in binwidths) {
  p <- data1 %>%
    filter(price < 300) %>%
    ggplot(aes(x = price)) +
    geom_histogram(
      binwidth = bin,
      fill = "#69b3a2",
      color = "#e9ecef",
      alpha = 0.9
    ) +
    ggtitle(paste0("Bin size = ", bin)) +
    theme_ipsum() +
    # Theme element settings must be passed through theme(); they cannot be
    # added to the plot as a bare `name = value` expression.
    theme(
      plot.title = element_text(size = 15)
    )
  plots[[as.character(bin)]] <- p
}

# Arrange all histograms in a single figure once the loop has finished.
ggarrange(plotlist = plots)
1.2 分组直方图
# Simulated data: two groups of 1000 normal draws each, the second group
# generated with mean 4.
data2 <- data.frame(
  type = c(rep("variable 1", 1000), rep("variable 2", 1000)),
  value = c(rnorm(1000), rnorm(1000, mean = 4))
)

# Represent it: both groups share one panel; position = "identity" overlays
# the bars instead of stacking them, and alpha keeps both groups visible.
data2 %>%
  ggplot(aes(x = value, fill = type)) +
  geom_histogram(color = "#e9ecef", alpha = 0.6, position = "identity") +
  scale_fill_manual(values = c("#69b3a2", "#404080")) +
  theme_ipsum()
1.3 分面直方图
# Read the comma-separated table and reshape it to long format: one row per
# (text, value) pair, where `text` holds the former column names.
data3 <- read.table("Datas/probly.csv", header = TRUE, sep = ",")
data3 <- data3 %>%
  gather(key = "text", value = "value") %>%
  # Replace the dots in the former column names with spaces.
  mutate(text = gsub("\\.", " ", text)) %>%
  mutate(value = round(as.numeric(value), 0))

# plot: one histogram panel per `text` level.
data3 %>%
  mutate(text = fct_reorder(text, value)) %>%
  ggplot(aes(x = value, color = text, fill = text)) +
  geom_histogram(alpha = 0.6, binwidth = 5) +
  scale_fill_viridis(discrete = TRUE) + # library(viridis)
  scale_color_viridis(discrete = TRUE) +
  theme_ipsum() +
  # Panel spacing and strip text are theme elements, so they go in theme().
  theme(
    panel.spacing = unit(0.1, "lines"),
    strip.text.x = element_text(size = 8)
  ) +
  xlab("") +
  ylab("Assigned Probability (%)") +
  # Facet by `text` so that each level gets its own panel and strip label.
  facet_wrap(~text)
1.4 镜像直方图
# Simulated data: two columns of 1000 normal draws, the second with mean 2.
data4 <- data.frame(
  var1 = rnorm(1000),
  var2 = rnorm(1000, mean = 2)
)

# Mirror histogram: var1 is drawn above the axis and var2 below it by
# negating its computed density. Each layer supplies its own x aesthetic,
# so no plot-level mapping is needed (data4 has no column named `x`).
ggplot(data4) +
  geom_histogram(aes(x = var1, y = after_stat(density)), fill = "#69b3a2") +
  # annotate() draws each label once; a geom with constant aesthetics would
  # repeat the label for every row of data4.
  annotate(
    "label",
    x = 4.5, y = 0.25, label = "variable1", color = "#69b3a2"
  ) +
  geom_histogram(aes(x = var2, y = after_stat(-density)), fill = "#404080") +
  annotate(
    "label",
    x = 4.5, y = -0.25, label = "variable2", color = "#404080"
  ) +
  theme_ipsum() +
  xlab("value of x")
1.5 带边缘的直方图
# Base scatter plot of mtcars: wt against mpg, with both point colour and
# point size mapped to cyl. The chain ends at geom_point() so that the
# assignments below are separate statements.
p <- ggplot(mtcars, aes(x = wt, y = mpg, color = cyl, size = cyl)) +
  geom_point()

# with marginal histogram
p1 <- ggMarginal(p, type = "histogram")

# marginal density
p2 <- ggMarginal(p, type = "density")

# marginal boxplot
p3 <- ggMarginal(p, type = "boxplot")
1.5.1 边缘样式
# Marginal histograms with the main plot 10x bigger than the marginals.
p1 <- ggMarginal(
  p,
  size = 10,
  type = "histogram"
)

# Customised marginal histograms: slateblue fill, with bins = 10 passed
# through xparams.
p2 <- ggMarginal(
  p,
  xparams = list(bins = 10),
  fill = "slateblue",
  type = "histogram"
)

# Marginal plot on the x axis only, coloured purple.
p3 <- ggMarginal(
  p,
  size = 4,
  color = "purple",
  margins = "x"
)