Lesson 06 for Plotting in R for Biologists
前面几节课只绘制了条形图,但在科研过程中,我们需要用各种各样的图形来展示数据。这节课主要讲解如何利用 ggplot2 绘制不同类型的图形:同一份数据可以用不同类型的图形来可视化,这正是 ggplot2 的魅力所在。本节课将绘制以下几类图形:
- 条形图
- 直方图
- 散点图
- 箱线图
- 小提琴图
- 密度图
- 点状图
- 线图
- 饼图
- 韦恩图
数据加载清洗
# Load the plotting stack and use a larger base font for every figure.
library(tidyverse)
theme_set(theme_gray(base_size = 18))

# Read the tab-separated variant table.
# header = FALSE: the column names are assigned by hand just below, so the
# file is expected to have no header row. With read.csv()'s default
# header = TRUE the first record would be consumed as column names and
# silently dropped (the recorded output further down starts at SV2, which is
# consistent with that having happened).
# NOTE(review): assumes the BED file really has no header line -- confirm
# against the data file.
my_data <- read.csv(
  "variants_from_assembly.bed",
  sep = "\t",
  quote = "",
  header = FALSE,
  stringsAsFactors = FALSE
)
names(my_data) <- c(
  "chrom", "start", "stop", "name", "size",
  "strand", "type", "ref.dist", "query.dist"
)
head(my_data)
## chrom start stop name size strand type ref.dist query.dist
## 1 6 102958468 102958469 SV2 317 + Insertion -14 303
## 2 6 102741692 102741693 SV3 130 + Deletion 130 0
## 3 6 102283759 102283760 SV4 1271 + Insertion -12 1259
## 4 6 101194032 101194033 SV5 2864 + Insertion -13 2851
## 5 6 101056644 101056645 SV6 265 + Insertion 0 265
## 6 6 100407499 100407500 SV7 334 + Insertion 0 334
# Quick look at the chromosome column before it is converted to a factor.
summary(my_data[["chrom"]])
## Length Class Mode
## 9555 character character
# Chromosomes to keep, in display order: 1-22, then X, Y and MT.
# c() coerces the integers to character, so this matches the character
# `chrom` column. Defined once and reused for both filtering and ordering.
chrom_levels <- c(1:22, "X", "Y", "MT")

# Filter: keep only records whose chromosome is in the list above.
my_data <- my_data[my_data$chrom %in% chrom_levels, ]

# Order chromosomes by turning the column into a factor with explicit levels.
my_data$chrom <- factor(my_data$chrom, levels = chrom_levels)

# Order the variant types the same way; any value not listed becomes NA.
type_levels <- c("Insertion", "Deletion", "Expansion", "Contraction")
my_data$type <- factor(my_data$type, levels = type_levels)

head(my_data)
## chrom start stop name size strand type ref.dist query.dist
## 1 6 102958468 102958469 SV2 317 + Insertion -14 303
## 2 6 102741692 102741693 SV3 130 + Deletion 130 0
## 3 6 102283759 102283760 SV4 1271 + Insertion -12 1259
## 4 6 101194032 101194033 SV5 2864 + Insertion -13 2851
## 5 6 101056644 101056645 SV6 265 + Insertion 0 265
## 6 6 100407499 100407500 SV7 334 + Insertion 0 334
可视化
条形图
# Bar chart: number of variants per chromosome, bars filled by variant type.
ggplot(data = my_data, mapping = aes(x = chrom, fill = type)) +
  geom_bar()
直方图
# Histograms of variant size, filled by variant type.
size_hist <- ggplot(data = my_data, mapping = aes(x = size, fill = type))

# Default binning over the full size range.
size_hist + geom_histogram()

# Same plot with the x axis limited to 0-500.
size_hist + geom_histogram() + xlim(0, 500)

# Bins of width 5 on the 0-500 range.
size_hist + geom_histogram(binwidth = 5) + xlim(0, 500)
散点图
# Scatter plots of ref.dist against query.dist.

# Map the categorical variable `type` to point colour.
dist_by_type <- ggplot(
  data = my_data,
  mapping = aes(x = ref.dist, y = query.dist, colour = type)
)
dist_by_type + geom_point()
dist_by_type + geom_point() + xlim(-500, 500) + ylim(-500, 500)

# Map the numeric variable `size` to point colour.
dist_by_size <- ggplot(
  data = my_data,
  mapping = aes(x = ref.dist, y = query.dist, colour = size)
)
dist_by_size + geom_point() + xlim(-500, 500) + ylim(-500, 500)

# Limit the colour gradient to sizes between 0 and 500.
dist_by_size +
  geom_point() +
  xlim(-500, 500) +
  ylim(-500, 500) +
  scale_color_gradient(limits = c(0, 500))
箱线图
# Box plots of variant size for each variant type.

# Plain boxes.
ggplot(data = my_data, mapping = aes(x = type, y = size)) +
  geom_boxplot()

# Boxes filled by type.
filled_box <- ggplot(
  data = my_data,
  mapping = aes(x = type, y = size, fill = type)
)
filled_box + geom_boxplot()

# Same plot with the axes flipped.
filled_box + geom_boxplot() + coord_flip()
小提琴图
# Violin plots of variant size for each variant type.
# The fill legend is hidden with guides(fill = "none"); the older spelling
# guides(fill = FALSE) is deprecated in current ggplot2 releases.
ggplot(my_data, aes(x = type, y = size, fill = type)) +
  geom_violin() +
  ylim(0, 1000) +
  guides(fill = "none")

# `adjust` scales the density bandwidth (default 1): smaller values smooth
# less and so show finer detail in the violin outline.
ggplot(my_data, aes(x = type, y = size, fill = type)) +
  geom_violin(adjust = 0.2) +
  ylim(0, 1000) +
  guides(fill = "none")

# Axis transformation: show size on a log10 y scale.
ggplot(my_data, aes(x = type, y = size, fill = type)) +
  geom_violin() +
  scale_y_log10()
密度图
# Density plots of variant size (x axis limited to 0-500), filled by type.
size_density <- ggplot(data = my_data, mapping = aes(x = size, fill = type))

# Overlapping densities.
size_density + geom_density() + xlim(0, 500)

# Densities stacked on top of each other.
size_density + geom_density(position = "stack") + xlim(0, 500)

# Overlapping, semi-transparent densities.
size_density + geom_density(alpha = 0.5) + xlim(0, 500)

# Faceting: one row of panels per variant type.
size_density + geom_density() + xlim(0, 500) + facet_grid(type ~ .)
点状图
# Dot plot of variant size, dots filled by variant type.
ggplot(data = my_data, mapping = aes(x = size, fill = type)) +
  geom_dotplot()
线图
# Read the comma-separated time-course table (first row is the header);
# text columns are read as factors.
time_cource <- read.csv(
  "time_course_data.txt",
  header = TRUE,
  sep = ",",
  quote = "",
  stringsAsFactors = TRUE
)
head(time_cource)
## seconds value sample
## 1 0 0.00 A
## 2 1 5.97 A
## 3 2 13.42 A
## 4 3 56.08 A
## 5 4 98.04 A
## 6 5 27.11 A

# One line per sample: value over time, coloured by sample.
value_over_time <- ggplot(
  data = time_cource,
  mapping = aes(x = seconds, y = value, colour = sample)
)
value_over_time + geom_line()

# Thicker lines.
# NOTE(review): ggplot2 >= 3.4.0 prefers `linewidth` over `size` for lines;
# `size` is kept here for compatibility with the ggplot2 version this lesson
# was recorded with.
value_over_time + geom_line(size = 3)
饼图
# Count the variants of each type (summary() on a factor returns the
# per-level counts).
type_counts <- summary(my_data[["type"]])
type_counts
## Insertion Deletion Expansion Contraction
## 3141 2256 2476 1674

# Draw the pie with one "Set1" ColorBrewer colour per slice.
library(RColorBrewer)
slice_colours <- brewer.pal(n = length(type_counts), name = "Set1")
pie(x = type_counts, col = slice_colours)
韦恩图
这里利用四个基因集
# Read the four gene lists; each file has no header, so the values end up in
# column V1.
listA <- read.csv(file = "genes_list_A.txt", header = FALSE)
listB <- read.csv(file = "genes_list_B.txt", header = FALSE)
listC <- read.csv(file = "genes_list_C.txt", header = FALSE)
listD <- read.csv(file = "genes_list_D.txt", header = FALSE)

A <- listA[["V1"]]
B <- listB[["V1"]]
C <- listC[["V1"]]
D <- listD[["V1"]]

library(VennDiagram)

# Note: with a `filename`, venn.diagram() writes the figure straight to that
# file instead of drawing it on the active graphics device.
gene_sets <- list(A = A, B = B, C = C, D = D)
set_fills <- c("yellow", "red", "cyan", "forestgreen")
venn.diagram(
  x = gene_sets,
  filename = "Venn_diagram_genes_4.png",
  fill = set_fills,
  cex = 1.5
)
SessionInfo
# Record the R version, platform and package versions used for this lesson.
utils::sessionInfo()
## R version 3.4.3 (2017-11-30)
## Platform: x86_64-w64-mingw32/x64 (64-bit)
## Running under: Windows 10 x64 (build 16299)
##
## Matrix products: default
##
## locale:
## [1] LC_COLLATE=Chinese (Simplified)_China.936
## [2] LC_CTYPE=Chinese (Simplified)_China.936
## [3] LC_MONETARY=Chinese (Simplified)_China.936
## [4] LC_NUMERIC=C
## [5] LC_TIME=Chinese (Simplified)_China.936
##
## attached base packages:
## [1] grid stats graphics grDevices utils datasets methods
## [8] base
##
## other attached packages:
## [1] VennDiagram_1.6.18 futile.logger_1.4.3 RColorBrewer_1.1-2
## [4] forcats_0.2.0 stringr_1.2.0 dplyr_0.7.4
## [7] purrr_0.2.4 readr_1.1.1 tidyr_0.7.2
## [10] tibble_1.4.2 ggplot2_2.2.1.9000 tidyverse_1.2.1
##
## loaded via a namespace (and not attached):
## [1] reshape2_1.4.3 haven_1.1.1 lattice_0.20-35
## [4] colorspace_1.3-2 htmltools_0.3.6 yaml_2.1.16
## [7] rlang_0.1.6 pillar_1.1.0 foreign_0.8-69
## [10] glue_1.2.0 lambda.r_1.2 modelr_0.1.1
## [13] readxl_1.0.0 bindrcpp_0.2 bindr_0.1
## [16] plyr_1.8.4 munsell_0.4.3 gtable_0.2.0
## [19] cellranger_1.1.0 rvest_0.3.2 psych_1.7.8
## [22] evaluate_0.10.1 labeling_0.3 knitr_1.18
## [25] parallel_3.4.3 broom_0.4.3 Rcpp_0.12.15
## [28] scales_0.5.0.9000 backports_1.1.2 jsonlite_1.5
## [31] mnormt_1.5-5 hms_0.4.1 digest_0.6.14
## [34] stringi_1.1.6 rprojroot_1.3-2 cli_1.0.0
## [37] tools_3.4.3 magrittr_1.5 lazyeval_0.2.1
## [40] futile.options_1.0.0 crayon_1.3.4 pkgconfig_2.0.1
## [43] xml2_1.2.0 lubridate_1.7.1 assertthat_0.2.0
## [46] rmarkdown_1.8 httr_1.3.1 rstudioapi_0.7
## [49] R6_2.2.2 nlme_3.1-131 compiler_3.4.3