Lesson 06 for Plotting in R for Biologists

venndiagram

前面几节课绘制的图形只有条形图,但是我们科研过程中需要绘制各种各样的图形来展现我们的数据,这节课就主要讲一下利用ggplot2绘制各种图形,这也是ggplot2的魔力所在。相同的数据可以通过不同类型的图形来可视化。本节课主要将绘制以下几类图形:

  • 条形图
  • 直方图
  • 散点图
  • 箱线图
  • 小提琴图
  • 密度图
  • 点状图
  • 线图
  • 饼图
  • 韦恩图

数据加载清洗

# Attach the tidyverse and enlarge the base font size of the default theme.
library(tidyverse)
theme_set(theme_gray(base_size = 18))

# Read the tab-separated variant table with quote handling disabled and
# strings kept as character vectors.
# NOTE(review): header defaults to TRUE here, so the first line of the file is
# consumed as a header before the names are replaced below -- confirm that the
# BED file really starts with a header row.
my_data <- read.delim(
  "variants_from_assembly.bed",
  quote = "",
  stringsAsFactors = FALSE
)
names(my_data) <- c(
  "chrom", "start", "stop", "name", "size",
  "strand", "type", "ref.dist", "query.dist"
)
head(my_data)
##   chrom     start      stop name size strand      type ref.dist query.dist
## 1     6 102958468 102958469  SV2  317      + Insertion      -14        303
## 2     6 102741692 102741693  SV3  130      +  Deletion      130          0
## 3     6 102283759 102283760  SV4 1271      + Insertion      -12       1259
## 4     6 101194032 101194033  SV5 2864      + Insertion      -13       2851
## 5     6 101056644 101056645  SV6  265      + Insertion        0        265
## 6     6 100407499 100407500  SV7  334      + Insertion        0        334
# Inspect the chromosome column as it was read from the file.
summary(my_data$chrom)
##    Length     Class      Mode 
##      9555 character character
# Filter: keep only rows whose chrom is one of the values in chrom_levels.
# 1:22 already is the integer sequence (no seq() wrapper needed); c() coerces
# it to character next to "X", "Y" and "MT".
chrom_levels <- c(1:22, "X", "Y", "MT")
my_data <- my_data[my_data$chrom %in% chrom_levels, ]
# Chromosome ordering: make chrom a factor whose levels follow chrom_levels.
my_data$chrom <- factor(my_data$chrom, levels = chrom_levels)
# Type ordering: make type a factor with a fixed level order.
my_data$type <- factor(
  my_data$type,
  levels = c("Insertion", "Deletion", "Expansion", "Contraction")
)
head(my_data)
##   chrom     start      stop name size strand      type ref.dist query.dist
## 1     6 102958468 102958469  SV2  317      + Insertion      -14        303
## 2     6 102741692 102741693  SV3  130      +  Deletion      130          0
## 3     6 102283759 102283760  SV4 1271      + Insertion      -12       1259
## 4     6 101194032 101194033  SV5 2864      + Insertion      -13       2851
## 5     6 101056644 101056645  SV6  265      + Insertion        0        265
## 6     6 100407499 100407500  SV7  334      + Insertion        0        334

可视化

条形图

# Bar chart: one bar per chrom level, filled by type.
ggplot(data = my_data, mapping = aes(x = chrom, fill = type)) +
  geom_bar()

mark

直方图

# Histogram of size, filled by type, using the default binning.
ggplot(data = my_data, mapping = aes(x = size, fill = type)) +
  geom_histogram()

mark

# Same histogram with the x axis limited to the range 0-500.
ggplot(data = my_data, mapping = aes(x = size, fill = type)) +
  geom_histogram() +
  xlim(0, 500)

mark

# Histogram limited to 0-500 on the x axis, with bins 5 units wide.
ggplot(data = my_data, mapping = aes(x = size, fill = type)) +
  geom_histogram(binwidth = 5) +
  xlim(0, 500)

mark

散点图

# Scatter plot of query.dist against ref.dist; type is mapped to colour.
ggplot(
  data = my_data,
  mapping = aes(x = ref.dist, y = query.dist, color = type)
) +
  geom_point()

mark

# Same scatter plot with both axes limited to the range -500..500.
ggplot(
  data = my_data,
  mapping = aes(x = ref.dist, y = query.dist, color = type)
) +
  geom_point() +
  xlim(-500, 500) +
  ylim(-500, 500)

mark

# Map the numeric variable size to colour instead of type.
ggplot(
  data = my_data,
  mapping = aes(x = ref.dist, y = query.dist, color = size)
) +
  geom_point() +
  xlim(-500, 500) +
  ylim(-500, 500)

mark

# Colour by size again, with the colour gradient's limits set to 0-500.
ggplot(
  data = my_data,
  mapping = aes(x = ref.dist, y = query.dist, color = size)
) +
  geom_point() +
  xlim(-500, 500) +
  ylim(-500, 500) +
  scale_color_gradient(limits = c(0, 500))

mark

箱线图

# Box plot of size for each type.
ggplot(data = my_data, mapping = aes(x = type, y = size)) +
  geom_boxplot()

mark

# Box plot of size for each type, with the boxes filled by type.
ggplot(data = my_data, mapping = aes(x = type, y = size, fill = type)) +
  geom_boxplot()

mark

# Filled box plot with the coordinates flipped.
ggplot(data = my_data, mapping = aes(x = type, y = size, fill = type)) +
  geom_boxplot() +
  coord_flip()

mark

小提琴图

# Violin plot of size per type with the y axis limited to 0-1000.
# The fill legend is hidden with guides(fill = "none"); passing FALSE to
# guides() is deprecated in current ggplot2 releases.
ggplot(data = my_data, mapping = aes(x = type, y = size, fill = type)) +
  geom_violin() +
  ylim(0, 1000) +
  guides(fill = "none")

mark

# Violin plot with a narrower smoothing bandwidth. `adjust` multiplies the
# density bandwidth (default 1): lower values show finer detail.
# The fill legend is hidden with guides(fill = "none"); passing FALSE to
# guides() is deprecated in current ggplot2 releases.
ggplot(data = my_data, mapping = aes(x = type, y = size, fill = type)) +
  geom_violin(adjust = 0.2) +
  ylim(0, 1000) +
  guides(fill = "none")

mark

# Axis transformation: violin plot with a log10-scaled y axis.
ggplot(data = my_data, mapping = aes(x = type, y = size, fill = type)) +
  geom_violin() +
  scale_y_log10()

mark

密度图

# Density curves of size, one per type, with the x axis limited to 0-500.
ggplot(data = my_data, mapping = aes(x = size, fill = type)) +
  geom_density() +
  xlim(0, 500)

mark

# Density curves of size stacked on top of each other (position = "stack").
ggplot(data = my_data, mapping = aes(x = size, fill = type)) +
  geom_density(position = "stack") +
  xlim(0, 500)

mark

# Density curves of size drawn semi-transparently (alpha = 0.5).
ggplot(data = my_data, mapping = aes(x = size, fill = type)) +
  geom_density(alpha = 0.5) +
  xlim(0, 500)

mark

# Faceting: one density panel per type, arranged in rows.
ggplot(data = my_data, mapping = aes(x = size, fill = type)) +
  geom_density() +
  xlim(0, 500) +
  facet_grid(type ~ .)

mark

点状图

# Dot plot of size, filled by type.
ggplot(data = my_data, mapping = aes(x = size, fill = type)) +
  geom_dotplot()

mark

线图

# Read the comma-separated time-course table (header row present, quote
# handling disabled, strings converted to factors).
time_cource <- read.csv(
  "time_course_data.txt",
  quote = "",
  stringsAsFactors = TRUE
)
head(time_cource)
##   seconds value sample
## 1       0  0.00      A
## 2       1  5.97      A
## 3       2 13.42      A
## 4       3 56.08      A
## 5       4 98.04      A
## 6       5 27.11      A
# Line chart of value over seconds, one coloured line per sample.
ggplot(
  data = time_cource,
  mapping = aes(x = seconds, y = value, color = sample)
) +
  geom_line()

mark

# Same line chart drawn with thicker lines.
# NOTE(review): recent ggplot2 releases prefer `linewidth` over `size` for
# line geoms; `size` is kept so the call still works on older versions.
ggplot(
  data = time_cource,
  mapping = aes(x = seconds, y = value, color = sample)
) +
  geom_line(size = 3)

mark

饼图

# Summarise the type column; the printed result below shows one count per type.
type_counts <- summary(my_data$type)
type_counts
##   Insertion    Deletion   Expansion Contraction 
##        3141        2256        2476        1674
# Draw a base-graphics pie chart, taking one colour per slice from the
# ColorBrewer "Set1" palette.
library(RColorBrewer)
n_types <- length(type_counts)
pie(type_counts, col = brewer.pal(n_types, "Set1"))

mark

韦恩图

这里利用四个基因集

# Read the four gene-list files (no header row) and keep the first column (V1)
# of each. stringsAsFactors = FALSE keeps the identifiers as plain vectors
# rather than factors on R versions where factors are the default.
listA <- read.csv("genes_list_A.txt", header = FALSE, stringsAsFactors = FALSE)
A <- listA$V1
listB <- read.csv("genes_list_B.txt", header = FALSE, stringsAsFactors = FALSE)
B <- listB$V1
listC <- read.csv("genes_list_C.txt", header = FALSE, stringsAsFactors = FALSE)
C <- listC$V1
listD <- read.csv("genes_list_D.txt", header = FALSE, stringsAsFactors = FALSE)
D <- listD$V1
library(VennDiagram)
# venn.diagram() writes the figure to `filename`. To draw it on the current
# graphics device instead, pass filename = NULL and give the returned grob
# list to grid::grid.draw().
venn.diagram(
  list(A = A, B = B, C = C, D = D),
  fill = c("yellow", "red", "cyan", "forestgreen"),
  cex = 1.5,
  filename = "Venn_diagram_genes_4.png"
)

mark

SessionInfo

# Print the R version, platform, locale and package versions of this session.
sessionInfo()
## R version 3.4.3 (2017-11-30)
## Platform: x86_64-w64-mingw32/x64 (64-bit)
## Running under: Windows 10 x64 (build 16299)
## 
## Matrix products: default
## 
## locale:
## [1] LC_COLLATE=Chinese (Simplified)_China.936 
## [2] LC_CTYPE=Chinese (Simplified)_China.936   
## [3] LC_MONETARY=Chinese (Simplified)_China.936
## [4] LC_NUMERIC=C                              
## [5] LC_TIME=Chinese (Simplified)_China.936    
## 
## attached base packages:
## [1] grid      stats     graphics  grDevices utils     datasets  methods  
## [8] base     
## 
## other attached packages:
##  [1] VennDiagram_1.6.18  futile.logger_1.4.3 RColorBrewer_1.1-2 
##  [4] forcats_0.2.0       stringr_1.2.0       dplyr_0.7.4        
##  [7] purrr_0.2.4         readr_1.1.1         tidyr_0.7.2        
## [10] tibble_1.4.2        ggplot2_2.2.1.9000  tidyverse_1.2.1    
## 
## loaded via a namespace (and not attached):
##  [1] reshape2_1.4.3       haven_1.1.1          lattice_0.20-35     
##  [4] colorspace_1.3-2     htmltools_0.3.6      yaml_2.1.16         
##  [7] rlang_0.1.6          pillar_1.1.0         foreign_0.8-69      
## [10] glue_1.2.0           lambda.r_1.2         modelr_0.1.1        
## [13] readxl_1.0.0         bindrcpp_0.2         bindr_0.1           
## [16] plyr_1.8.4           munsell_0.4.3        gtable_0.2.0        
## [19] cellranger_1.1.0     rvest_0.3.2          psych_1.7.8         
## [22] evaluate_0.10.1      labeling_0.3         knitr_1.18          
## [25] parallel_3.4.3       broom_0.4.3          Rcpp_0.12.15        
## [28] scales_0.5.0.9000    backports_1.1.2      jsonlite_1.5        
## [31] mnormt_1.5-5         hms_0.4.1            digest_0.6.14       
## [34] stringi_1.1.6        rprojroot_1.3-2      cli_1.0.0           
## [37] tools_3.4.3          magrittr_1.5         lazyeval_0.2.1      
## [40] futile.options_1.0.0 crayon_1.3.4         pkgconfig_2.0.1     
## [43] xml2_1.2.0           lubridate_1.7.1      assertthat_0.2.0    
## [46] rmarkdown_1.8        httr_1.3.1           rstudioapi_0.7      
## [49] R6_2.2.2             nlme_3.1-131         compiler_3.4.3
Researcher

I am a PhD student of Crop Genetics and Breeding at the Zhejiang University Crop Science Lab. My research interests cover a range of topics: population genetics, evolution and ecotype divergence analysis of oilseed rape, and genome-wide association studies (GWAS) of agronomic traits.

comments powered by Disqus