suppressPackageStartupMessages({
library(tidyverse)
library(rtracklayer)
library(ggrepel)
library(biomaRt)
library(readxl)
library(cowplot)
library(paletteer)
})
source("code/fisher_overlap.R")
Figure 3 - RBP Enrichments
Load Data
<- read_tsv("output/tables/TableS3_v3.tsv.gz") tableS3.gene
Rows: 102319 Columns: 17
── Column specification ────────────────────────────────────────────────────────
Delimiter: "\t"
chr (5): isoform_id, gene_id, gene_name, condition_1, condition_2
dbl (9): DTU_dIF, DTU_pval, DTU_qval, DTE_log2FC, DTE_pval, DTE_qval, DGE_lo...
lgl (3): DTU, DTE, DGE
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
RBP Enrichments
Celine’s RBP Targets
= read.csv("data/RBP_Data/CSVs/RBP_targets_v5.csv", header=TRUE);
brainRBPs = dplyr::select(brainRBPs, -c(MGI.symbol, ENSMUSG))
brainRBPs = read.csv("data/RBP_Data/CSVs/RBP_targets_ENCODE.csv", header=TRUE);
encodeRBPs = encodeRBPs %>% filter(cell.type=="HepG2") %>% rename("hgnc_symbol"="HGNC.symbol", "ensembl_gene_id"="ENSG")
encodeRBPs
= rbind(brainRBPs,encodeRBPs)
rbp_targets
= useMart("ENSEMBL_MART_ENSEMBL","mmusculus_gene_ensembl")
mart = listFilters(mart); a = listAttributes(mart)
f = c("ensembl_gene_id", "external_gene_name", "hsapiens_homolog_ensembl_gene", "hsapiens_homolog_associated_gene_name","hsapiens_homolog_orthology_type")
featuresToGet = getBM(attributes = featuresToGet,mart = mart)
mouseHumanHomologs
= mouseHumanHomologs %>% as_tibble() %>% filter(hsapiens_homolog_orthology_type == "ortholog_one2one") %>% dplyr::select("hsapiens_homolog_ensembl_gene") %>% pull() human_mouse_bg
= list("DTU"= tableS3.gene %>% filter(DTU) %>% mutate(gene_id = substr(gene_id,1,15)) %>% dplyr::select(gene_id) %>% pull(),
genesets "DTE" = tableS3.gene %>% filter(DTE) %>% mutate(gene_id = substr(gene_id,1,15)) %>% dplyr::select(gene_id) %>% pull(),
"DGE" = tableS3.gene %>% filter(DGE) %>% mutate(gene_id = substr(gene_id,1,15)) %>% dplyr::select(gene_id) %>% pull(),
"DTUnotDGE" = tableS3.gene %>% filter(DTU,DGE_pval>.05) %>% mutate(gene_id = substr(gene_id,1,15)) %>%
::select(gene_id) %>% pull())
dplyr
= tableS3.gene %>% mutate(gene_id = substr(gene_id,1,15)) %>% dplyr::select(gene_id) %>% pull() %>% unique()
DTU.bg
= data.frame()
df_fisher for(i in 1:length(genesets)) {
for(this_dataset in unique(na.omit(rbp_targets$dataset.id))) {
= rbp_targets %>% filter(dataset.id == this_dataset) %>% mutate(target = paste0(RBP, "_", data.type, "_", cell.type)) %>% dplyr::select(target) %>% unique() %>% pull()
this_rbp = rbp_targets %>% filter(dataset.id == this_dataset) %>% dplyr::select(ENSG) %>% pull()
target_genes
if(grepl("Human",this_rbp)) {
= ORA(genesets[[i]], target_genes, DTU.bg, DTU.bg)
this_or else {
} = ORA(genesets[[i]], target_genes, DTU.bg, human_mouse_bg)
this_or
}= rbind(df_fisher, data.frame(set = names(genesets)[[i]], dataset = this_dataset, target = this_rbp, t(this_or)))
df_fisher
}
}
$OR = as.numeric(df_fisher$OR)
df_fisher$Fisher.p[df_fisher$OR<1] = 1
df_fisher$Fisher.p = p.adjust(as.numeric(df_fisher$Fisher.p),'fdr')
df_fisher
= read_excel("data/RBP_Data/curatedRBPs_order.xlsx") %>% as_tibble()
order.brainRBPs = read_excel("data/RBP_Data/ENCODE_vanNostrand_NatMeth2016_Fig2a_order.xlsx", sheet=2);
order.Encode.TarReg = order.Encode.TarReg %>% filter(name %in% df_fisher$target)
order.Encode.TarReg = rbind(order.brainRBPs, order.Encode.TarReg)
order
$target = factor(df_fisher$target, levels=order$name)
df_fisher$org = "Mouse"; df_fisher$org[grep("Human",df_fisher$target)] = "Human"
df_fisher$data.type = rbp_targets$data.type[match(df_fisher$dataset, rbp_targets$dataset.id)]
df_fisher$cell.type = rbp_targets$cell.type[match(df_fisher$dataset, rbp_targets$dataset.id)]
df_fisher$target.region = order$target.region[match(df_fisher$target, order$name)]
df_fisher$label = signif(df_fisher$OR,1)
df_fisher$label[df_fisher$Fisher.p>.05] = '' df_fisher
Figure 3H
.1=ggplot(df_fisher %>% filter(set=="DTU"),aes(x=target, y= -log10(Fisher.p), fill=OR)) +
Fig3Hgeom_bar(stat='identity',position = position_dodge2()) + theme_bw() +
geom_hline(yintercept = 1,lty='dashed',size=0.5,color='red') + labs(y='Enrichment\n(-log10 q-value)',x='') +
theme(axis.text.x = element_blank(), axis.ticks.x=element_blank(), axis.text.y=element_text(size=5), axis.title.y=element_text(size=7),
legend.key.size=unit(0.3,'cm'),
legend.text=element_text(size=4),
legend.title=element_text(size=6),
plot.margin=unit(c(5,5,0,5),"pt"), legend.position=c(0.95,0.75),
panel.grid.major=element_blank(), panel.grid.minor=element_blank(), panel.border=element_blank(), axis.line=element_line(color="black", size=0.2))
.2=ggplot(df_fisher %>% filter(set=="DTU"),aes(x=target, label=target.region)) +
Fig3Hgeom_tile(aes(y=factor(1),fill=target.region)) +
geom_point(aes(y=factor(1), shape=data.type),position=position_dodge2(width=1),size=0.5) + scale_shape_manual(values = c(1:9)) +
scale_x_discrete(labels=sapply(strsplit(levels(df_fisher$target), "_"), "[[",1)) +
theme_bw() +
theme(axis.text.y = element_blank(),
axis.ticks.y=element_blank(),
axis.text.x = element_text(angle=45,vjust=1, hjust=1, size=3),
legend.key.size=unit(0.25,'cm'), legend.text=element_text(size=3), legend.title=element_text(size=5),
plot.margin=unit(c(-20,5,5,5),"pt"), legend.position=c(0.5,-6.8), legend.box="horizontal", legend.direction="horizontal",
panel.grid.major=element_blank(), panel.grid.minor=element_blank(), panel.border=element_blank(), axis.line=element_line(color="black", size=0.2)) +
labs(x='', y='') + paletteer::scale_fill_paletteer_d("rcartocolor::Vivid") + guides(fill=guide_legend(order=1))
plot_grid(Fig3H.1,Fig3H.2, align="v", ncol=1, axis="lr", rel_heights=c(5,1))
pdf(file="output/figures/Fig3/Fig3H.2_byTargetRegion.pdf",width = 10,height=2.5)
plot_grid(Fig3H.1,Fig3H.2, align="v", ncol=1, axis="lr", rel_heights=c(4,1))
dev.off()
quartz_off_screen
2
Figure S4B
.1 = ggplot(df_fisher,aes(x=factor(target, levels=rev(levels(target))), y= -log10(Fisher.p), fill=OR)) +
FigS4B# scale_x_discrete(labels=df_fisher$RBP) +
geom_bar(stat='identity',position = position_dodge2()) + coord_flip() + theme_bw() + facet_grid(~set) +
geom_hline(yintercept = 1,lty=2,color='red') + labs(y='Enrichment (-log10 q-value)',x='') +
theme(axis.text.y = element_blank(), axis.ticks.y=element_blank(), axis.text.x=element_text(size=5), axis.title.x=element_text(size=7),
panel.grid.major=element_blank(), panel.grid.minor=element_blank(), axis.line=element_line(color="black", size=0.2),
plot.margin=unit(c(5,5,5,-18),"pt"),
legend.text=element_text(size=5), legend.title=element_text(size=7), legend.key.size=unit(0.4,'cm'),
panel.spacing=unit(-0.1, "lines")) +
labs(x='', y='')
.2 = ggplot(df_fisher %>% filter(set=="DTU"),aes(x=factor(target, levels=rev(levels(target))), label=target.region)) +
FigS4B# geom_tile(aes(y=factor(1),fill=data.type)) +
geom_tile(aes(y=factor(1),fill=target.region)) +
# geom_point(aes(y=factor(1), shape=target.region),position=position_dodge2(width=1),size=0.5) + scale_shape_manual(values = c(1:9)) +
geom_point(aes(y=factor(1), shape=data.type),position=position_dodge2(width=1),size=0.5) + scale_shape_manual(values = c(1:9)) +
# scale_x_discrete(labels=df_fisher$RBP) +
scale_x_discrete(labels=sapply(strsplit(rev(levels(df_fisher$target)), "_"), "[[",1)) +
theme_bw() +
theme(axis.text.x = element_blank(),
axis.ticks.x=element_blank(),
axis.text.y = element_text(angle=0,vjust=1, hjust=1, size=4),
legend.position="none",
plot.margin=unit(c(5,0,5,5),"pt"),
panel.grid.major=element_blank(), panel.grid.minor=element_blank(), panel.border=element_blank(), axis.line=element_line(color="black", size=0.2)) +
labs(x='', y='') + paletteer::scale_fill_paletteer_d("rcartocolor::Vivid") + guides(fill=guide_legend(order=1)) +
coord_flip()
= get_legend(FigS4B.2 + theme(legend.position=c(2.7,0.8), legend.box="horizontal", legend.direction="horizontal",
legend legend.text=element_text(size=4), legend.title=element_text(size=5), legend.key.size=unit(0.25,'cm')))
plot_grid(FigS4B.2,FigS4B.1, legend, align="h", nrow=2, axis="bt", rel_widths=c(1,4.5), rel_heights=c(10,1))
pdf("output/figures/supplement/FigS4B_wEncode_byTargetRegion.pdf", height=10, width=5)
plot_grid(FigS4B.2,FigS4B.1, legend, align="h", nrow=2, axis="bt", rel_widths=c(1,4.5), rel_heights=c(10,1))
dev.off()
quartz_off_screen
2
FigureS4C
= list("DTU.up"= tableS3.gene %>% filter(DTU&DTU_dIF>0) %>% mutate(gene_id = substr(gene_id,1,15)) %>% dplyr::select(gene_id) %>% pull(),
genesets "DTU.down"= tableS3.gene %>% filter(DTU&DTU_dIF<0) %>% mutate(gene_id = substr(gene_id,1,15)) %>% dplyr::select(gene_id) %>% pull(),
"DTE.up" = tableS3.gene %>% filter(DTE&DTE_log2FC>0) %>% mutate(gene_id = substr(gene_id,1,15)) %>% dplyr::select(gene_id) %>% pull(),
"DTE.down" = tableS3.gene %>% filter(DTE&DTE_log2FC<0) %>% mutate(gene_id = substr(gene_id,1,15)) %>% dplyr::select(gene_id) %>% pull(),
"DGE.up" = tableS3.gene %>% filter(DGE&DGE_log2FC>0) %>% mutate(gene_id = substr(gene_id,1,15)) %>% dplyr::select(gene_id) %>% pull(),
"DGE.down" = tableS3.gene %>% filter(DGE&DGE_log2FC<0) %>% mutate(gene_id = substr(gene_id,1,15)) %>% dplyr::select(gene_id) %>% pull()
)
= tableS3.gene %>% mutate(gene_id = substr(gene_id,1,15)) %>% dplyr::select(gene_id) %>% pull() %>% unique()
DTU.bg
= data.frame()
df_fisher for(i in 1:length(genesets)) {
for(this_dataset in unique(na.omit(rbp_targets$dataset.id))) {
= rbp_targets %>% filter(dataset.id == this_dataset) %>% mutate(target = paste0(RBP, "_", data.type, "_", cell.type)) %>% dplyr::select(target) %>% unique() %>% pull()
this_rbp = rbp_targets %>% filter(dataset.id == this_dataset) %>% dplyr::select(ENSG) %>% pull()
target_genes
if(grepl("Human",this_rbp)) {
= ORA(genesets[[i]], target_genes, DTU.bg, DTU.bg)
this_or else {
} = ORA(genesets[[i]], target_genes, DTU.bg, human_mouse_bg)
this_or
}= rbind(df_fisher, data.frame(set = names(genesets)[[i]], dataset = this_dataset, target = this_rbp, t(this_or)))
df_fisher
}
}
$OR = as.numeric(df_fisher$OR)
df_fisher$Fisher.p[df_fisher$OR<1] = 1
df_fisher$Fisher.p = p.adjust(as.numeric(df_fisher$Fisher.p),'fdr')
df_fisher
= read_excel("data/RBP_Data/curatedRBPs_order.xlsx") %>% as_tibble()
order.brainRBPs = read_excel("data/RBP_Data/ENCODE_vanNostrand_NatMeth2016_Fig2a_order.xlsx", sheet=2)
order.Encode.TarReg = order.Encode.TarReg %>% filter(name %in% df_fisher$target)
order.Encode.TarReg = rbind(order.brainRBPs, order.Encode.TarReg)
order
$target = factor(df_fisher$target, levels=order$name)
df_fisher$org = "Mouse"; df_fisher$org[grep("Human",df_fisher$target)] = "Human"
df_fisher$data.type = rbp_targets$data.type[match(df_fisher$dataset, rbp_targets$dataset.id)]
df_fisher$cell.type = rbp_targets$cell.type[match(df_fisher$dataset, rbp_targets$dataset.id)]
df_fisher$target.region = order$target.region[match(df_fisher$target, order$name)]
df_fisher$label = signif(df_fisher$OR,1)
df_fisher$label[df_fisher$Fisher.p>.05] = '' df_fisher
= ggplot(df_fisher %>% filter(target %in% order.brainRBPs$name),aes(x=factor(target, levels=rev(levels(target))), y= -log10(Fisher.p), fill=OR)) +
FigS4C geom_bar(stat='identity',position = position_dodge2()) + coord_flip() + theme_bw() + facet_grid(~set) +
geom_hline(yintercept = 1,lty=2,color='red') + labs(y='Enrichment (-log10 q-value)',x='') +
theme(axis.text.y=element_text(size=5), axis.text.x=element_text(size=5), axis.title.x=element_text(size=7),
axis.line=element_line(color="black", size=0.2),
plot.margin=unit(c(5,5,5,-18),"pt"),
legend.text=element_text(size=5), legend.title=element_text(size=7), legend.key.size=unit(0.4,'cm'),
panel.spacing=unit(-0.1, "lines")) +
labs(x='', y='')
FigS4C
pdf("output/figures/supplement/FigS4C_brainRBPs_splitUpDown.pdf", height=4, width=8)
FigS4Cdev.off()
quartz_off_screen
2
sessionInfo()
R version 4.1.3 (2022-03-10)
Platform: x86_64-apple-darwin17.0 (64-bit)
Running under: macOS Big Sur/Monterey 10.16
Matrix products: default
BLAS: /Library/Frameworks/R.framework/Versions/4.1/Resources/lib/libRblas.0.dylib
LAPACK: /Library/Frameworks/R.framework/Versions/4.1/Resources/lib/libRlapack.dylib
locale:
[1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
attached base packages:
[1] stats4 stats graphics grDevices utils datasets methods
[8] base
other attached packages:
[1] paletteer_1.5.0 cowplot_1.1.1 readxl_1.4.1
[4] biomaRt_2.50.3 ggrepel_0.9.2 rtracklayer_1.54.0
[7] GenomicRanges_1.46.1 GenomeInfoDb_1.30.1 IRanges_2.28.0
[10] S4Vectors_0.32.4 BiocGenerics_0.40.0 forcats_0.5.2
[13] stringr_1.5.0 dplyr_1.0.10 purrr_0.3.4
[16] readr_2.1.2 tidyr_1.2.0 tibble_3.1.8
[19] ggplot2_3.3.6 tidyverse_1.3.2
loaded via a namespace (and not attached):
[1] googledrive_2.0.0 colorspace_2.0-3
[3] rjson_0.2.21 ellipsis_0.3.2
[5] XVector_0.34.0 fs_1.5.2
[7] rstudioapi_0.14 farver_2.1.1
[9] bit64_4.0.5 AnnotationDbi_1.56.2
[11] fansi_1.0.3 lubridate_1.8.0
[13] xml2_1.3.3 codetools_0.2-18
[15] cachem_1.0.6 knitr_1.40
[17] jsonlite_1.8.0 Rsamtools_2.10.0
[19] broom_1.0.1 dbplyr_2.2.1
[21] png_0.1-7 compiler_4.1.3
[23] httr_1.4.4 backports_1.4.1
[25] assertthat_0.2.1 Matrix_1.4-1
[27] fastmap_1.1.0 gargle_1.2.0
[29] cli_3.4.1 htmltools_0.5.3
[31] prettyunits_1.1.1 tools_4.1.3
[33] gtable_0.3.1 glue_1.6.2
[35] GenomeInfoDbData_1.2.7 rappdirs_0.3.3
[37] Rcpp_1.0.9 Biobase_2.54.0
[39] cellranger_1.1.0 vctrs_0.5.1
[41] Biostrings_2.62.0 xfun_0.32
[43] rvest_1.0.3 lifecycle_1.0.3
[45] restfulr_0.0.15 XML_3.99-0.10
[47] googlesheets4_1.0.1 zlibbioc_1.40.0
[49] scales_1.2.1 vroom_1.5.7
[51] hms_1.1.2 MatrixGenerics_1.6.0
[53] parallel_4.1.3 SummarizedExperiment_1.24.0
[55] rematch2_2.1.2 prismatic_1.1.1
[57] yaml_2.3.5 curl_4.3.2
[59] memoise_2.0.1 stringi_1.7.8
[61] RSQLite_2.2.16 BiocIO_1.4.0
[63] filelock_1.0.2 BiocParallel_1.28.3
[65] rlang_1.0.6 pkgconfig_2.0.3
[67] matrixStats_0.62.0 bitops_1.0-7
[69] evaluate_0.16 lattice_0.20-45
[71] labeling_0.4.2 GenomicAlignments_1.30.0
[73] htmlwidgets_1.5.4 bit_4.0.4
[75] tidyselect_1.2.0 magrittr_2.0.3
[77] R6_2.5.1 generics_0.1.3
[79] DelayedArray_0.20.0 DBI_1.1.3
[81] pillar_1.8.1 haven_2.5.1
[83] withr_2.5.0 KEGGREST_1.34.0
[85] RCurl_1.98-1.8 modelr_0.1.9
[87] crayon_1.5.1 utf8_1.2.2
[89] BiocFileCache_2.2.1 tzdb_0.3.0
[91] rmarkdown_2.16 progress_1.2.2
[93] grid_4.1.3 blob_1.2.3
[95] reprex_2.0.2 digest_0.6.29
[97] munsell_0.5.0