Figure 4 - Networks Enrichments

Author

Celine Vuong

suppressPackageStartupMessages({
    library(tidyverse)
    library(edgeR)
    library(WGCNA)
    library(biomaRt)
    library(openxlsx)
    library(readxl)
    library(gprofiler2)
    library(gridExtra)
    library(cowplot)
    library(paletteer)
})

source("code/fisher_overlap.R")

colorVector = c(
  "Known" = "#009E73",
  "ISM"   = "#0072B2",
  "ISM_Prefix" = "#005996",
  "ISM_Suffix" = "#378bcc",
  "NIC"   = "#D55E00",
  "NNC"   = "#E69F00",
  "Other" = "#000000"
)
colorVector_ismSplit = colorVector[-2]

Load Data

cts = read_tsv("data/cp_vz_0.75_min_7_recovery_talon_abundance_filtered.tsv.gz")

Rows: 214516 Columns: 35
── Column specification ────────────────────────────────────────────────────────
Delimiter: "\t"
chr  (7): annot_gene_id, annot_transcript_id, annot_gene_name, annot_transcr...
dbl (28): gene_ID, transcript_ID, n_exons, length, 209_1_VZ, 209_2_VZ, 209_3...

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

load("ref/EWCE/CellTypeData_DamonNeuralFetalOnly.rda")
celltypemarkers <- openxlsx::read.xlsx('https://www.cell.com/cms/10.1016/j.neuron.2019.06.011/attachment/508ae008-b926-487a-b871-844a12acc1f8/mmc5', sheet='Cluster enriched genes') %>% as_tibble()
celltypemarkers_tableS5 = openxlsx::read.xlsx('https://ars.els-cdn.com/content/image/1-s2.0-S0896627319305616-mmc6.xlsx',sheet=2)
celltypemarkers.bg = read.csv("ref/polioudakis_neuron2020/single_cell_background_MJG221228.csv") %>% dplyr::select(ensembl_gene_id) %>% pull()
celltypemarkers.bg = unique(c(celltypemarkers.bg, celltypemarkers$Ensembl))

datExpr.isoCounts = as.data.frame(cts[,12:35])
rownames(datExpr.isoCounts) = cts$annot_transcript_id
datMeta = data.frame(sample=colnames(datExpr.isoCounts))
datMeta$Region = substr(datMeta$sample, 7,9)
datMeta$Subject = substr(datMeta$sample, 1,3)
datMeta$batch = substr(datMeta$sample, 5,5)
datAnno = data.frame(gene_id = cts$annot_gene_id, transcript_id=cts$annot_transcript_id,
                     transcript_name=cts$annot_transcript_name, length=cts$length,
                     gene_name = cts$annot_gene_name, ensg = substr(cts$annot_gene_id,1,15), novelty=cts$transcript_novelty)

brainRBPs = read.csv("data/RBP_Data/CSVs/RBP_targets_v5.csv", header=TRUE)
brainRBPs = dplyr::select(brainRBPs, -c(MGI.symbol, ENSMUSG))
encodeRBPs = read.csv("data/RBP_Data/CSVs/RBP_targets_ENCODE.csv", header=TRUE)
encodeRBPs = encodeRBPs %>% filter(cell.type=="HepG2") %>% rename("HGNC.symbol"="hgnc_symbol", "ENSG"="ensembl_gene_id")

rbp_targets = rbind(brainRBPs,encodeRBPs)
rbp_targets$regulation[is.na(rbp_targets$regulation)]=""

mart = useMart("ENSEMBL_MART_ENSEMBL","mmusculus_gene_ensembl")
f = listFilters(mart); a = listAttributes(mart)
featuresToGet = c("ensembl_gene_id", "external_gene_name", "hsapiens_homolog_ensembl_gene", "hsapiens_homolog_associated_gene_name","hsapiens_homolog_orthology_type")
mouseHumanHomologs = getBM(attributes = featuresToGet,mart = mart)

human_mouse_bg = mouseHumanHomologs %>% as_tibble() %>% filter(hsapiens_homolog_orthology_type == "ortholog_one2one") %>% dplyr::select("hsapiens_homolog_ensembl_gene") %>% pull()

## Load 3 Networks

datExpr.isoFr = readRDS("data/working/WGCNA/final/datExpr.localIF_batchCorrected_103k.rds")
net.isoFr = readRDS("data/working/WGCNA/final/WGCNA_isoformFraction_top103k_signed_sft14_net.rds")
net.isoFr$colors = labels2colors(net.isoFr$cut2$labels)
names(net.isoFr$colors) = names(net.isoFr$cut2$labels)
net.isoFr$module.number = net.isoFr$cut2$labels
net.isoFr$MEs = moduleEigengenes(t(datExpr.isoFr), colors=net.isoFr$colors)
net.isoFr$kMEtable = signedKME(t(datExpr.isoFr), datME =net.isoFr$MEs$eigengenes,corFnc = "bicor")

Warning in bicor(datExpr, datME, , use = "p"): bicor: zero MAD in variable 'x'.
Pearson correlation was used for individual columns with zero (or missing) MAD.

datExpr.isoExpr = readRDS(file='data/working/WGCNA/final/datExpr.isoExpr_batchCorrected_92k.rds')
net.isoExpr = readRDS('data/working/WGCNA/final/WGCNA_isoExpression_top92k_signed_sft9_net.rds')
net.isoExpr$colors = labels2colors(net.isoExpr$cut2$labels)
names(net.isoExpr$colors) = names(net.isoExpr$cut2$labels)
net.isoExpr$module.number = net.isoExpr$cut2$labels
net.isoExpr$MEs = moduleEigengenes(t(datExpr.isoExpr), colors=net.isoExpr$colors)
net.isoExpr$kMEtable = signedKME(t(datExpr.isoExpr), datME =net.isoExpr$MEs$eigengenes,corFnc = "bicor")

datExpr.geneExpr = readRDS(file='data/working/WGCNA/final/datExpr.geneExpr_batchCorrected_16k.rds')
net.geneExpr = readRDS(file='data/working/WGCNA/final/WGCNA_geneExpression_top16k_signed_sft14_net.rds')
net.geneExpr$colors = labels2colors(net.geneExpr$cut2$labels)
names(net.geneExpr$colors) = names(net.geneExpr$cut2$labels)
net.geneExpr$module.number = net.geneExpr$cut2$labels
net.geneExpr$MEs = moduleEigengenes(t(datExpr.geneExpr), colors=net.geneExpr$colors)
net.geneExpr$kMEtable = signedKME(t(datExpr.geneExpr), datME =net.geneExpr$MEs$eigengenes,corFnc = "bicor")

Network Dendrogram Plots

Fig4A

this_tree = net.isoFr$dendrograms[[1]]
this_anno = datAnno[match(rownames(datExpr.isoFr), datAnno$transcript_id),]
these_mods = data.frame(IsoUsage=net.isoFr$colors)
these_mods$isoExpr = net.isoExpr$colors[this_anno$transcript_id]
these_mods$geneExpr = net.geneExpr$colors[this_anno$gene_id]

cell_anno = as.data.frame(matrix('white', nrow=nrow(this_anno), ncol = length(unique(celltypemarkers$Cluster))))
colnames(cell_anno) = sort(unique(celltypemarkers$Cluster))
for(this_cell in unique(celltypemarkers$Cluster)) {
  marker_genes = celltypemarkers %>% filter(Cluster == this_cell) %>% dplyr::select(Ensembl) %>% pull()
  cell_anno[this_anno$ensg %in% marker_genes, this_cell] = "red"
}

rbp.comp = openxlsx::read.xlsx("ref/RBPCompilation_Sundararaman_MolCell2016_TableS1.xlsx", 'Sheet2_1072_RBP_compilation')
these_mods$RBP = 'white'
these_mods$RBP[this_anno$gene_name %in% rbp.comp$Gene.Symbol] = 'black'

plotDendroAndColors(this_tree, colors=cbind(these_mods), dendroLabels = F, cex.colorLabels = 0.4)

pdf("output/figures/Fig4/Fig4A_IsoExprDendro_wRBPComp.pdf", width=6, height=3.5)
plotDendroAndColors(this_tree, colors=cbind(these_mods), dendroLabels = F, cex.colorLabels = 0.4)
dev.off()

quartz_off_screen 
                2

FigS5A

this_tree = net.geneExpr$dendrograms[[1]]
this_anno = datAnno[match(rownames(datExpr.geneExpr), datAnno$gene_id),]
these_mods = data.frame(geneExpr=net.geneExpr$colors)
these_mods$IsoExpr = net.isoExpr$colors[this_anno$transcript_id]
these_mods$IsoUsage = net.isoFr$colors[this_anno$transcript_id]

cell_anno = as.data.frame(matrix('grey', nrow=nrow(this_anno), ncol = length(unique(celltypemarkers$Cluster))))
colnames(cell_anno) = sort(unique(celltypemarkers$Cluster))
for(this_cell in unique(celltypemarkers$Cluster)) {
  marker_genes = celltypemarkers %>% filter(Cluster == this_cell) %>% dplyr::select(Ensembl) %>% pull()
  cell_anno[this_anno$ensg %in% marker_genes, this_cell] = "red"
}

rbp.comp = openxlsx::read.xlsx("ref/RBPCompilation_Sundararaman_MolCell2016_TableS1.xlsx", 'Sheet2_1072_RBP_compilation')
these_mods$RBP = 'white'
these_mods$RBP[this_anno$gene_name %in% rbp.comp$Gene.Symbol] = 'black'

plotDendroAndColors(this_tree, colors=cbind(these_mods,cell_anno), dendroLabels = F, cex.colorLabels = 0.4)

pdf("output/figures/supplement/FigS5A_Dendro_geneExpr_wCellAnno_wRBP.pdf", width=8,height=6)
plotDendroAndColors(this_tree, colors=cbind(these_mods,cell_anno), dendroLabels = F, cex.colorLabels = 0.8)
dev.off()

quartz_off_screen 
                2

FigS5B

this_tree = net.isoExpr$dendrograms[[1]]
this_anno = datAnno[match(rownames(datExpr.isoExpr), datAnno$transcript_id),]
these_mods = data.frame(isoExpr=net.isoExpr$colors)
these_mods$GeneExpr = net.geneExpr$colors[this_anno$gene_id]
these_mods$IsoUsage = net.isoFr$colors[this_anno$transcript_id]

cell_anno = as.data.frame(matrix('grey', nrow=nrow(this_anno), ncol = length(unique(celltypemarkers$Cluster))))
colnames(cell_anno) = sort(unique(celltypemarkers$Cluster))
for(this_cell in unique(celltypemarkers$Cluster)) {
  marker_genes = celltypemarkers %>% filter(Cluster == this_cell) %>% dplyr::select(Ensembl) %>% pull()
  cell_anno[this_anno$ensg %in% marker_genes, this_cell] = "red"
}

rbp.comp = openxlsx::read.xlsx("ref/RBPCompilation_Sundararaman_MolCell2016_TableS1.xlsx", 'Sheet2_1072_RBP_compilation')
these_mods$RBP = 'white'
these_mods$RBP[this_anno$gene_name %in% rbp.comp$Gene.Symbol] = 'black'

plotDendroAndColors(this_tree, colors=cbind(these_mods,cell_anno), dendroLabels = F, cex.colorLabels = 0.4)

pdf("output/figures/supplement/FigS5B_Dendro_isoExpr_wCellAnno_wRBP.pdf", width=8,height=6)
plotDendroAndColors(this_tree, colors=cbind(these_mods,cell_anno), dendroLabels = F, cex.colorLabels = 0.8)
dev.off()

quartz_off_screen 
                2

FigS5C

this_tree = net.isoFr$dendrograms[[1]]
this_anno = datAnno[match(rownames(datExpr.isoFr), datAnno$transcript_id),]
these_mods = data.frame(isoFr=net.isoFr$colors)
these_mods$GeneExpr = net.geneExpr$colors[this_anno$gene_id]
these_mods$IsoExpr = net.isoExpr$colors[this_anno$transcript_id]

cell_anno = as.data.frame(matrix('grey', nrow=nrow(this_anno), ncol = length(unique(celltypemarkers$Cluster))))
colnames(cell_anno) = sort(unique(celltypemarkers$Cluster))
for(this_cell in unique(celltypemarkers$Cluster)) {
  marker_genes = celltypemarkers %>% filter(Cluster == this_cell) %>% dplyr::select(Ensembl) %>% pull()
  cell_anno[this_anno$ensg %in% marker_genes, this_cell] = "red"
}

rbp.comp = openxlsx::read.xlsx("ref/RBPCompilation_Sundararaman_MolCell2016_TableS1.xlsx", 'Sheet2_1072_RBP_compilation')
these_mods$RBP = 'white'
these_mods$RBP[this_anno$gene_name %in% rbp.comp$Gene.Symbol] = 'black'

plotDendroAndColors(this_tree, colors=cbind(these_mods,cell_anno), dendroLabels = F, cex.colorLabels = 0.4)

pdf("output/figures/supplement/FigS5C_Dendro_isoUsage_wCellAnno_wRBP.pdf", width=8,height=6)
plotDendroAndColors(this_tree, colors=cbind(these_mods,cell_anno), dendroLabels = F, cex.colorLabels = 0.8)
dev.off()

quartz_off_screen 
                2

Compare Networks

Cell-type enrichment

Fisher’s exact

df_fisher.celltype = data.frame()
networks = list("geneExpr" = net.geneExpr$module.number[datAnno$gene_id], 
                "isoExpr" = net.isoExpr$module.number[datAnno$transcript_id], 
                "isoUsage" = net.isoFr$module.number[datAnno$transcript_id])

for(this_net in names(networks)) {
  all_mods = unique(na.omit(networks[[this_net]][networks[[this_net]] != 0]))
  
  for(this_mod in all_mods) {
    mod_genes = unique(na.omit(datAnno$ensg[networks[[this_net]]==this_mod]))
    mod_gene.bg = unique(na.omit(datAnno$ensg[!is.na(networks[[this_net]])]))
    
    for(this_cell in unique(celltypemarkers$Cluster)) {
      marker_genes = celltypemarkers %>% filter(Cluster == this_cell) %>% 
        dplyr::select(Ensembl) %>% pull()
      enrichment = ORA(mod_genes, marker_genes, mod_gene.bg, celltypemarkers.bg)
      df_fisher.celltype = rbind(df_fisher.celltype, 
                                 data.frame(net = this_net, mod = this_mod, cell=this_cell,
                                            as.data.frame(t(enrichment))))
    }
  }
}

for(col in c("OR", "Fisher.p")) df_fisher.celltype[,col] = as.numeric(df_fisher.celltype[,col])
df_fisher.celltype$Fisher.p[df_fisher.celltype$OR<1] = 1
df_fisher.celltype$Fisher.q = p.adjust(df_fisher.celltype$Fisher.p,'fdr')

df_fisher.celltype$label = ""
df_fisher.celltype$label[df_fisher.celltype$Fisher.q<.05] = signif((df_fisher.celltype$OR[df_fisher.celltype$Fisher.q<.05]),1)

df_fisher.celltype$net = factor(df_fisher.celltype$net, levels=c("geneExpr", "isoExpr", "isoUsage"))

# Order cell types
order.celltype = c("vRG", "oRG", "PgS", "PgG2M", "IP", "ExN", "ExM", "ExM-U", "ExDp1", "ExDp2", "InMGE", "InCGE", "OPC", "End", "Per", "Mic")

# Order modules
geneExpr_order.celltype = c(2,21,9,16,15,1, # Progenitors
                            5,12,6,8,10, #Exc Neurons
                            4,11, #Exc Neurons 2
                            7,19,14, #In Neurons
                            3,13,17,18,20,22,23) # Other 
isoExpr_order.celltype = c(2,15,41,4,3,11,21, # Progenitors
                           12,19,23, # Sub progenitors
                           16, # ExN
                           1,30,6,20,14,38,5,8,46,34,49,45,31, # Neurons 
                           47,25,26, # Sub neurons
                           17,42,9,29, # ExM
                           35,40,53,27, # Sub neurons 2
                           48,39,13, # In neurons
                           7,10,18,22,24,28,32,33,36,37,43,44,50,51,52) # Other
isoUsage_order.celltype = c(3,12,11, # Progenitors to neurons
                            1,27, # Non-migrating
                            2,8,14, # Neurons
                            20,15,24,10,29, # Exc neurons
                            23, # In neurons
                            19, # OPC
                            4,5,6,7,9,13,16,17,18,21,22,25,26,28) # Other

df_fisher.celltype_geneExpr = df_fisher.celltype  %>% filter(net=="geneExpr")
df_fisher.celltype_geneExpr$mod = factor(df_fisher.celltype_geneExpr$mod, levels=geneExpr_order.celltype)
df_fisher.celltype_isoExpr = df_fisher.celltype  %>% filter(net=="isoExpr")
df_fisher.celltype_isoExpr$mod = factor(df_fisher.celltype_isoExpr$mod, levels=isoExpr_order.celltype)
df_fisher.celltype_isoUsage = df_fisher.celltype  %>% filter(net=="isoUsage")
df_fisher.celltype_isoUsage$mod = factor(df_fisher.celltype_isoUsage$mod, levels=isoUsage_order.celltype)

# Cell type aggregation
df_fisher.celltype = df_fisher.celltype %>% mutate(cell.type = case_when(cell %in% c('vRG', 'oRG', 'PgS', 'PgG2M', 'IP') ~ 'Progenitor',
                                             cell %in% c('ExN', 'ExM', 'ExM-U', 'ExDp1', 'ExDp2') ~ 'Exc Neuron',
                                             cell %in% c('InMGE', 'InCGE') ~ 'Inh Neuron',
                                             cell %in% c('OPC', 'End', 'Per', 'Mic') ~ 'Other'))

FigS6A

CellType_ColorBar = ggplot(df_fisher.celltype %>% filter(net=="geneExpr",mod==1),aes(x=factor(cell, levels=order.celltype))) + 
  geom_tile(aes(y=factor(1),fill=cell.type)) + 
  # geom_point(aes(y=factor(1), shape=data.type),position=position_dodge2(width=1),size=0.5) + scale_shape_manual(values = c(1:9)) + 
  # scale_x_discrete(labels=sapply(strsplit(levels(df_fisher_rbp$target), "_"), "[[",1)) +
  theme_bw() +
  theme(axis.text.y = element_blank(), 
        axis.ticks.y=element_blank(),
        axis.text.x = element_text(angle=45,vjust=1, hjust=1, size=6), 
        legend.key.size=unit(0.3,'cm'), legend.text=element_text(size=6), legend.title=element_text(size=6), 
        plot.margin=unit(c(-21,5,5,5),"pt"), legend.position=c(0.5,-2.5), legend.box="horizontal", legend.direction="horizontal", 
        panel.grid.major=element_blank(), panel.grid.minor=element_blank(), panel.border=element_blank(), axis.line=element_line(color="black", size=0.2)) + 
  labs(x='', y='') + paletteer::scale_fill_paletteer_d("rcartocolor::Vivid") + guides(fill=guide_legend(order=1, nrow=1))

plot_gE = ggplot(df_fisher.celltype_geneExpr,aes(y=factor(mod, levels=rev(levels(mod))), x=factor(cell, levels=order.celltype), label=label, fill=-log10(Fisher.q))) + 
  geom_tile() + facet_wrap(~net, scales='free') + geom_text(size=2.5)+ scale_fill_gradient(low='white',high='red') + 
  theme(axis.text.x = element_blank(), axis.text.y = element_text(size=6), axis.ticks.x=element_blank()) +
  labs(x='Cell Type', y='Module')

plot_grid(plot_gE,CellType_ColorBar, align="v", ncol=1, axis="lr", rel_heights=c(9,1))

pdf("output/figures/supplement/FigS6A_geneExpr_ModOrdered.pdf", height=6, width=6)
plot_grid(plot_gE,CellType_ColorBar, align="v", ncol=1, axis="lr", rel_heights=c(9,1))
dev.off()

quartz_off_screen 
                2

FigS6B

plot_iE = ggplot(df_fisher.celltype_isoExpr,aes(y=factor(mod, levels=rev(levels(mod))), x=factor(cell, levels=order.celltype), label=label, fill=-log10(Fisher.q))) + 
  geom_tile() + facet_wrap(~net, scales='free') + geom_text(size=2.5)+ scale_fill_gradient(low='white',high='red') + 
  theme(axis.text.x = element_blank(), axis.text.y = element_text(size=6), axis.ticks.x=element_blank()) +
  labs(x='Cell Type', y='Module')
plot_grid(plot_iE,CellType_ColorBar, align="v", ncol=1, axis="lr", rel_heights=c(9,1))

pdf("output/figures/supplement/FigS6B_isoExpr_ModOrdered.pdf", height=6, width=6)
plot_grid(plot_iE,CellType_ColorBar, align="v", ncol=1, axis="lr", rel_heights=c(9,1))
dev.off()

quartz_off_screen 
                2

FigS6C

plot_iU = ggplot(df_fisher.celltype_isoUsage,aes(y=factor(mod, levels=rev(levels(mod))), x=factor(cell, levels=order.celltype), label=label, fill=-log10(Fisher.q))) + 
  geom_tile() + facet_wrap(~net, scales='free') + geom_text(size=2.5)+ scale_fill_gradient(low='white',high='red') + 
  theme(axis.text.x = element_blank(), axis.text.y = element_text(size=6), axis.ticks.x=element_blank()) +
  labs(x='Cell Type', y='Module')
plot_grid(plot_iU,CellType_ColorBar, align="v", ncol=1, axis="lr", rel_heights=c(9,1))

pdf("output/figures/supplement/FigS6C_isoUsage_ModOrdered.pdf", height=6, width=6)
plot_grid(plot_iU,CellType_ColorBar, align="v", ncol=1, axis="lr", rel_heights=c(9,1))
dev.off()

quartz_off_screen 
                2

Rare Var Enrichments

Gene level

geneAnno = rtracklayer::import("ref/gencode.v33lift37.annotation.gtf.gz") %>% as_tibble() %>% filter(type=='gene')
geneAnno$ensg = substr(geneAnno$gene_id,1,15)

fu=openxlsx::read.xlsx(('https://static-content.springer.com/esm/art%3A10.1038%2Fs41588-022-01104-0/MediaObjects/41588_2022_1104_MOESM3_ESM.xlsx'),'Supplementary Table 11')
fu$p_TADA_ASD[fu$p_TADA_ASD==0] = min(fu$p_TADA_ASD[fu$p_TADA_ASD >0],na.rm=T)
fu$p_TADA_NDD[fu$p_TADA_NDD==0] = min(fu$p_TADA_NDD[fu$p_TADA_NDD >0],na.rm=T)

geneAnno.logit = data.frame(ASD_fuTADA= -log10(fu$p_TADA_ASD)[match(geneAnno$ensg, fu$gene_id)])
geneAnno.logit$NDD_fuTADA = -log10(fu$p_TADA_NDD)[match(geneAnno$ensg, fu$gene_id)]
SCZ.schema = read_tsv('ref/risk_genes/SCHEMA_gene_results.tsv')

Warning: One or more parsing issues, see `problems()` for details

Rows: 18321 Columns: 26
── Column specification ────────────────────────────────────────────────────────
Delimiter: "\t"
chr  (8): gene_id, group, OR (PTV), OR (Class I), OR (Class II), OR (PTV) up...
dbl (16): Case PTV, Ctrl PTV, Case mis3, Ctrl mis3, Case mis2, Ctrl mis2, P ...
lgl  (2): De novo mis3, De novo mis2

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

geneAnno.logit$SCZ_schema = -log10(SCZ.schema$`P meta`[match(geneAnno$ensg, SCZ.schema$gene_id)])
BIP.bipex = read_tsv('ref/risk_genes/BipEx_gene_results.tsv') %>% filter(group=="Bipolar Disorder")

Rows: 119958 Columns: 20
── Column specification ────────────────────────────────────────────────────────
Delimiter: "\t"
chr  (4): gene_id, group, damaging_missense_fisher_gnom_non_psych_OR, ptv_fi...
dbl (16): n_cases, n_controls, damaging_missense_case_count, damaging_missen...

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

geneAnno.logit$BIP.bipex = -log10(BIP.bipex$ptv_fisher_gnom_non_psych_pval[match(geneAnno$ensg,BIP.bipex$gene_id)])
EPI.epi25 = read_tsv('ref/risk_genes/Epi25_gene_results.tsv') %>% filter(group=="EPI")

Rows: 71456 Columns: 12
── Column specification ────────────────────────────────────────────────────────
Delimiter: "\t"
chr (2): gene_id, group
dbl (9): xcase_lof, xctrl_lof, pval_lof, xcase_mpc, xctrl_mpc, pval_mpc, xca...
lgl (1): pval_infrIndel

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

geneAnno.logit$EPI.epi25 = -log10(EPI.epi25$pval[match(geneAnno$ensg,EPI.epi25$gene_id)])

networks = data.frame(net="isoExpr", module = net.isoExpr$module.number, transcript_id = names(net.isoExpr$colors))
networks = rbind(networks, data.frame(net='isoUsage', module = net.isoFr$module.number, transcript_id = names(net.isoFr$colors)))
networks$gene_id = datAnno$gene_id[match(networks$transcript_id, datAnno$transcript_id)]
networks = rbind(networks, data.frame(net='geneExpr', module = net.geneExpr$module.number, transcript_id = NA, gene_id=names(net.geneExpr$colors)))
networks$ensg = substr(networks$gene_id,1,15)

df_logit.gene = data.frame()
binaryVec = rep(NA, times=nrow(geneAnno))
names(binaryVec) = geneAnno$ensg

for(this_net in unique(networks$net)) {
  all_mods = unique(na.omit(networks$module[networks$net ==this_net & networks$module!=0]))
  
  for(this_mod in all_mods) {
    
    this_binary_vec = binaryVec
    this_binary_vec[names(this_binary_vec) %in% networks$ensg[networks$net==this_net]] = 0 
    this_binary_vec[names(this_binary_vec) %in% networks$ensg[networks$net==this_net & networks$module==this_mod]] = 1
    
    for(this_rare_var in colnames(geneAnno.logit)) {
      this_glm = summary(glm(this_binary_vec ~ geneAnno.logit[,this_rare_var] + width + log10(width), data=geneAnno, family='binomial'))
      df_logit.gene = rbind(df_logit.gene, data.frame(net = this_net, mod = this_mod, set = this_rare_var, t(this_glm$coefficients[2,])))
      
    }
  }
}

Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

df_logit.gene$OR = exp(df_logit.gene$Estimate)
df_logit.gene$P = df_logit.gene$Pr...z..
df_logit.gene$P[df_logit.gene$OR < 1] = 1
df_logit.gene$Q = p.adjust(df_logit.gene$P,'fdr')

df_logit.gene$label = signif((df_logit.gene$OR),3)
df_logit.gene$label[df_logit.gene$Q>.05] = ""
df_logit.gene$mod = factor(df_logit.gene$mod,levels=c(100:1))

df_logit.gene_geneExpr = df_logit.gene %>% filter(net=="geneExpr")
df_logit.gene_geneExpr$mod = factor(df_logit.gene_geneExpr$mod, levels=geneExpr_order.celltype)
df_logit.gene_isoExpr = df_logit.gene %>% filter(net=="isoExpr")
df_logit.gene_isoExpr$mod = factor(df_logit.gene_isoExpr$mod, levels=isoExpr_order.celltype)
df_logit.gene_isoUsage = df_logit.gene %>% filter(net=="isoUsage")
df_logit.gene_isoUsage$mod = factor(df_logit.gene_isoUsage$mod, levels=isoUsage_order.celltype)

# Module aggregation
df_logit.gene_geneExpr = df_logit.gene_geneExpr %>% mutate(mod.group = case_when(mod %in% c(2,21,9,16,15,1) ~ 'Progenitor',
                                                                                 mod %in% c(5,12,6,8,10,4,11) ~ 'Exc Neuron',
                                                                                 mod %in% c(7,19) ~ 'Inh Neuron',
                                                                                 mod %in% c(14,3,13,17,18,20,22,23) ~ 'Other'))
df_logit.gene_isoExpr = df_logit.gene_isoExpr %>% mutate(mod.group = case_when(mod %in% c(2,15,41,4,3,11,21,12,19,23,26) ~ 'Progenitor',
                                                                                 mod %in% c(16,1,30,6,20,14,38,5,8,46,34,49,45,31,47,25,17,42,9,29,35,40,53,27,48) ~ 'Exc Neuron',
                                                                                 mod %in% c(39,13,7) ~ 'Inh Neuron',
                                                                                 mod %in% c(10,18,22,24,28,32,33,36,37,43,44,50,51,52) ~ 'Other'))
df_logit.gene_isoUsage = df_logit.gene_isoUsage %>% mutate(mod.group = case_when(mod %in% c(27,19) ~ 'Progenitor',
                                                                                 mod %in% c(2,8,14,20,15,24,10,29) ~ 'Exc Neuron',
                                                                                 mod %in% c(23) ~ 'Inh Neuron',
                                                                                 mod %in% c(4,5,6,7,9,13,16,17,18,21,22,25,26,28) ~ 'Other',
                                                                                 mod %in% c(3,12,11,1) ~ 'Prog/Neur'))

# Order of Rare Vars
order.rarevar = c("NDD_fuTADA", "ASD_fuTADA", "SCZ_schema", "BIP.bipex", "EPI.epi25")

ModGroup_ColorBar.gE = ggplot(df_logit.gene_geneExpr,aes(x=factor(mod, levels = rev(levels(mod))))) + 
  geom_tile(aes(y=factor(1),fill=mod.group)) + 
  # geom_point(aes(y=factor(1), shape=data.type),position=position_dodge2(width=1),size=0.5) + scale_shape_manual(values = c(1:9)) + 
  theme_bw() +
  theme(axis.text.x = element_blank(),
        axis.ticks.x = element_blank(),
        axis.text.y = element_text(angle=0,vjust=1, hjust=1, size=5), 
        # legend.key.size=unit(0.3,'cm'), legend.text=element_text(size=3), legend.title=element_text(size=4), 
        plot.margin=unit(c(5,-10,5,5),"pt"), legend.position=c(-3,0.8), 
        # legend.box="vertical", legend.direction="vertical", 
        panel.grid.major=element_blank(), panel.grid.minor=element_blank(), panel.border=element_blank(), axis.line=element_line(color="black", size=0.2)) + 
  scale_y_discrete(labels=rev(levels(df_logit.gene_geneExpr$mod))) +
  labs(x='Module', y='') + paletteer::scale_fill_paletteer_d("rcartocolor::Vivid") + guides(fill=guide_legend(reverse=TRUE)) + coord_flip()
legend.gE = get_legend(ModGroup_ColorBar.gE + theme(legend.position=c(2.5,0.5), legend.key.size=unit(0.3,'cm'), legend.text=element_text(size=3), legend.title=element_text(size=4),
                                         legend.box="horizontal", legend.direction="horizontal"))

plot_logit.gE = ggplot(df_logit.gene_geneExpr,aes(y=factor(mod, levels=rev(levels(mod))), x=factor(set, levels=order.rarevar), label=label, fill=-log10(Q))) + 
  # scale_x_discrete(labels=sapply(strsplit(levels(df_fisher_rbp$target), "_"), "[[",1)) + 
  geom_tile() + 
  facet_wrap(~net, scales='free') + 
  geom_text(size=1) + scale_fill_gradient(low='white',high='red') + 
  theme(axis.text.x = element_text(angle=45,hjust=1, size=4), axis.text.y = element_blank(), axis.title = element_text(size=5),
        axis.ticks.y = element_blank(), plot.margin=unit(c(5,5,5,-10),"pt"),
        legend.key.size=unit(0.25,'cm'), legend.text=element_text(size=5), legend.title=element_text(size=5)) + 
  labs(x='Rare Variant Set', y='')

ModGroup_ColorBar.iE = ggplot(df_logit.gene_isoExpr,aes(x=factor(mod, levels = rev(levels(mod))))) + 
  geom_tile(aes(y=factor(1),fill=mod.group)) + 
  # geom_point(aes(y=factor(1), shape=data.type),position=position_dodge2(width=1),size=0.5) + scale_shape_manual(values = c(1:9)) + 
  theme_bw() +
  theme(axis.text.x = element_blank(),
        axis.ticks.x = element_blank(),
        axis.text.y = element_text(angle=0,vjust=1, hjust=1, size=4), 
        # legend.key.size=unit(0.3,'cm'), legend.text=element_text(size=6), legend.title=element_text(size=6), 
        plot.margin=unit(c(5,-10,5,5),"pt"), legend.position=c(0.5,-2.5), 
        # legend.box="vertical", legend.direction="vertical", 
        panel.grid.major=element_blank(), panel.grid.minor=element_blank(), panel.border=element_blank(), axis.line=element_line(color="black", size=0.2)) + 
  scale_y_discrete(labels=rev(levels(df_logit.gene_geneExpr$mod))) +
  labs(x='Module', y='') + paletteer::scale_fill_paletteer_d("rcartocolor::Vivid") + guides(fill=guide_legend(order=1, nrow=1)) + coord_flip()
legend.iE = get_legend(ModGroup_ColorBar.iE + theme(legend.position=c(2.5,0.5), legend.key.size=unit(0.3,'cm'), legend.text=element_text(size=3), legend.title=element_text(size=4),
                                                    legend.box="horizontal", legend.direction="horizontal"))

plot_logit.iE = ggplot(df_logit.gene_isoExpr,aes(y=factor(mod, levels=rev(levels(mod))), x=factor(set, levels=order.rarevar), label=label, fill=-log10(Q))) + 
  # scale_x_discrete(labels=sapply(strsplit(levels(df_fisher_rbp$target), "_"), "[[",1)) + 
  geom_tile() + facet_wrap(~net, scales='free') + geom_text(size=1) + scale_fill_gradient(low='white',high='red') + 
  theme(axis.text.x = element_text(angle=45,hjust=1, size=4), axis.text.y = element_blank(), axis.title = element_text(size=5),
        axis.ticks.y = element_blank(), plot.margin=unit(c(5,5,5,-10),"pt"),
        legend.key.size=unit(0.25,'cm'), legend.text=element_text(size=5), legend.title=element_text(size=5)) + 
  labs(x='Rare Variant Set', y='')

ModGroup_ColorBar.iU = ggplot(df_logit.gene_isoUsage,aes(x=factor(mod, levels = rev(levels(mod))))) + 
  geom_tile(aes(y=factor(1),fill=mod.group)) + 
  # geom_point(aes(y=factor(1), shape=data.type),position=position_dodge2(width=1),size=0.5) + scale_shape_manual(values = c(1:9)) + 
  theme_bw() +
  theme(axis.text.x = element_blank(),
        axis.ticks.x = element_blank(),
        axis.text.y = element_text(angle=0,vjust=1, hjust=1, size=4), 
        # legend.key.size=unit(0.3,'cm'), legend.text=element_text(size=6), legend.title=element_text(size=6), 
        plot.margin=unit(c(5,-10,5,5),"pt"), legend.position=c(0.5,-2.5), 
        # legend.box="vertical", legend.direction="vertical", 
        panel.grid.major=element_blank(), panel.grid.minor=element_blank(), panel.border=element_blank(), axis.line=element_line(color="black", size=0.2)) + 
  scale_y_discrete(labels=rev(levels(df_logit.gene_geneExpr$mod))) +
  labs(x='Module', y='') + paletteer::scale_fill_paletteer_d("rcartocolor::Vivid") + guides(fill=guide_legend(order=1, nrow=1)) + coord_flip()
legend.iU = get_legend(ModGroup_ColorBar.iU + theme(legend.position=c(2.5,0.5), legend.key.size=unit(0.3,'cm'), legend.text=element_text(size=3), legend.title=element_text(size=4),
                                                    legend.box="horizontal", legend.direction="horizontal"))

plot_logit.iU = ggplot(df_logit.gene_isoUsage,aes(y=factor(mod, levels=rev(levels(mod))), x=factor(set, levels=order.rarevar), label=label, fill=-log10(Q))) + 
  # scale_x_discrete(labels=sapply(strsplit(levels(df_fisher_rbp$target), "_"), "[[",1)) + 
  geom_tile() + facet_wrap(~net, scales='free') + geom_text(size=1) + scale_fill_gradient(low='white',high='red') + 
  theme(axis.text.x = element_text(angle=45,hjust=1, size=4), axis.text.y = element_blank(), axis.title = element_text(size=5),
        axis.ticks.y = element_blank(), plot.margin=unit(c(5,5,5,-10),"pt"),
        legend.key.size=unit(0.25,'cm'), legend.text=element_text(size=5), legend.title=element_text(size=5)) + 
  labs(x='Rare Variant Set', y='')

FigS5_C_geneExpr = plot_grid(ModGroup_ColorBar.gE,plot_logit.gE, legend.gE, align="h", nrow=2, axis="bt", rel_widths=c(1,4), rel_heights=c(20,1))
FigS5_C_isoExpr = plot_grid(ModGroup_ColorBar.iE,plot_logit.iE, legend.iE, align="h", nrow=2, axis="bt", rel_widths=c(1,4), rel_heights=c(20,1))
FigS5_C_isoUsage = plot_grid(ModGroup_ColorBar.iU,plot_logit.iU, legend.iU, align="h", nrow=2, axis="bt", rel_widths=c(1,4), rel_heights=c(20,1))

plot_grid(FigS5_C_geneExpr,FigS5_C_isoExpr,FigS5_C_isoUsage, align="h", nrow=1, axis="bt", rel_widths=c(1,1,1))

GWAS Enrichments

df_gwas = data.frame()
gwas = dir("data/LDSC_cell_type_specific/networks/isoExpr_cts_result/", pattern="txt")
for(this_gwas in gwas) {
  this_ldsc = read.table(paste0("data/LDSC_cell_type_specific/networks/isoExpr_cts_result/",this_gwas),header = T)
  colnames(this_ldsc)[1] = 'module'
  this_ldsc$gwas = gsub(".cell_type_results.txt", "", this_gwas)
  this_ldsc$net = 'isoExpr'
  df_gwas = rbind(df_gwas, this_ldsc)
}

gwas = dir("data/LDSC_cell_type_specific/networks/isoUsage_cts_result/", pattern="txt")
for(this_gwas in gwas) {
  this_ldsc = read.table(paste0("data/LDSC_cell_type_specific/networks/isoUsage_cts_result/",this_gwas),header = T)
  colnames(this_ldsc)[1] = 'module'
  this_ldsc$gwas = gsub(".cell_type_results.txt", "", this_gwas)
  this_ldsc$net = 'isoUsage'
  df_gwas = rbind(df_gwas, this_ldsc)
}

gwas = dir("data/LDSC_cell_type_specific/networks/geneExpr_cts_result/", pattern="txt")
for(this_gwas in gwas) {
  this_ldsc = read.table(paste0("data/LDSC_cell_type_specific/networks/geneExpr_cts_result/",this_gwas),header = T)
  colnames(this_ldsc)[1] = 'module'
  this_ldsc$gwas = gsub(".cell_type_results.txt", "", this_gwas)
  this_ldsc$net = 'geneExpr'
  df_gwas = rbind(df_gwas, this_ldsc)
}


df_gwas$fdr = p.adjust(df_gwas$Coefficient_P_value,'fdr')
df_gwas$mod = as.numeric(gsub("M", "", unlist(lapply(strsplit(unlist(lapply(strsplit(df_gwas$module, "[.]"),'[',2)), '_'),'[',1))))
df_gwas$Zscore = df_gwas$Coefficient/df_gwas$Coefficient_std_error

RBP Enrichments

networks = list("net.isoExpr" = net.isoExpr$colors[datAnno$transcript_id], 
                "net.isoFr" = net.isoFr$colors[datAnno$transcript_id],
                "net.geneExpr" = net.geneExpr$colors[datAnno$gene_id])

df_fisher_rbp = data.frame()

for(this_net in names(networks)) {
  all_mods = unique(na.omit(networks[[this_net]][networks[[this_net]] != 'grey']))
  
  for(this_mod in all_mods) {
    
    if(this_net == "net.geneExpr") {
      modGenes = substr(unique(na.omit(names(networks[[this_net]][networks[[this_net]] == this_mod]))), 1,15)
      modGeneg.bg = substr(unique(na.omit(names(networks[[this_net]]))),1,15)
    } else {
      modTx = unique(na.omit(names(networks[[this_net]][networks[[this_net]] == this_mod])))
      modGenes = unique(datAnno$ensg[match(modTx, datAnno$transcript_id)])
      modGeneg.bg = unique(datAnno$ensg[match(names(networks[[this_net]]), datAnno$transcript_id)])
    } 
    for(this_dataset in unique(rbp_targets$dataset.id)) {
      this_rbp = rbp_targets %>% filter(dataset.id == this_dataset) %>% mutate(target = paste0(RBP, "_", data.type, "_", cell.type)) %>% dplyr::select(target) %>% unique()  %>% pull()
      target_genes = rbp_targets %>% filter(dataset.id == this_dataset) %>% dplyr::select(ENSG) %>% pull()
      
      if(grepl("Human",this_rbp)) {
        this_or = ORA(modGenes, target_genes, unique(datAnno$ensg), unique(datAnno$ensg))
      } else {
        this_or = ORA(modGenes, target_genes, unique(datAnno$ensg), human_mouse_bg)
      }
      df_fisher_rbp = rbind(df_fisher_rbp, data.frame(net = this_net, mod = this_mod, dataset = this_dataset, target = this_rbp, t(this_or)))
    }
  }}

# Order of curated RBPs
order.brainRBPs = read_excel("data/RBP_Data/curatedRBPs_order.xlsx") %>% as_tibble()
# Order Encode by Target Regions
order.Encode.TarReg = read_excel("data/RBP_Data/ENCODE_vanNostrand_NatMeth2016_Fig2a_order.xlsx", sheet=2)
order.Encode.TarReg = order.Encode.TarReg %>% filter(name %in% df_fisher_rbp$target)
order.RBPs = rbind(order.brainRBPs, order.Encode.TarReg)

df_fisher_rbp$OR = as.numeric(df_fisher_rbp$OR)
df_fisher_rbp$Fisher.p[df_fisher_rbp$OR<1] = 1
df_fisher_rbp$Fisher.p = p.adjust(as.numeric(df_fisher_rbp$Fisher.p),'fdr')
df_fisher_rbp$label = signif(df_fisher_rbp$OR,2)
df_fisher_rbp$label[df_fisher_rbp$Fisher.p>.001] = ''
df_fisher_rbp$target.region = order.RBPs$target.region[match(df_fisher_rbp$target, order.RBPs$name)]
df_fisher_rbp$data.type = rbp_targets$data.type[match(df_fisher_rbp$dataset, rbp_targets$dataset.id)]
df_fisher_rbp$cell.type = rbp_targets$cell.type[match(df_fisher_rbp$dataset, rbp_targets$dataset.id)]
df_fisher_rbp<-df_fisher_rbp %>% left_join(data.frame(mod.num = 1:100, mod=labels2colors(1:100)))

Joining, by = "mod"

df_fisher_rbp$target = factor(df_fisher_rbp$target, levels=order.RBPs$name)

TableS4D

TableS4D <- df_fisher_rbp %>% mutate(module.color = mod, mod = paste0(net,"M.",mod.num,"_",mod))
write_tsv(TableS4D, file="output/tables/TableS4D_fisherRBP.tsv")

FigS6B

# Order of modules
df_fisher_rbp.geneExpr = df_fisher_rbp  %>% filter(net=="net.geneExpr")
df_fisher_rbp.geneExpr$mod.num = factor(df_fisher_rbp.geneExpr$mod.num, levels=geneExpr_order.celltype)
df_fisher_rbp.isoExpr = df_fisher_rbp  %>% filter(net=="net.isoExpr")
df_fisher_rbp.isoExpr$mod.num = factor(df_fisher_rbp.isoExpr$mod.num, levels=isoExpr_order.celltype)
df_fisher_rbp.isoUsage = df_fisher_rbp  %>% filter(net=="net.isoFr")
df_fisher_rbp.isoUsage$mod.num = factor(df_fisher_rbp.isoUsage$mod.num, levels=isoUsage_order.celltype)

# Color bar for RBP TargetRegions
RBP_ColorBar = ggplot(df_fisher_rbp %>% filter(net=="net.isoFr",mod.num==1),aes(x=target, label=target.region)) + 
  # geom_tile(aes(y=factor(1),fill=data.type)) + 
  geom_tile(aes(y=factor(1),fill=target.region)) + 
  # geom_point(aes(y=factor(1), shape=target.region),position=position_dodge2(width=1),size=0.5) + scale_shape_manual(values = c(1:9)) + 
  geom_point(aes(y=factor(1), shape=data.type),position=position_dodge2(width=1),size=0.5) + scale_shape_manual(values = c(1:9)) + 
  # scale_x_discrete(labels=df_fisher$RBP) +
  scale_x_discrete(labels=sapply(strsplit(levels(df_fisher_rbp$target), "_"), "[[",1)) +
  theme_bw() +
  theme(axis.text.y = element_blank(), 
        axis.ticks.y=element_blank(),
        axis.text.x = element_text(angle=45,vjust=1, hjust=1, size=5), 
        legend.key.size=unit(0.3,'cm'), legend.text=element_text(size=6), legend.title=element_text(size=6), 
        plot.margin=unit(c(-21,5,5,5),"pt"), legend.position=c(0.5,-2.5), legend.box="horizontal", legend.direction="horizontal", 
        panel.grid.major=element_blank(), panel.grid.minor=element_blank(), panel.border=element_blank(), axis.line=element_line(color="black", size=0.2)) + 
  labs(x='', y='') + paletteer::scale_fill_paletteer_d("rcartocolor::Vivid") + guides(fill=guide_legend(order=1, nrow=1))

plot_geneExpr = ggplot(df_fisher_rbp.geneExpr,aes(y=factor(mod.num, levels=rev(levels(mod.num))), x=target, label=label, fill=-log10(Fisher.p))) + 
  scale_x_discrete(labels=sapply(strsplit(levels(df_fisher_rbp$target), "_"), "[[",1)) + 
  geom_tile() + 
  # facet_wrap(~net, scales='free') + 
  geom_text(size=1) + scale_fill_gradient(low='white',high='red', limits=c(0,200)) + 
  theme(axis.text.x=element_blank(), axis.ticks.x=element_blank(), axis.text.y=element_blank(), axis.ticks.y=element_blank()) + labs(x='', y='')
col1_gE = plot_grid(plot_geneExpr,RBP_ColorBar, align="v", ncol=1, axis="lr", rel_heights=c(9,1))
col2_gE = plot_grid(ModGroup_ColorBar.gE + theme(plot.margin=unit(c(5,-20,5,5),"pt")),NULL, ncol=1, rel_heights=c(9,1))

plot_grid(col2_gE, col1_gE, ncol=2, rel_widths=c(1,20))

pdf("output/figures/supplement/FigS6B_geneExpr_ModOrdered.pdf", height=6, width=20)
plot_grid(col2_gE, col1_gE, ncol=2, rel_widths=c(1,20))
dev.off()

quartz_off_screen 
                2

plot_isoExpr = ggplot(df_fisher_rbp.isoExpr,aes(y=factor(mod.num, levels=rev(levels(mod.num))), x=target, label=label, fill=-log10(Fisher.p))) + 
  scale_x_discrete(labels=sapply(strsplit(levels(df_fisher_rbp$target), "_"), "[[",1)) + 
  geom_tile() + 
  # facet_wrap(~net, scales='free') + 
  geom_text(size=1) + scale_fill_gradient(low='white',high='red', limits=c(0,200)) + 
  theme(axis.text.x=element_blank(), axis.ticks.x=element_blank(), axis.text.y=element_blank(), axis.ticks.y=element_blank()) + labs(x='', y='')
col1_iE = plot_grid(plot_isoExpr,RBP_ColorBar, align="v", ncol=1, axis="lr", rel_heights=c(9,1))
col2_iE = plot_grid(ModGroup_ColorBar.iE + theme(plot.margin=unit(c(5,-20,5,5),"pt")),NULL, ncol=1, rel_heights=c(9,1))
plot_grid(col2_iE, col1_iE, ncol=2, rel_widths=c(1,20))

pdf("output/figures/supplement/FigS6B_isoExpr_ModOrdered.pdf", height=6, width=20)
plot_grid(col2_iE, col1_iE, ncol=2, rel_widths=c(1,20))
dev.off()

quartz_off_screen 
                2

plot_isoUsage = ggplot(df_fisher_rbp.isoUsage,aes(y=factor(mod.num, levels=rev(levels(mod.num))), x=target, label=label, fill=-log10(Fisher.p))) + 
  scale_x_discrete(labels=sapply(strsplit(levels(df_fisher_rbp$target), "_"), "[[",1)) + 
  geom_tile() + 
  # facet_wrap(~net, scales='free') + 
  geom_text(size=1) + scale_fill_gradient(low='white',high='red', limits=c(0,200)) + 
  theme(axis.text.x=element_blank(), axis.ticks.x=element_blank(), axis.text.y=element_blank(), axis.ticks.y=element_blank()) + labs(x='', y='')
col1_iU = plot_grid(plot_isoUsage,RBP_ColorBar, align="v", ncol=1, axis="lr", rel_heights=c(9,1))
col2_iU = plot_grid(ModGroup_ColorBar.iU + theme(plot.margin=unit(c(5,-20,5,5),"pt")),NULL, ncol=1, rel_heights=c(9,1))
plot_grid(col2_iU, col1_iU, ncol=2, rel_widths=c(1,20))

pdf("output/figures/supplement/FigS6B_isoUsage_ModOrdered.pdf", height=6, width=20)
plot_grid(col2_iU, col1_iU, ncol=2, rel_widths=c(1,20))
dev.off()

quartz_off_screen 
                2

Fig4B

Fig4B_density.rbp = ggplot(df_fisher_rbp %>% filter(Fisher.p <= .001), aes(x=-log10(Fisher.p), fill=net)) + 
  geom_density(alpha=0.25, size=0.2, position="identity", stat='bin', binwidth=15) +
  scale_y_log10() +
  theme_bw() + theme(panel.grid.major=element_blank(), panel.grid.minor=element_blank(), axis.text = element_text(size=5), axis.title = element_text(size=8),
                     legend.position="top", legend.key.size = unit(0.25,'cm'), legend.title=element_text(size=5)) +
  labs(y='RBP enrichments', x='-log10(pValue)')

Fig4B_density.rbp

Warning: Transformation introduced infinite values in continuous y-axis

pdf("output/figures/Fig4/Fig4B_density_RBPEnrich.pdf", height=2, width=3)
Fig4B_density.rbp

Warning: Transformation introduced infinite values in continuous y-axis

dev.off()

quartz_off_screen 
                2

Fig4B_density.celltype = ggplot(df_fisher.celltype %>% filter(Fisher.q <= .001), aes(x=-log10(Fisher.q), fill=net)) + 
  geom_density(alpha=0.25, size=0.2, position="identity", stat='bin', binwidth=15) +
  scale_y_log10() +
  theme_bw() + theme(panel.grid.major=element_blank(), panel.grid.minor=element_blank(), axis.text = element_text(size=5), axis.title = element_text(size=8),
                     legend.position="top", legend.key.size = unit(0.25,'cm'), legend.title=element_text(size=5)) +
  labs(y='Cell Type enrichments', x='-log10(pValue)')  
Fig4B_density.celltype

Warning: Transformation introduced infinite values in continuous y-axis

pdf("output/figures/Fig4/Fig4B_density_CellTypeEnrich.pdf", height=2, width=3)
Fig4B_density.celltype

Warning: Transformation introduced infinite values in continuous y-axis

dev.off()

quartz_off_screen 
                2

Annotate Individual Modules

Fig4C,D and FigS7

IsoUsage Network

this_datExpr = datExpr.isoFr
this_net = net.isoFr
this_net$module = paste0("M", this_net$module.number, ".", labels2colors(this_net$module.number))
fileBaseNet="datExpr.localIF_batchCorrected_103k_byTargetRegion_Reordered_RBPModColor"

#Generate MEs and kME table for Network
MEs = moduleEigengenes(t(this_datExpr), colors=this_net$module.number)
kMEtable = signedKME(t(this_datExpr), datME =MEs$eigengenes,corFnc = "bicor")

this_anno = datAnno[match(rownames(this_datExpr), datAnno$transcript_id),]
idx = grep("^TALON", this_anno$transcript_name)
this_anno$transcript_name[idx] = paste0(this_anno$gene_name[idx], '_', this_anno$transcript_name[idx])

cell_anno = as.data.frame(matrix('grey', nrow=nrow(this_anno), ncol = length(unique(celltypemarkers$Cluster))))
colnames(cell_anno) = sort(unique(celltypemarkers$Cluster))
for(this_cell in unique(celltypemarkers$Cluster)) {
  marker_genes = celltypemarkers %>% filter(Cluster == this_cell) %>% dplyr::select(Ensembl) %>% pull()
  cell_anno[this_anno$ensg %in% marker_genes, this_cell] = "red"
}

tidyMods = tibble(transcript_id = rownames(kMEtable), module=this_net$module, module.color = this_net$colors, module.number=this_net$module.number, kMEtable)
tidyMods <- tidyMods %>% pivot_longer(-c(transcript_id,module,module.color,module.number), names_to = "kME_to_module", values_to = 'kME') %>% mutate(kMEtoMod = gsub("kME", "", kME_to_module)) %>% filter(module!=0, module != "grey", kMEtoMod !='grey', kMEtoMod != 0) %>% dplyr::select(-kME_to_module)
tidyMods <- tidyMods %>% left_join(this_anno)

Joining, by = "transcript_id"

tidyMods %>% group_by(module) %>% filter(kMEtoMod == module.number) %>% slice_max(kME,n=10)


pdf(file=paste0("output/figures/Fig4/", fileBaseNet, ".pdf"),width=15,height=8)
plotDendroAndColors(this_net$tree, colors=cbind((this_net$colors),cell_anno), dendroLabels = F)

ggplot(df_fisher.celltype %>% filter(net=="isoUsage"), aes(x=factor(cell, levels=order.celltype),y=factor(mod,levels=c(100:1)), label=label, fill=-log10(Fisher.p))) + geom_tile() + scale_fill_gradient(low='white', high='red') + geom_text(size=3) + theme(axis.text.x = element_text(angle=-45,hjust=0)) + labs(x="",y="")

for(i in 1:(ncol(MEs$eigengenes))) {
# for(i in 2:2) {
  print(i)
  this_mod = gsub("ME", "", colnames(MEs$eigengenes)[i])
  mod_color = tidyMods %>% filter(module.number==this_mod,kMEtoMod==this_mod) %>% dplyr::select(module.color) %>% unique() %>% as.character()
  mod_genes = tidyMods %>% filter(module.number==this_mod,kMEtoMod==this_mod) %>% arrange(-kME) %>% dplyr::select(ensg) %>% pull()
  if(!this_mod %in% c("grey", 0, "M0.grey")) {
    this_plot = data.frame(datMeta, eig=MEs$eigengenes[,i])
    g1 = ggplot(this_plot, aes(x=Region, y=eig, color=Subject)) + geom_point() + ggtitle(paste0("Module ",this_mod, ": ", labels2colors(as.numeric(this_mod)), " n=", length(mod_genes)))
    g2 = tidyMods %>% filter(module.number==this_mod, kMEtoMod==this_mod) %>% slice_max(kME,n=25) %>% ggplot(aes(x=reorder(transcript_name, kME), y=kME, fill=novelty)) + geom_bar(stat='identity') + coord_flip() + labs(x="") + scale_fill_manual(values = colorVector)
    

    g3=ggplot(df_fisher.celltype %>% filter(net=="isoUsage",mod==this_mod),
              # aes(x=reorder(cell, -Fisher.p), 
              aes(x=factor(cell, levels=order.celltype), 
                  y=-log10(Fisher.p), fill=OR)) + 
      geom_bar(stat='identity') + 
      coord_flip() + labs(x="") + geom_hline(yintercept = -log10(.05),lty=2,col='red')
    
    # Add pathway enrichments
    path = gprofiler2::gost(query=mod_genes,ordered_query = T,correction_method = 'fdr',
                            custom_bg = unique(datAnno$ensg), sources = c("GO","KEGG", "REACTOME"))
    if(!is.null(path)) {
      df_path = as_tibble(path$result)
      df_path <- df_path %>% filter(term_size > 5, term_size < 1000)
      g4 <- df_path %>% group_by(source) %>% slice_min(p_value,n=5,with_ties = F) %>% ungroup() %>%
        ggplot(aes(x=reorder(term_name, -p_value), y=-log10(p_value), fill=source)) + geom_bar(stat='identity') + theme_bw() + 
        labs(x="")  + scale_x_discrete(labels = function(x) str_wrap(x, width=30)) +
        facet_grid(source~., space = 'free', scales='free') + theme(legend.position = 'none') + theme(axis.text.y = element_text(size=8)) +
        coord_flip()
    } else {
      print("No pathway enrichment for this module")
      g4=NULL
    }
    
    g5=ggplot(df_fisher_rbp %>% filter(net=="net.isoFr",mod.num==this_mod),
              aes(x=target, y=-log10(Fisher.p), fill=OR)) + 
      geom_bar(stat='identity',position = position_dodge2()) + 
      labs(x="", y="Enrichment\n(-log10 q-value)") + geom_hline(yintercept = -log10(.05),lty=2,col='red')+ ggtitle("RBP Enrichment") + 
      scale_x_discrete(labels=sapply(strsplit(levels(df_fisher_rbp$target), "_"), "[[",1)) + 
      scale_fill_gradient(low="grey60", high=mod_color) + 
      theme(axis.text.y = element_text(size=5), axis.text.x = element_blank(), axis.ticks.x=element_blank(), plot.margin=unit(c(5,5,0,5),"pt"))
    # + coord_flip()
    Bar = ggplot(df_fisher_rbp %>% filter(net=="net.isoFr",mod.num==this_mod),aes(x=target, label=target.region)) + 
      # geom_tile(aes(y=factor(1),fill=data.type)) + 
      geom_tile(aes(y=factor(1),fill=target.region)) + 
      # geom_point(aes(y=factor(1), shape=target.region),position=position_dodge2(width=1),size=0.5) + scale_shape_manual(values = c(1:9)) + 
      geom_point(aes(y=factor(1), shape=data.type),position=position_dodge2(width=1),size=0.5) + scale_shape_manual(values = c(1:9)) + 
      # scale_x_discrete(labels=df_fisher$RBP) +
      scale_x_discrete(labels=sapply(strsplit(levels(df_fisher_rbp$target), "_"), "[[",1)) +
      theme_bw() +
      theme(axis.text.y = element_blank(), 
            axis.ticks.y=element_blank(),
            axis.text.x = element_text(angle=45,vjust=1, hjust=1, size=5), 
            legend.key.size=unit(0.25,'cm'), legend.text=element_text(size=5), legend.title=element_text(size=5), 
            plot.margin=unit(c(-20,5,5,5),"pt"), legend.position=c(0.5,-4), legend.box="horizontal", legend.direction="horizontal", 
            panel.grid.major=element_blank(), panel.grid.minor=element_blank(), panel.border=element_blank(), axis.line=element_line(color="black", size=0.2)) + 
      labs(x='', y='') + paletteer::scale_fill_paletteer_d("rcartocolor::Vivid") + guides(fill=guide_legend(order=1, nrow=1))
    g5.Bar = plot_grid(g5,Bar, align="v", ncol=1, axis="lr", rel_heights=c(4,1))
    
    # Rare Var
    g6=ggplot(df_logit.gene %>% filter(net=="isoUsage",mod==this_mod),aes(x=reorder(set, -Q), y=-log10(Q), fill=OR)) + geom_bar(stat='identity',position = position_dodge2()) + 
      coord_flip() + labs(x="") + geom_hline(yintercept = -log10(.05),lty=2,col='red')+ ggtitle("Rare Var Enrichment")
    
    # GWAS
    g7=ggplot(df_gwas %>% filter(net=="isoUsage",mod==this_mod),aes(x=reorder(gwas, -fdr), y=-log10(fdr), fill=Zscore)) + geom_bar(stat='identity',position = position_dodge2()) + scale_fill_gradient2()+
      coord_flip() + labs(x="") + geom_hline(yintercept = -log10(.05),lty=2,col='red') + ggtitle("GWAS Enrichment")
    
    
    gridExtra::grid.arrange(grobs=list(g1,g2,g3,g4,g5.Bar,g6,g7),layout_matrix=rbind(c(1,1,1,3,3,4,4,7,7,7),
                                                                                 c(2,2,2,3,3,4,4,7,7,7), 
                                                                                 c(2,2,2,3,3,4,4,7,7,7), 
                                                                                 c(2,2,2,3,3,4,4,7,7,7),
                                                                                 c(6,6,6,5,5,5,5,5,5,5),
                                                                                 c(6,6,6,5,5,5,5,5,5,5)))
  }
}

Detected custom background input, domain scope is set to 'custom'
Detected custom background input, domain scope is set to 'custom'
Detected custom background input, domain scope is set to 'custom'
Detected custom background input, domain scope is set to 'custom'
Detected custom background input, domain scope is set to 'custom'
Detected custom background input, domain scope is set to 'custom'
Detected custom background input, domain scope is set to 'custom'
Detected custom background input, domain scope is set to 'custom'
Detected custom background input, domain scope is set to 'custom'
Detected custom background input, domain scope is set to 'custom'
Detected custom background input, domain scope is set to 'custom'
Detected custom background input, domain scope is set to 'custom'
Detected custom background input, domain scope is set to 'custom'
Detected custom background input, domain scope is set to 'custom'
Detected custom background input, domain scope is set to 'custom'
Detected custom background input, domain scope is set to 'custom'
Detected custom background input, domain scope is set to 'custom'
Detected custom background input, domain scope is set to 'custom'
Detected custom background input, domain scope is set to 'custom'
Detected custom background input, domain scope is set to 'custom'
Detected custom background input, domain scope is set to 'custom'
Detected custom background input, domain scope is set to 'custom'
Detected custom background input, domain scope is set to 'custom'
Detected custom background input, domain scope is set to 'custom'
Detected custom background input, domain scope is set to 'custom'
Detected custom background input, domain scope is set to 'custom'
Detected custom background input, domain scope is set to 'custom'
Detected custom background input, domain scope is set to 'custom'
Detected custom background input, domain scope is set to 'custom'

dev.off()

# A tibble: 290 × 12
# Groups:   module [29]
   transcr…¹ module modul…² modul…³   kME kMEto…⁴ gene_id trans…⁵ length gene_…⁶
   <chr>     <chr>  <chr>     <dbl> <dbl> <chr>   <chr>   <chr>    <dbl> <chr>  
 1 ENST0000… M1.tu… turquo…       1 0.996 1       ENSG00… PKM-202   2305 PKM    
 2 ENST0000… M1.tu… turquo…       1 0.992 1       ENSG00… CTNNA1…   3754 CTNNA1 
 3 ENST0000… M1.tu… turquo…       1 0.992 1       ENSG00… SMARCE…   5150 SMARCE1
 4 ENST0000… M1.tu… turquo…       1 0.992 1       ENSG00… USP5-2…   3083 USP5   
 5 ENST0000… M1.tu… turquo…       1 0.990 1       ENSG00… TRIM9-…   5284 TRIM9  
 6 ENST0000… M1.tu… turquo…       1 0.989 1       ENSG00… TWF1-2…   3045 TWF1   
 7 ENST0000… M1.tu… turquo…       1 0.988 1       ENSG00… SSR1-2…   9661 SSR1   
 8 ENST0000… M1.tu… turquo…       1 0.987 1       ENSG00… RPN2-2…   2227 RPN2   
 9 ENST0000… M1.tu… turquo…       1 0.987 1       ENSG00… DPF2-2…   3714 DPF2   
10 ENST0000… M1.tu… turquo…       1 0.986 1       ENSG00… DYNC1I…   2572 DYNC1I2
# … with 280 more rows, 2 more variables: ensg <chr>, novelty <chr>, and
#   abbreviated variable names ¹transcript_id, ²module.color, ³module.number,
#   ⁴kMEtoMod, ⁵transcript_name, ⁶gene_name
[1] 1
[1] 2
[1] 3
[1] 4
[1] 5
[1] 6
[1] 7
[1] 8
[1] 9
[1] 10
[1] 11
[1] 12
[1] 13
[1] 14
[1] 15
[1] 16
[1] 17
[1] 18
[1] 19
[1] 20
[1] 21
[1] 22
[1] 23
[1] 24
[1] 25
[1] 26
[1] 27
[1] 28
[1] 29
[1] 30
quartz_off_screen 
                2

IsoExpr Network

this_datExpr = datExpr.isoExpr
this_net = net.isoExpr
this_net$module = paste0("M", this_net$module.number, ".", labels2colors(this_net$module.number))
fileBaseNet="datExpr.isoExpr_batchCorrected_92k_byTargetRegion_Reordered_RBPModColor"

#Generate MEs and kME table for Network
MEs = moduleEigengenes(t(this_datExpr), colors=this_net$module.number)
kMEtable = signedKME(t(this_datExpr), datME =MEs$eigengenes,corFnc = "bicor")

this_anno = datAnno[match(rownames(this_datExpr), datAnno$transcript_id),]
idx = grep("^TALON", this_anno$transcript_name)
this_anno$transcript_name[idx] = paste0(this_anno$gene_name[idx], '_', this_anno$transcript_name[idx])

cell_anno = as.data.frame(matrix('grey', nrow=nrow(this_anno), ncol = length(unique(celltypemarkers$Cluster))))
colnames(cell_anno) = sort(unique(celltypemarkers$Cluster))
for(this_cell in unique(celltypemarkers$Cluster)) {
  marker_genes = celltypemarkers %>% filter(Cluster == this_cell) %>% dplyr::select(Ensembl) %>% pull()
  cell_anno[this_anno$ensg %in% marker_genes, this_cell] = "red"
}

tidyMods = tibble(transcript_id = rownames(kMEtable), module=this_net$module, module.color = this_net$colors, module.number=this_net$module.number, kMEtable)
tidyMods <- tidyMods %>% pivot_longer(-c(transcript_id,module,module.color,module.number), names_to = "kME_to_module", values_to = 'kME') %>% mutate(kMEtoMod = gsub("kME", "", kME_to_module)) %>% filter(module!=0, module != "grey", kMEtoMod !='grey', kMEtoMod != 0) %>% dplyr::select(-kME_to_module)
tidyMods <- tidyMods %>% left_join(this_anno)

Joining, by = "transcript_id"

tidyMods %>% group_by(module) %>% filter(kMEtoMod == module.number) %>% slice_max(kME,n=10)

pdf(file=paste0("output/figures/Fig4/", fileBaseNet, ".pdf"),width=15,height=8)
plotDendroAndColors(this_net$tree, colors=cbind((this_net$colors),cell_anno), dendroLabels = F)

ggplot(df_fisher.celltype %>% filter(net=="isoExpr"), aes(x=factor(cell, levels=order.celltype),y=factor(mod,levels=c(100:1)), label=label, fill=-log10(Fisher.p))) + geom_tile() + scale_fill_gradient(low='white', high='red') + geom_text(size=3) + theme(axis.text.x = element_text(angle=-45,hjust=0)) + labs(x="",y="")

for(i in 1:(ncol(MEs$eigengenes))) {
  # for(i in 2:2) {
  print(i)
  this_mod = gsub("ME", "", colnames(MEs$eigengenes)[i])
  mod_color = tidyMods %>% filter(module.number==this_mod,kMEtoMod==this_mod) %>% dplyr::select(module.color) %>% unique() %>% as.character()
  mod_genes = tidyMods %>% filter(module.number==this_mod,kMEtoMod==this_mod) %>% arrange(-kME) %>% dplyr::select(ensg) %>% pull()
  if(!this_mod %in% c("grey", 0, "M0.grey")) {
    this_plot = data.frame(datMeta, eig=MEs$eigengenes[,i])
    g1 = ggplot(this_plot, aes(x=Region, y=eig, color=Subject)) + geom_point() + ggtitle(paste0("Module ",this_mod, ": ", labels2colors(as.numeric(this_mod)), " n=", length(mod_genes)))
    g2 = tidyMods %>% filter(module.number==this_mod, kMEtoMod==this_mod) %>% slice_max(kME,n=25) %>% ggplot(aes(x=reorder(transcript_name, kME), y=kME, fill=novelty)) + geom_bar(stat='identity') + coord_flip() + labs(x="") + scale_fill_manual(values = colorVector)
    
    
    g3=ggplot(df_fisher.celltype %>% filter(net=="isoExpr",mod==this_mod),
              # aes(x=reorder(cell, -Fisher.p), 
              aes(x=factor(cell, levels=order.celltype), 
                  y=-log10(Fisher.p), fill=OR)) + 
      geom_bar(stat='identity') + 
      coord_flip() + labs(x="") + geom_hline(yintercept = -log10(.05),lty=2,col='red')
    
    # Add pathway enrichments
    path = gprofiler2::gost(query=mod_genes,ordered_query = T,correction_method = 'fdr',
                            custom_bg = unique(datAnno$ensg), sources = c("GO","KEGG", "REACTOME"))
    if(!is.null(path)) {
      df_path = as_tibble(path$result)
      df_path <- df_path %>% filter(term_size > 5, term_size < 1000)
      g4 <- df_path %>% group_by(source) %>% slice_min(p_value,n=5,with_ties = F) %>% ungroup() %>%
        ggplot(aes(x=reorder(term_name, -p_value), y=-log10(p_value), fill=source)) + geom_bar(stat='identity') + theme_bw() + 
        labs(x="")  + scale_x_discrete(labels = function(x) str_wrap(x, width=30)) +
        facet_grid(source~., space = 'free', scales='free') + theme(legend.position = 'none') + theme(axis.text.y = element_text(size=8)) +
        coord_flip()
    } else {
      print("No pathway enrichment for this module")
      g4=NULL
    }
    
    # RBP
    g5=ggplot(df_fisher_rbp %>% filter(net=="net.isoExpr",mod.num==this_mod),
              aes(x=target, y=-log10(Fisher.p), fill=OR)) + 
      geom_bar(stat='identity',position = position_dodge2()) + 
      labs(x="", y="Enrichment\n(-log10 q-value)") + geom_hline(yintercept = -log10(.05),lty=2,col='red')+ ggtitle("RBP Enrichment") + 
      scale_x_discrete(labels=sapply(strsplit(levels(df_fisher_rbp$target), "_"), "[[",1)) +
      scale_fill_gradient(low="grey60", high=mod_color) + 
      theme(axis.text.y = element_text(size=5), axis.text.x = element_blank(), axis.ticks.x=element_blank(), plot.margin=unit(c(5,5,0,5),"pt"))
    # + coord_flip()
    Bar = ggplot(df_fisher_rbp %>% filter(net=="net.isoExpr",mod.num==this_mod),aes(x=target, label=target.region)) + 
      # geom_tile(aes(y=factor(1),fill=data.type)) + 
      geom_tile(aes(y=factor(1),fill=target.region)) + 
      # geom_point(aes(y=factor(1), shape=target.region),position=position_dodge2(width=1),size=0.5) + scale_shape_manual(values = c(1:9)) + 
      geom_point(aes(y=factor(1), shape=data.type),position=position_dodge2(width=1),size=0.5) + scale_shape_manual(values = c(1:9)) + 
      # scale_x_discrete(labels=df_fisher$RBP) +
      scale_x_discrete(labels=sapply(strsplit(levels(df_fisher_rbp$target), "_"), "[[",1)) +
      theme_bw() +
      theme(axis.text.y = element_blank(), 
            axis.ticks.y=element_blank(),
            axis.text.x = element_text(angle=45,vjust=1, hjust=1, size=5), 
            legend.key.size=unit(0.25,'cm'), legend.text=element_text(size=5), legend.title=element_text(size=5), 
            plot.margin=unit(c(-20,5,5,5),"pt"), legend.position=c(0.5,-4), legend.box="horizontal", legend.direction="horizontal", 
            panel.grid.major=element_blank(), panel.grid.minor=element_blank(), panel.border=element_blank(), axis.line=element_line(color="black", size=0.2)) + 
      labs(x='', y='') + paletteer::scale_fill_paletteer_d("rcartocolor::Vivid") + guides(fill=guide_legend(order=1, nrow=1))
    g5.Bar = plot_grid(g5,Bar, align="v", ncol=1, axis="lr", rel_heights=c(4,1))
    
    # Rare Var
    g6=ggplot(df_logit.gene %>% filter(net=="isoExpr",mod==this_mod),aes(x=reorder(set, -Q), y=-log10(Q), fill=OR)) + geom_bar(stat='identity',position = position_dodge2()) + 
      coord_flip() + labs(x="") + geom_hline(yintercept = -log10(.05),lty=2,col='red')+ ggtitle("Rare Var Enrichment")
    
    # GWAS
    g7=ggplot(df_gwas %>% filter(net=="isoExpr",mod==this_mod),aes(x=reorder(gwas, -fdr), y=-log10(fdr), fill=Zscore)) + geom_bar(stat='identity',position = position_dodge2()) + scale_fill_gradient2()+
      coord_flip() + labs(x="") + geom_hline(yintercept = -log10(.05),lty=2,col='red') + ggtitle("GWAS Enrichment")
    
    
    gridExtra::grid.arrange(grobs=list(g1,g2,g3,g4,g5.Bar,g6,g7),layout_matrix=rbind(c(1,1,1,3,3,4,4,7,7,7),
                                                                                     c(2,2,2,3,3,4,4,7,7,7), 
                                                                                     c(2,2,2,3,3,4,4,7,7,7), 
                                                                                     c(2,2,2,3,3,4,4,7,7,7),
                                                                                     c(6,6,6,5,5,5,5,5,5,5),
                                                                                     c(6,6,6,5,5,5,5,5,5,5)))
  }
}

Detected custom background input, domain scope is set to 'custom'
Detected custom background input, domain scope is set to 'custom'
Detected custom background input, domain scope is set to 'custom'
Detected custom background input, domain scope is set to 'custom'
Detected custom background input, domain scope is set to 'custom'
Detected custom background input, domain scope is set to 'custom'
Detected custom background input, domain scope is set to 'custom'
Detected custom background input, domain scope is set to 'custom'
Detected custom background input, domain scope is set to 'custom'
Detected custom background input, domain scope is set to 'custom'
Detected custom background input, domain scope is set to 'custom'
Detected custom background input, domain scope is set to 'custom'
Detected custom background input, domain scope is set to 'custom'
Detected custom background input, domain scope is set to 'custom'
Detected custom background input, domain scope is set to 'custom'
Detected custom background input, domain scope is set to 'custom'
Detected custom background input, domain scope is set to 'custom'
Detected custom background input, domain scope is set to 'custom'
Detected custom background input, domain scope is set to 'custom'
Detected custom background input, domain scope is set to 'custom'
Detected custom background input, domain scope is set to 'custom'
Detected custom background input, domain scope is set to 'custom'
Detected custom background input, domain scope is set to 'custom'
Detected custom background input, domain scope is set to 'custom'
Detected custom background input, domain scope is set to 'custom'
Detected custom background input, domain scope is set to 'custom'
Detected custom background input, domain scope is set to 'custom'
Detected custom background input, domain scope is set to 'custom'
Detected custom background input, domain scope is set to 'custom'
Detected custom background input, domain scope is set to 'custom'
Detected custom background input, domain scope is set to 'custom'
Detected custom background input, domain scope is set to 'custom'
Detected custom background input, domain scope is set to 'custom'
Detected custom background input, domain scope is set to 'custom'
Detected custom background input, domain scope is set to 'custom'
Detected custom background input, domain scope is set to 'custom'
Detected custom background input, domain scope is set to 'custom'
Detected custom background input, domain scope is set to 'custom'
Detected custom background input, domain scope is set to 'custom'
Detected custom background input, domain scope is set to 'custom'
Detected custom background input, domain scope is set to 'custom'
Detected custom background input, domain scope is set to 'custom'
Detected custom background input, domain scope is set to 'custom'
Detected custom background input, domain scope is set to 'custom'
Detected custom background input, domain scope is set to 'custom'
Detected custom background input, domain scope is set to 'custom'
Detected custom background input, domain scope is set to 'custom'
Detected custom background input, domain scope is set to 'custom'
Detected custom background input, domain scope is set to 'custom'
Detected custom background input, domain scope is set to 'custom'
Detected custom background input, domain scope is set to 'custom'
Detected custom background input, domain scope is set to 'custom'
Detected custom background input, domain scope is set to 'custom'

dev.off()

# A tibble: 530 × 12
# Groups:   module [53]
   transcr…¹ module modul…² modul…³   kME kMEto…⁴ gene_id trans…⁵ length gene_…⁶
   <chr>     <chr>  <chr>     <dbl> <dbl> <chr>   <chr>   <chr>    <dbl> <chr>  
 1 TALONT00… M1.tu… turquo…       1 0.995 1       ENSG00… ABLIM3…   4338 ABLIM3 
 2 ENST0000… M1.tu… turquo…       1 0.994 1       ENSG00… DYNC1I…   2947 DYNC1I1
 3 ENST0000… M1.tu… turquo…       1 0.992 1       ENSG00… NCDN-2…   3677 NCDN   
 4 ENST0000… M1.tu… turquo…       1 0.992 1       ENSG00… HS3ST5…   3901 HS3ST5 
 5 ENST0000… M1.tu… turquo…       1 0.991 1       ENSG00… SULT4A…   2478 SULT4A1
 6 ENST0000… M1.tu… turquo…       1 0.991 1       ENSG00… JPH4-2…   4265 JPH4   
 7 ENST0000… M1.tu… turquo…       1 0.990 1       ENSG00… SRD5A1…   7035 SRD5A1 
 8 TALONT00… M1.tu… turquo…       1 0.990 1       ENSG00… CDH13_…   3659 CDH13  
 9 ENST0000… M1.tu… turquo…       1 0.990 1       ENSG00… SATB2-…   1663 SATB2-…
10 ENST0000… M1.tu… turquo…       1 0.990 1       ENSG00… SOSTDC…   1758 SOSTDC1
# … with 520 more rows, 2 more variables: ensg <chr>, novelty <chr>, and
#   abbreviated variable names ¹transcript_id, ²module.color, ³module.number,
#   ⁴kMEtoMod, ⁵transcript_name, ⁶gene_name
[1] 1
[1] 2
[1] 3
[1] 4
[1] 5
[1] 6
[1] 7
[1] 8
[1] 9
[1] 10
[1] 11
[1] 12
[1] 13
[1] 14
[1] 15
[1] 16
[1] 17
[1] 18
[1] 19
[1] 20
[1] 21
[1] 22
[1] 23
[1] 24
[1] 25
[1] 26
[1] 27
[1] 28
[1] 29
[1] 30
[1] 31
[1] 32
[1] 33
[1] 34
[1] 35
[1] 36
[1] 37
[1] 38
[1] 39
[1] 40
[1] 41
[1] 42
[1] 43
[1] 44
[1] 45
[1] 46
[1] 47
[1] 48
[1] 49
[1] 50
[1] 51
[1] 52
[1] 53
[1] 54
quartz_off_screen 
                2

Hub Transcript Models

suppressPackageStartupMessages({
  library(GenomicFeatures)
  library(ggtranscript)
})

# Isoseq Annotations
isoseq="data/sqanti/cp_vz_0.75_min_7_recovery_talon_corrected.gtf.cds.gff.gz"
isoseq_txdb=makeTxDbFromGFF(isoseq, format="gtf")

Import genomic features from the file as a GRanges object ... OK
Prepare the 'metadata' data frame ... OK
Make the TxDb object ...

Warning in .get_cds_IDX(mcols0$type, mcols0$phase): some CDS phases are missing
or not between 0 and 2

OK

isoseq_transcript=exonsBy(isoseq_txdb,by="tx",use.names=T)
gr.isoseq = rtracklayer::import(isoseq) %>% as_tibble()

cts = read_table("data/cp_vz_0.75_min_7_recovery_talon_abundance_filtered.tsv.gz")


── Column specification ────────────────────────────────────────────────────────
cols(
  .default = col_double(),
  annot_gene_id = col_character(),
  annot_transcript_id = col_character(),
  annot_gene_name = col_character(),
  annot_transcript_name = col_character(),
  gene_novelty = col_character(),
  transcript_novelty = col_character(),
  ISM_subtype = col_character()
)
ℹ Use `spec()` for the full column specifications.

cts$novelty2 = as.character(cts$transcript_novelty)
cts$novelty2[which(cts$novelty2=="ISM" & cts$ISM_subtype=="Prefix")] = "ISM_Prefix"
cts$novelty2[which(cts$novelty2=="ISM" & cts$ISM_subtype=="Suffix")] = "ISM_Suffix"
cts$novelty2[cts$novelty2 %in% c("Antisense", "Genomic", "Intergenic", "ISM")] = "Other"
cts$novelty2 = factor(cts$novelty2,levels=c("Known", "ISM_Prefix", "ISM_Suffix", "NIC", "NNC", "Other"))
cts$counts = rowSums(cts[,c(12:35)])
cts$cpm = cts$counts / (sum(cts$counts)/1000000)

gr.isoseq.old = gr.isoseq

gr.isoseq <- gr.isoseq.old %>% left_join(cts, by=c("transcript_id" = "annot_transcript_id"))

isoseq.gene.names = rtracklayer::import("data/cp_vz_0.75_min_7_recovery_talon.gtf.gz") %>%
  as_tibble() %>%
  dplyr::filter(type == "gene") %>%
  dplyr::select(gene_id, gene_name)

gr.isoseq = gr.isoseq %>% left_join(isoseq.gene.names)

Joining, by = "gene_id"

SMARCE1

this_gene="SMARCE1"

these_exons <- gr.isoseq %>% dplyr::filter(annot_gene_name == this_gene & type == "exon" & (counts > 100 | novelty2=="Known"))
this_cds <- gr.isoseq %>% dplyr::filter(annot_gene_name == this_gene & type == "CDS" & (counts > 100 | novelty2=="Known"))

g1<-these_exons %>% 
  ggplot(aes(
    xstart = start,
    xend = end,
    y = reorder(transcript_id, counts)
  )) +
  geom_range(
    aes(fill = log2(1+cpm), group=novelty2), height=.25) + 
  geom_range(data=this_cds, aes(fill = log2(1+cpm), group=novelty2)) + 
  geom_intron(
    data = to_intron(these_exons, "transcript_id"),
    aes(strand = strand),arrow.min.intron.length = 500,
    arrow = grid::arrow(ends = "last", length = grid::unit(0.1, "inches")),
    color='grey60',
  ) + facet_grid(novelty2~.,scale='free',space='free') + theme_bw() + labs(y="") + ggtitle(this_gene,subtitle = unique(these_exons$seqnames)) + theme(plot.title = element_text(hjust=.5), plot.subtitle = element_text(hjust=.5), axis.text.y = element_text(size=12))
g1

pdf("output/figures/Fig4/Fig4C_subplot_ggtranscript_SMARCE1.pdf", width=10, height=20)
print(g1)
dev.off()

quartz_off_screen 
                2

ELAVL1

this_gene="ELAVL1"

these_exons <- gr.isoseq %>% dplyr::filter(annot_gene_name == this_gene & type == "exon" & (counts > 100 | novelty2=="Known"))
this_cds <- gr.isoseq %>% dplyr::filter(annot_gene_name == this_gene & type == "CDS" & (counts > 100 | novelty2=="Known"))

g1<-these_exons %>% 
  ggplot(aes(
    xstart = start,
    xend = end,
    y = reorder(transcript_id, counts)
  )) +
  geom_range(
    aes(fill = log2(1+cpm), group=novelty2), height=.25) + 
  geom_range(data=this_cds, aes(fill = log2(1+cpm), group=novelty2)) + 
  geom_intron(
    data = to_intron(these_exons, "transcript_id"),
    aes(strand = strand),arrow.min.intron.length = 500,
    arrow = grid::arrow(ends = "last", length = grid::unit(0.1, "inches")),
    color='grey60',
  ) + facet_grid(novelty2~.,scale='free',space='free') + theme_bw() + labs(y="") + ggtitle(this_gene,subtitle = unique(these_exons$seqnames)) + theme(plot.title = element_text(hjust=.5), plot.subtitle = element_text(hjust=.5), axis.text.y = element_text(size=12))
g1

pdf("output/figures/Fig4/Fig4D_subplot_ggtranscript_ELAVL1.pdf", width=10, height=4)
print(g1)
dev.off()

quartz_off_screen 
                2

DDX3X

this_gene="DDX3X"

these_exons <- gr.isoseq %>% dplyr::filter(annot_gene_name == this_gene & type == "exon" & (counts > 100 | novelty2=="Known"))
this_cds <- gr.isoseq %>% dplyr::filter(annot_gene_name == this_gene & type == "CDS" & (counts > 100 | novelty2=="Known"))

g1<-these_exons %>% 
  ggplot(aes(
    xstart = start,
    xend = end,
    y = reorder(transcript_id, counts)
  )) +
  geom_range(
    aes(fill = log2(1+cpm), group=novelty2), height=.25) + 
  geom_range(data=this_cds, aes(fill = log2(1+cpm), group=novelty2)) + 
  geom_intron(
    data = to_intron(these_exons, "transcript_id"),
    aes(strand = strand),arrow.min.intron.length = 500,
    arrow = grid::arrow(ends = "last", length = grid::unit(0.1, "inches")),
    color='grey60',
  ) + facet_grid(novelty2~.,scale='free',space='free') + theme_bw() + labs(y="") + ggtitle(this_gene,subtitle = unique(these_exons$seqnames)) + theme(plot.title = element_text(hjust=.5), plot.subtitle = element_text(hjust=.5), axis.text.y = element_text(size=12))
g1

pdf("output/figures/supplement/FigS7A_subplot_ggtranscript_DDX3X.pdf", width=10, height=20)
print(g1)
dev.off()

quartz_off_screen 
                2

sessionInfo()

R version 4.1.3 (2022-03-10)
Platform: x86_64-apple-darwin17.0 (64-bit)
Running under: macOS Big Sur/Monterey 10.16

Matrix products: default
BLAS:   /Library/Frameworks/R.framework/Versions/4.1/Resources/lib/libRblas.0.dylib
LAPACK: /Library/Frameworks/R.framework/Versions/4.1/Resources/lib/libRlapack.dylib

locale:
[1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8

attached base packages:
[1] stats4    stats     graphics  grDevices utils     datasets  methods  
[8] base     

other attached packages:
 [1] ggtranscript_0.99.9    GenomicFeatures_1.46.5 AnnotationDbi_1.56.2  
 [4] Biobase_2.54.0         GenomicRanges_1.46.1   GenomeInfoDb_1.30.1   
 [7] IRanges_2.28.0         S4Vectors_0.32.4       BiocGenerics_0.40.0   
[10] paletteer_1.5.0        cowplot_1.1.1          gridExtra_2.3         
[13] gprofiler2_0.2.1       readxl_1.4.1           openxlsx_4.2.5        
[16] biomaRt_2.50.3         WGCNA_1.72-1           fastcluster_1.2.3     
[19] dynamicTreeCut_1.63-1  edgeR_3.36.0           limma_3.50.3          
[22] forcats_0.5.2          stringr_1.5.0          dplyr_1.0.10          
[25] purrr_0.3.4            readr_2.1.2            tidyr_1.2.0           
[28] tibble_3.1.8           ggplot2_3.3.6          tidyverse_1.3.2       

loaded via a namespace (and not attached):
  [1] backports_1.4.1             Hmisc_4.7-1                
  [3] BiocFileCache_2.2.1         lazyeval_0.2.2             
  [5] splines_4.1.3               BiocParallel_1.28.3        
  [7] digest_0.6.29               foreach_1.5.2              
  [9] htmltools_0.5.3             GO.db_3.14.0               
 [11] fansi_1.0.3                 magrittr_2.0.3             
 [13] checkmate_2.1.0             memoise_2.0.1              
 [15] googlesheets4_1.0.1         cluster_2.1.4              
 [17] doParallel_1.0.17           tzdb_0.3.0                 
 [19] Biostrings_2.62.0           modelr_0.1.9               
 [21] matrixStats_0.62.0          vroom_1.5.7                
 [23] prettyunits_1.1.1           jpeg_0.1-9                 
 [25] colorspace_2.0-3            ggrepel_0.9.2              
 [27] blob_1.2.3                  rvest_1.0.3                
 [29] rappdirs_0.3.3              haven_2.5.1                
 [31] xfun_0.32                   prismatic_1.1.1            
 [33] crayon_1.5.1                RCurl_1.98-1.8             
 [35] jsonlite_1.8.0              impute_1.68.0              
 [37] survival_3.4-0              iterators_1.0.14           
 [39] glue_1.6.2                  gtable_0.3.1               
 [41] gargle_1.2.0                zlibbioc_1.40.0            
 [43] XVector_0.34.0              DelayedArray_0.20.0        
 [45] scales_1.2.1                DBI_1.1.3                  
 [47] Rcpp_1.0.9                  viridisLite_0.4.1          
 [49] progress_1.2.2              htmlTable_2.4.1            
 [51] foreign_0.8-82              bit_4.0.4                  
 [53] preprocessCore_1.56.0       Formula_1.2-4              
 [55] htmlwidgets_1.5.4           httr_1.4.4                 
 [57] RColorBrewer_1.1-3          ellipsis_0.3.2             
 [59] pkgconfig_2.0.3             XML_3.99-0.10              
 [61] farver_2.1.1                nnet_7.3-17                
 [63] dbplyr_2.2.1                deldir_1.0-6               
 [65] locfit_1.5-9.6              utf8_1.2.2                 
 [67] tidyselect_1.2.0            labeling_0.4.2             
 [69] rlang_1.0.6                 munsell_0.5.0              
 [71] cellranger_1.1.0            tools_4.1.3                
 [73] cachem_1.0.6                cli_3.4.1                  
 [75] generics_0.1.3              RSQLite_2.2.16             
 [77] broom_1.0.1                 evaluate_0.16              
 [79] fastmap_1.1.0               yaml_2.3.5                 
 [81] rematch2_2.1.2              knitr_1.40                 
 [83] bit64_4.0.5                 fs_1.5.2                   
 [85] zip_2.2.0                   KEGGREST_1.34.0            
 [87] xml2_1.3.3                  compiler_4.1.3             
 [89] rstudioapi_0.14             plotly_4.10.0              
 [91] filelock_1.0.2              curl_4.3.2                 
 [93] png_0.1-7                   reprex_2.0.2               
 [95] stringi_1.7.8               lattice_0.20-45            
 [97] Matrix_1.4-1                vctrs_0.5.1                
 [99] pillar_1.8.1                lifecycle_1.0.3            
[101] data.table_1.14.2           bitops_1.0-7               
[103] rtracklayer_1.54.0          BiocIO_1.4.0               
[105] R6_2.5.1                    latticeExtra_0.6-30        
[107] codetools_0.2-18            assertthat_0.2.1           
[109] SummarizedExperiment_1.24.0 rjson_0.2.21               
[111] withr_2.5.0                 GenomicAlignments_1.30.0   
[113] Rsamtools_2.10.0            GenomeInfoDbData_1.2.7     
[115] parallel_4.1.3              hms_1.1.2                  
[117] grid_4.1.3                  rpart_4.1.16               
[119] rmarkdown_2.16              MatrixGenerics_1.6.0       
[121] googledrive_2.0.0           lubridate_1.8.0            
[123] base64enc_0.1-3             restfulr_0.0.15            
[125] interp_1.1-3