Figure 4 - Combined Networks

Author

Michael Gandal

Published

January 19, 2023

Load Packages and Data

source('code/fisher_overlap.R')

suppressPackageStartupMessages({
  library(tidyverse)
  library(edgeR)
  library(WGCNA)
  library(biomaRt)
})

colorVector = c(
  "Known" = "#009E73",
  "ISM"   = "#0072B2",
  "ISM_Prefix" = "#005996",
  "ISM_Suffix" = "#378bcc",
  "NIC"   = "#D55E00",
  "NNC"   = "#E69F00",
  "Other" = "#000000"
)
colorVector_ismSplit = colorVector[-2]



load("ref/EWCE/CellTypeData_DamonNeuralFetalOnly.rda")
celltypemarkers <- openxlsx::read.xlsx('https://www.cell.com/cms/10.1016/j.neuron.2019.06.011/attachment/508ae008-b926-487a-b871-844a12acc1f8/mmc5', sheet='Cluster enriched genes') %>% as_tibble()
celltypemarkers_tableS5 = openxlsx::read.xlsx('https://ars.els-cdn.com/content/image/1-s2.0-S0896627319305616-mmc6.xlsx',sheet=2)
celltypemarkers.bg = read.csv("ref/polioudakis_neuron2020/single_cell_background_MJG221228.csv") %>% dplyr::select(ensembl_gene_id) %>% pull()
celltypemarkers.bg = unique(c(celltypemarkers.bg, celltypemarkers$Ensembl))

cts = read_tsv("data/cp_vz_0.75_min_7_recovery_talon_abundance_filtered.tsv.gz")
Rows: 214516 Columns: 35
── Column specification ────────────────────────────────────────────────────────
Delimiter: "\t"
chr  (7): annot_gene_id, annot_transcript_id, annot_gene_name, annot_transcr...
dbl (28): gene_ID, transcript_ID, n_exons, length, 209_1_VZ, 209_2_VZ, 209_3...

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
datExpr.isoCounts = as.data.frame(cts[,12:35])
rownames(datExpr.isoCounts) = cts$annot_transcript_id
datMeta = data.frame(sample=colnames(datExpr.isoCounts))
datMeta$Region = substr(datMeta$sample, 7,9)
datMeta$Subject = substr(datMeta$sample, 1,3)
datMeta$batch = substr(datMeta$sample, 5,5)
datAnno = data.frame(gene_id = cts$annot_gene_id, transcript_id=cts$annot_transcript_id,
                     transcript_name=cts$annot_transcript_name, length=cts$length,
                     gene_name = cts$annot_gene_name, ensg = substr(cts$annot_gene_id,1,15), novelty=cts$transcript_novelty)


rbp_targets = read.csv("ref/RBP_targets.csv")
rbp_targets$regulation[is.na(rbp_targets$regulation)]=""


mart = useMart("ENSEMBL_MART_ENSEMBL","mmusculus_gene_ensembl")
f = listFilters(mart); a = listAttributes(mart)
featuresToGet = c("ensembl_gene_id", "external_gene_name", "hsapiens_homolog_ensembl_gene", "hsapiens_homolog_associated_gene_name","hsapiens_homolog_orthology_type")
mouseHumanHomologs = getBM(attributes = featuresToGet,mart = mart)

human_mouse_bg = mouseHumanHomologs %>% as_tibble() %>% filter(hsapiens_homolog_orthology_type == "ortholog_one2one") %>% dplyr::select("hsapiens_homolog_ensembl_gene") %>% pull()

## Load 3 Networks
datExpr.isoFr = readRDS('data/working/WGCNA/final/datExpr.localIF_batchCorrected_103k.rds')
net.isoFr = readRDS('data/working/WGCNA/final/WGCNA_isoformFraction_top103k_signed_sft14_net.rds')
net.isoFr$colors = labels2colors(net.isoFr$cut2$labels)
names(net.isoFr$colors) = names(net.isoFr$cut2$labels)
net.isoFr$module.number = net.isoFr$cut2$labels
net.isoFr$MEs = moduleEigengenes(t(datExpr.isoFr), colors=net.isoFr$colors)
net.isoFr$kMEtable = signedKME(t(datExpr.isoFr), datME =net.isoFr$MEs$eigengenes,corFnc = "bicor")
Warning in bicor(datExpr, datME, , use = "p"): bicor: zero MAD in variable 'x'.
Pearson correlation was used for individual columns with zero (or missing) MAD.
datExpr.isoExpr = readRDS(file='data/working/WGCNA/final/datExpr.isoExpr_batchCorrected_92k.rds')
net.isoExpr = readRDS('data/working/WGCNA/final/WGCNA_isoExpression_top92k_signed_sft9_net.rds')
net.isoExpr$colors = labels2colors(net.isoExpr$cut2$labels)
names(net.isoExpr$colors) = names(net.isoExpr$cut2$labels)
net.isoExpr$module.number = net.isoExpr$cut2$labels
net.isoExpr$MEs = moduleEigengenes(t(datExpr.isoExpr), colors=net.isoExpr$colors)
net.isoExpr$kMEtable = signedKME(t(datExpr.isoExpr), datME =net.isoExpr$MEs$eigengenes,corFnc = "bicor")

datExpr.geneExpr = readRDS(file='data/working/WGCNA/final/datExpr.geneExpr_batchCorrected_16k.rds')
net.geneExpr = readRDS(file='data/working/WGCNA/final/WGCNA_geneExpression_top16k_signed_sft14_net.rds')
net.geneExpr$colors = labels2colors(net.geneExpr$cut2$labels)
names(net.geneExpr$colors) = names(net.geneExpr$cut2$labels)
net.geneExpr$module.number = net.geneExpr$cut2$labels
net.geneExpr$MEs = moduleEigengenes(t(datExpr.geneExpr), colors=net.geneExpr$colors)
net.geneExpr$kMEtable = signedKME(t(datExpr.geneExpr), datME =net.geneExpr$MEs$eigengenes,corFnc = "bicor")

TableS4A

tidyMods1 = tibble(network = "isoExpr", transcript_id = rownames(net.isoExpr$kMEtable), module.color=net.isoExpr$colors, module.number = net.isoExpr$module.number, net.isoExpr$kMEtable) %>% pivot_longer(-c(network, transcript_id,module.color,module.number), names_to = "kME_to_module", values_to = 'kME') %>% mutate(kMEtoMod = gsub("kME", "", kME_to_module)) %>% filter(module.color==kMEtoMod) %>% dplyr::select(-kMEtoMod, -kME_to_module)

tidyMods2 = tibble(network = "isoUsage", transcript_id = rownames(net.isoFr$kMEtable), module.color=net.isoFr$colors, module.number = net.isoFr$module.number, net.isoFr$kMEtable) %>% pivot_longer(-c(network, transcript_id,module.color,module.number), names_to = "kME_to_module", values_to = 'kME') %>% mutate(kMEtoMod = gsub("kME", "", kME_to_module)) %>% filter(module.color==kMEtoMod) %>% dplyr::select(-kMEtoMod, -kME_to_module)

tidyMods <- rbind(tidyMods1, tidyMods2) %>% mutate(module=paste0(network, ".M", module.number, "_", module.color))
tidyMods <- tidyMods %>% left_join(datAnno %>% dplyr::select(gene_id, transcript_id, gene_name))
Joining, by = "transcript_id"
tidyMods3 = tibble(network = "geneExpr", gene_id = rownames(net.geneExpr$kMEtable), module.color=net.geneExpr$colors, module.number = net.geneExpr$module.number, net.geneExpr$kMEtable) %>% pivot_longer(-c(network, gene_id,module.color,module.number), names_to = "kME_to_module", values_to = 'kME') %>% mutate(kMEtoMod = gsub("kME", "", kME_to_module)) %>% filter(module.color==kMEtoMod) %>% dplyr::select(-kMEtoMod, -kME_to_module) %>% mutate(module=paste0(network, ".M", module.number, "_", module.color), transcript_id=NA)
tidyMods3$gene_name = datAnno$gene_name[match(tidyMods3$gene_id, datAnno$gene_id)]

tidyMods = rbind(tidyMods, tidyMods3)
tidyMods <- tidyMods %>% group_by(module) %>% mutate(hub.rank = rank(-kME)) %>% mutate(hub = (hub.rank <= 25), ensg = substr(gene_id,1,15))
rm(tidyMods1, tidyMods2,tidyMods3)
write_tsv(tidyMods,file='output/tables/TableS4A_networks.tsv')
write_tsv(tidyMods %>% dplyr::select(ensg, module),file='output/tables/TableS4A_networks_forLDSC.tsv')
# write_tsv(tidyMods %>% dplyr::select(gene_name, module),file='~/Downloads/TableS4A_networks_forTransite.tsv')
# write_tsv(tidyMods %>% ungroup() %>% dplyr::select(gene_name) %>% unique(),file='~/Downloads/TableS4A_networks_forTransite_background.tsv')
tidyMods_all = tidyMods

Network Dendrogram Plots

Fig4A: Plot IsoExpr

this_tree = net.isoExpr$dendrograms[[1]]
this_anno = datAnno[match(rownames(datExpr.isoExpr), datAnno$transcript_id),]
these_mods = data.frame(IsoExpr=net.isoExpr$colors)
these_mods$IsoFR = net.isoFr$colors[this_anno$transcript_id]
these_mods$GeneExpr = net.geneExpr$colors[this_anno$gene_id]

cell_anno = as.data.frame(matrix('white', nrow=nrow(this_anno), ncol = length(unique(celltypemarkers$Cluster))))
colnames(cell_anno) = sort(unique(celltypemarkers$Cluster))
for(this_cell in unique(celltypemarkers$Cluster)) {
    marker_genes = celltypemarkers %>% filter(Cluster == this_cell) %>% dplyr::select(Ensembl) %>% pull()
    cell_anno[this_anno$ensg %in% marker_genes, this_cell] = "red"
}

ndd = read.csv("ref/ASD+SCZ+DDD_2022.csv")
ndd <- ndd %>% filter(Set %in% c("ASD (Satterstrom et al. 2019)", "ASD (SFARI score 1or2)", "ASD (SFARI syndromic)", "DDD (Kaplanis et al. 2019)", "SCZ_SCHEMA genes (FDR < 5%)")) %>% dplyr::select(Gene) %>% unique() %>% pull()

these_mods$NDD = 'white'
these_mods$NDD[this_anno$gene_name %in% ndd] = 'purple'

rbpcolors = table(rbp_targets$ENSG)[this_anno$ensg]
these_mods$RBP_targets = numbers2colors(rbpcolors,signed = T,centered = F, colors=blueWhiteRed(100)[51:1], naColor = "white" )


plotDendroAndColors(this_tree, colors=cbind(these_mods), dendroLabels = F)

 pdf(file = "output/figures/Fig4/Fig4_wgcna.pdf",width = 8, height=8)
plotDendroAndColors(this_tree, colors=cbind(these_mods, cell_anno), dendroLabels = F, main="")
dev.off()
quartz_off_screen 
                2 
pdf(file = "output/figures/Fig4/Fig4A.pdf",width = 8, height=2)
plotDendroAndColors(this_tree,colors=NULL,dendroLabels = F, main="")
dev.off()
quartz_off_screen 
                2 
jpeg(filename = "output/figures/Fig4/Fig4_wgcna.jpeg",width = 8, height=8, units='in',res = 300)
plotDendroAndColors(this_tree, colors=cbind(these_mods, cell_anno), dendroLabels = F, main="")
dev.off()
quartz_off_screen 
                2 

FigS4: Supplement (Plot IsoFr)

this_tree = net.isoFr$tree
this_anno = datAnno[match(rownames(datExpr.isoFr), datAnno$transcript_id),]
these_mods = data.frame(IsoFr=net.isoFr$colors)
these_mods$IsoExpr = net.isoExpr$colors[this_anno$transcript_id]
these_mods$GeneExpr = net.geneExpr$colors[this_anno$gene_id]

cell_anno = as.data.frame(matrix('white', nrow=nrow(this_anno), ncol = length(unique(celltypemarkers$Cluster))))
colnames(cell_anno) = sort(unique(celltypemarkers$Cluster))
for(this_cell in unique(celltypemarkers$Cluster)) {
    marker_genes = celltypemarkers %>% filter(Cluster == this_cell) %>% dplyr::select(Ensembl) %>% pull()
    cell_anno[this_anno$ensg %in% marker_genes, this_cell] = "red"
}

ndd = read.csv("ref/ASD+SCZ+DDD_2022.csv")
ndd <- ndd %>% filter(Set %in% c("ASD (Satterstrom et al. 2019)", "ASD (SFARI score 1or2)", "ASD (SFARI syndromic)", "DDD (Kaplanis et al. 2019)", "SCZ_SCHEMA genes (FDR < 5%)")) %>% dplyr::select(Gene) %>% unique() %>% pull()

these_mods$NDD = 'white'
these_mods$NDD[this_anno$gene_name %in% ndd] = 'purple'

plotDendroAndColors(this_tree, colors=cbind(these_mods), dendroLabels = F)

Compare networks

Cell-type enrichment

EWCE

if(FALSE){
  df_ewce= data.frame()
  datAnno$net.geneExpr = net.geneExpr$colors[datAnno$gene_id]
  datAnno$net.isoExpr = net.isoExpr$colors[datAnno$transcript_id]
  datAnno$net.isoFr = net.isoFr$colors[datAnno$transcript_id]
  
  for(this_mod in unique(na.omit(datAnno$net.geneExpr[datAnno$net.geneExpr!="grey"]))) {
    mod_genes = unique(na.omit(datAnno$gene_name[datAnno$net.geneExpr==this_mod]))
    mod_gene.bg = unique(na.omit(datAnno$gene_name[!is.na(datAnno$net.geneExpr)]))
    res=EWCE::bootstrap_enrichment_test(ctd, hits = mod_genes, bg=mod_gene.bg,genelistSpecies = 'human',sctSpecies = 'human',annotLevel = 2,verbose = F,no_cores = 10,reps = 10000)
      df_ewce = rbind(df_ewce,data.frame(net = 'net.geneExpr', mod=this_mod, res$results))
  }
  
  for(this_mod in unique(na.omit(datAnno$net.isoExpr[datAnno$net.isoExpr!="grey"]))) {
    mod_genes = unique(na.omit(datAnno$gene_name[datAnno$net.isoExpr==this_mod]))
    mod_gene.bg = unique(na.omit(datAnno$gene_name[!is.na(datAnno$net.isoExpr)]))
    res=EWCE::bootstrap_enrichment_test(ctd, hits = mod_genes, bg=mod_gene.bg,genelistSpecies = 'human',sctSpecies = 'human',annotLevel = 2,verbose = F,no_cores = 10,reps = 10000)
      df_ewce = rbind(df_ewce,data.frame(net = 'net.isoExpr', mod=this_mod, res$results))
  }
  
  for(this_mod in unique(na.omit(datAnno$net.isoFr[datAnno$net.isoFr!="grey"]))) {
    mod_genes = unique(na.omit(datAnno$gene_name[datAnno$net.isoFr==this_mod]))
    mod_gene.bg = unique(na.omit(datAnno$gene_name[!is.na(datAnno$net.isoFr)]))
    res=EWCE::bootstrap_enrichment_test(ctd, hits = mod_genes, bg=mod_gene.bg,genelistSpecies = 'human',sctSpecies = 'human',annotLevel = 2,verbose = F,no_cores = 10,reps = 10000)
      df_ewce = rbind(df_ewce,data.frame(net = 'net.isoFr', mod=this_mod, res$results))
  }
  
  
  df_ewce$label = ""
  df_ewce$label[df_ewce$q<.05] = signif((df_ewce$sd_from_mean[df_ewce$q<.05]),1)
  ggplot(df_ewce, aes(y=mod, x=CellType, label=label, fill=-log10(p+1e-5))) + geom_tile(color='grey') + scale_fill_gradient(low='white', high='red') + geom_text(size=2) + theme(axis.text.x = element_text(angle=-45,hjust=0)) + facet_grid(net~., scales='free',space = 'free') + labs(x="", y="")
  
  df_ewce$mod2 = paste0(df_ewce$net, '.', df_ewce$mod)
  
  df_ewce %>% as_tibble() %>%  mutate(ct = ((sd_from_mean))) %>% group_by(net) %>% summarise(mean(ct), median(ct), max(ct))
}

Fisher’s exact

df_fisher.celltype = data.frame()
networks = list("geneExpr" = net.geneExpr$module.number[datAnno$gene_id], 
                "isoExpr" = net.isoExpr$module.number[datAnno$transcript_id], 
                "isoUsage" = net.isoFr$module.number[datAnno$transcript_id])

for(this_net in names(networks)) {
  all_mods = unique(na.omit(networks[[this_net]][networks[[this_net]] != 0]))

  for(this_mod in all_mods) {
    mod_genes = unique(na.omit(datAnno$ensg[networks[[this_net]]==this_mod]))
    mod_gene.bg = unique(na.omit(datAnno$ensg[!is.na(networks[[this_net]])]))
    
    for(this_cell in unique(celltypemarkers$Cluster)) {
      marker_genes = celltypemarkers %>% filter(Cluster == this_cell) %>% 
        dplyr::select(Ensembl) %>% pull()
      enrichment = ORA(mod_genes, marker_genes, mod_gene.bg, celltypemarkers.bg)
      df_fisher.celltype = rbind(df_fisher.celltype, 
                        data.frame(net = this_net, mod = this_mod, cell=this_cell,
                                   as.data.frame(t(enrichment))))
    }
  }
}

for(col in c("OR", "Fisher.p")) df_fisher.celltype[,col] = as.numeric(df_fisher.celltype[,col])
df_fisher.celltype$Fisher.p[df_fisher.celltype$OR<1] = 1
df_fisher.celltype$Fisher.q = p.adjust(df_fisher.celltype$Fisher.p,'fdr')

df_fisher.celltype$label = ""
df_fisher.celltype$label[df_fisher.celltype$Fisher.q<.05] = signif((df_fisher.celltype$OR[df_fisher.celltype$Fisher.q<.05]),1)
FigS4
df_fisher.celltype$net = factor(df_fisher.celltype$net, levels=c("geneExpr", "isoUsage", "isoExpr"))
g1= ggplot(df_fisher.celltype, aes(y=factor(mod,levels=c(100:1)), x=cell, label=label, fill=-log10(Fisher.q))) + geom_tile(color='grey') + scale_fill_gradient(low='white', high='red') + geom_text(size=2) + theme(axis.text.x = element_text(angle=-45,hjust=0)) + facet_grid(net~., scales='free',space = 'free') + labs(x="", y="")
g1

ggsave(g1, file="output/figures/Fig4/Fig4_supp_Fisher.pdf",width=5,height=12)
TableS4B
TableS4B <- df_fisher.celltype%>% mutate(module.number = as.numeric(mod), module.color = labels2colors(module.number), mod = paste0(net, "M.", module.number, "_", module.color))
write_tsv(TableS4B, file="output/tables/TableS4B_fisherCellType.tsv")

Single Cell IsoSeq Overlap

tableS5 = openxlsx::read.xlsx('data/single_cell/Table_S5_v2.xlsx',sheet = "C") %>% as_tibble()
mod_bg = tidyMods_all %>% dplyr::select(transcript_id) %>% unique() %>% pull() %>% substr(1,15)
Adding missing grouping variables: `module`
df_scOverlap = data.frame()
tidyMods_all <- tidyMods_all %>% ungroup()
for(this_cell in unique(tableS5$cluster)){
  cell_markers = tableS5 %>% filter(cluster == this_cell) %>% dplyr::select(Isoform.Id) %>% pull()
  for(this_mod in tidyMods_all %>% filter(network != 'geneExpr', module.color !='grey') %>% dplyr::select(module) %>% unique() %>% pull()) {
    mod_isoforms = tidyMods_all %>% filter(module==this_mod) %>% dplyr::select(transcript_id) %>% pull() %>% substr(1,15)
    this_stat = ORA(mod_isoforms,cell_markers, mod_bg,mod_bg)
    df_scOverlap = rbind(df_scOverlap, data.frame(cell=this_cell, mod=this_mod, t(this_stat)))
  }
}

for(col in c("OR", "Fisher.p")) df_scOverlap[,col] = as.numeric(df_scOverlap[,col])
df_scOverlap$Fisher.p[df_scOverlap$OR<1] = 1
df_scOverlap$Fisher.q = p.adjust(df_scOverlap$Fisher.p,'fdr')

df_scOverlap$label = ""
df_scOverlap$label[df_scOverlap$Fisher.q<.05] = signif((df_scOverlap$OR[df_scOverlap$Fisher.q<.05]),1)

df_scOverlap$net = factor(substr(df_scOverlap$mod,1,6))

ggplot(df_scOverlap, aes(y=mod, x=cell, label=label, fill=-log10(Fisher.q))) + geom_tile(color='grey') + scale_fill_gradient(low='white', high='red') + geom_text(size=2) + theme(axis.text.x = element_text(angle=-45,hjust=0)) + facet_wrap(net~., scales='free',shrink =T,ncol=1) + labs(x="", y="") + coord_flip()

Rare Var Enrichments

Isoform level

if(FALSE) {
  fu=openxlsx::read.xlsx(('https://static-content.springer.com/esm/art%3A10.1038%2Fs41588-022-01104-0/MediaObjects/41588_2022_1104_MOESM3_ESM.xlsx'),'Supplementary Table 11')
fu$p_TADA_ASD[fu$p_TADA_ASD==0] = min(fu$p_TADA_ASD[fu$p_TADA_ASD >0],na.rm=T)
fu$p_TADA_NDD[fu$p_TADA_NDD==0] = min(fu$p_TADA_NDD[fu$p_TADA_NDD >0],na.rm=T)

datAnno.logit = data.frame(ASD_fuTADA= -log10(fu$p_TADA_ASD)[match(datAnno$ensg, fu$gene_id)])
datAnno.logit$NDD_fuTADA = -log10(fu$p_TADA_NDD)[match(datAnno$ensg, fu$gene_id)]

SCZ.schema = read_tsv('ref/risk_genes/SCHEMA_gene_results.tsv')
datAnno.logit$SCZ_schema = -log10(SCZ.schema$`P meta`[match(datAnno$ensg, SCZ.schema$gene_id)])

BIP.bipex = read_tsv('ref/risk_genes/BipEx_gene_results.tsv') %>% filter(group=="Bipolar Disorder")
datAnno.logit$BIP.bipex = -log10(BIP.bipex$ptv_fisher_gnom_non_psych_pval[match(datAnno$ensg,BIP.bipex$gene_id)])

EPI.epi25 = read_tsv('ref/risk_genes/Epi25_gene_results.tsv') %>% filter(group=="EPI")
datAnno.logit$EPI.epi25 = -log10(EPI.epi25$pval[match(datAnno$ensg,EPI.epi25$gene_id)])



networks = list("net.isoExpr" = net.isoExpr$colors[datAnno$transcript_id], 
                "net.isoFr" = net.isoFr$colors[datAnno$transcript_id])


df_logit= data.frame()

for(this_net in names(networks)) {
  all_mods = unique(na.omit(networks[[this_net]][networks[[this_net]] != 'grey']))

  for(this_mod in all_mods) {
    
    binaryVec = as.numeric(networks[[this_net]] == this_mod)
    
    for(this_rare_var in colnames(datAnno.logit)) {
      this_glm = summary(glm(binaryVec ~ datAnno.logit[,this_rare_var] + length + log10(length), data=datAnno, family='binomial'))
    df_logit = rbind(df_logit, data.frame(net = this_net, mod = this_mod, set = this_rare_var, t(this_glm$coefficients[2,])))
      
    }
  }
}
    
df_logit$OR = exp(df_logit$Estimate)
df_logit$P = df_logit$Pr...z..
df_logit$P[df_logit$OR < 1] = 1
df_logit$Q = p.adjust(df_logit$P,'fdr')
df_logit$mod = factor(df_logit$mod, levels=labels2colors(100:1))


df_logit$label = signif((df_logit$OR),3)
df_logit$label[df_logit$Q>.1] = ""
ggplot(df_logit, aes(y=factor(mod),x=set, label=label, fill=-log10(P))) + geom_tile() + scale_fill_gradient(low='white', high='red') + geom_text(size=2) + theme(axis.text.x = element_text(angle=-45,hjust=0)) + facet_wrap(~net, scales='free') + labs(x="",y="")
}

Gene level

geneAnno = rtracklayer::import("ref/gencode.v33lift37.annotation.gtf.gz") %>% as_tibble() %>% filter(type=='gene')
geneAnno$ensg = substr(geneAnno$gene_id,1,15)

fu=openxlsx::read.xlsx(('https://static-content.springer.com/esm/art%3A10.1038%2Fs41588-022-01104-0/MediaObjects/41588_2022_1104_MOESM3_ESM.xlsx'),'Supplementary Table 11')
fu$p_TADA_ASD[fu$p_TADA_ASD==0] = min(fu$p_TADA_ASD[fu$p_TADA_ASD >0],na.rm=T)
fu$p_TADA_NDD[fu$p_TADA_NDD==0] = min(fu$p_TADA_NDD[fu$p_TADA_NDD >0],na.rm=T)

geneAnno.logit = data.frame(ASD_fuTADA= -log10(fu$p_TADA_ASD)[match(geneAnno$ensg, fu$gene_id)])
geneAnno.logit$NDD_fuTADA = -log10(fu$p_TADA_NDD)[match(geneAnno$ensg, fu$gene_id)]

SCZ.schema = read_tsv('ref/risk_genes/SCHEMA_gene_results.tsv')
Warning: One or more parsing issues, see `problems()` for details
Rows: 18321 Columns: 26
── Column specification ────────────────────────────────────────────────────────
Delimiter: "\t"
chr  (8): gene_id, group, OR (PTV), OR (Class I), OR (Class II), OR (PTV) up...
dbl (16): Case PTV, Ctrl PTV, Case mis3, Ctrl mis3, Case mis2, Ctrl mis2, P ...
lgl  (2): De novo mis3, De novo mis2

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
geneAnno.logit$SCZ_schema = -log10(SCZ.schema$`P meta`[match(geneAnno$ensg, SCZ.schema$gene_id)])

BIP.bipex = read_tsv('ref/risk_genes/BipEx_gene_results.tsv') %>% filter(group=="Bipolar Disorder")
Rows: 119958 Columns: 20
── Column specification ────────────────────────────────────────────────────────
Delimiter: "\t"
chr  (4): gene_id, group, damaging_missense_fisher_gnom_non_psych_OR, ptv_fi...
dbl (16): n_cases, n_controls, damaging_missense_case_count, damaging_missen...

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
geneAnno.logit$BIP.bipex = -log10(BIP.bipex$ptv_fisher_gnom_non_psych_pval[match(geneAnno$ensg,BIP.bipex$gene_id)])

EPI.epi25 = read_tsv('ref/risk_genes/Epi25_gene_results.tsv') %>% filter(group=="EPI")
Rows: 71456 Columns: 12
── Column specification ────────────────────────────────────────────────────────
Delimiter: "\t"
chr (2): gene_id, group
dbl (9): xcase_lof, xctrl_lof, pval_lof, xcase_mpc, xctrl_mpc, pval_mpc, xca...
lgl (1): pval_infrIndel

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
geneAnno.logit$EPI.epi25 = -log10(EPI.epi25$pval[match(geneAnno$ensg,EPI.epi25$gene_id)])



networks = data.frame(net="isoExpr", module = net.isoExpr$module.number, transcript_id = names(net.isoExpr$colors))
networks = rbind(networks, data.frame(net='isoUsage', module = net.isoFr$module.number, transcript_id = names(net.isoFr$colors)))
networks$gene_id = datAnno$gene_id[match(networks$transcript_id, datAnno$transcript_id)]
networks = rbind(networks, data.frame(net='geneExpr', module = net.geneExpr$module.number, transcript_id = NA, gene_id=names(net.geneExpr$colors)))
networks$ensg = substr(networks$gene_id,1,15)


df_logit.gene = data.frame()
binaryVec = rep(NA, times=nrow(geneAnno))
names(binaryVec) = geneAnno$ensg

for(this_net in unique(networks$net)) {
  all_mods = unique(na.omit(networks$module[networks$net ==this_net & networks$module!=0]))
  
  for(this_mod in all_mods) {
    
    this_binary_vec = binaryVec
    this_binary_vec[names(this_binary_vec) %in% networks$ensg[networks$net==this_net]] = 0 
    this_binary_vec[names(this_binary_vec) %in% networks$ensg[networks$net==this_net & networks$module==this_mod]] = 1
    
    for(this_rare_var in colnames(geneAnno.logit)) {
      this_glm = summary(glm(this_binary_vec ~ geneAnno.logit[,this_rare_var] + width + log10(width), data=geneAnno, family='binomial'))
    df_logit.gene = rbind(df_logit.gene, data.frame(net = this_net, mod = this_mod, set = this_rare_var, t(this_glm$coefficients[2,])))
      
    }
  }
}
Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
df_logit.gene$OR = exp(df_logit.gene$Estimate)
df_logit.gene$P = df_logit.gene$Pr...z..
df_logit.gene$P[df_logit.gene$OR < 1] = 1
df_logit.gene$Q = p.adjust(df_logit.gene$P,'fdr')

df_logit.gene$label = signif((df_logit.gene$OR),3)
df_logit.gene$label[df_logit.gene$Q>.05] = ""
FigS4
g1 = ggplot(df_logit.gene, aes(y=factor(mod,levels=c(100:1)),x=set, label=label, fill=-log10(P))) + geom_tile() + scale_fill_gradient(low='white', high='red') + geom_text(size=2) + theme(axis.text.x = element_text(angle=-45,hjust=0)) + facet_wrap(~net, scales='free') + labs(x="", y="")
g1

ggsave(g1,file="output/figures/supplement/FigS4_rareVar_geneLogit.pdf")
Saving 7 x 5 in image
TableS4C
TableS4C <- df_logit.gene %>% mutate(module=paste0(net, ".M", mod, "_", labels2colors(mod)))
write_tsv(TableS4C, file="output/tables/TableS4C_rareVarlogit.tsv")

GWAS Enrichments

df_gwas = data.frame()
gwas = dir("data/LDSC_cell_type_specific/networks/isoExpr_cts_result/", pattern="txt")
for(this_gwas in gwas) {
  this_ldsc = read.table(paste0("data/LDSC_cell_type_specific/networks/isoExpr_cts_result/",this_gwas),header = T)
  colnames(this_ldsc)[1] = 'module'
  this_ldsc$gwas = gsub(".cell_type_results.txt", "", this_gwas)
  this_ldsc$net = 'isoExpr'
  df_gwas = rbind(df_gwas, this_ldsc)
}

gwas = dir("data/LDSC_cell_type_specific/networks/isoUsage_cts_result/", pattern="txt")
for(this_gwas in gwas) {
  this_ldsc = read.table(paste0("data/LDSC_cell_type_specific/networks/isoUsage_cts_result/",this_gwas),header = T)
  colnames(this_ldsc)[1] = 'module'
  this_ldsc$gwas = gsub(".cell_type_results.txt", "", this_gwas)
  this_ldsc$net = 'isoUsage'
  df_gwas = rbind(df_gwas, this_ldsc)
}

gwas = dir("data/LDSC_cell_type_specific/networks/geneExpr_cts_result/", pattern="txt")
for(this_gwas in gwas) {
  this_ldsc = read.table(paste0("data/LDSC_cell_type_specific/networks/geneExpr_cts_result/",this_gwas),header = T)
  colnames(this_ldsc)[1] = 'module'
  this_ldsc$gwas = gsub(".cell_type_results.txt", "", this_gwas)
  this_ldsc$net = 'geneExpr'
  df_gwas = rbind(df_gwas, this_ldsc)
}


df_gwas$fdr = p.adjust(df_gwas$Coefficient_P_value,'fdr')
df_gwas$mod = as.numeric(gsub("M", "", unlist(lapply(strsplit(unlist(lapply(strsplit(df_gwas$module, "[.]"),'[',2)), '_'),'[',1))))
df_gwas$Zscore = df_gwas$Coefficient/df_gwas$Coefficient_std_error

RBP Enrichments

networks = list("net.isoExpr" = net.isoExpr$colors[datAnno$transcript_id], 
                "net.isoFr" = net.isoFr$colors[datAnno$transcript_id],
                "net.geneExpr" = net.geneExpr$colors[datAnno$gene_id])

df_fisher_rbp = data.frame()

for(this_net in names(networks)) {
  all_mods = unique(na.omit(networks[[this_net]][networks[[this_net]] != 'grey']))

  for(this_mod in all_mods) {
    
    if(this_net == "net.geneExpr") {
      modGenes = substr(unique(na.omit(names(networks[[this_net]][networks[[this_net]] == this_mod]))), 1,15)
      modGeneg.bg = substr(unique(na.omit(names(networks[[this_net]]))),1,15)
    } else {
      modTx = unique(na.omit(names(networks[[this_net]][networks[[this_net]] == this_mod])))
      modGenes = unique(datAnno$ensg[match(modTx, datAnno$transcript_id)])
      modGeneg.bg = unique(datAnno$ensg[match(names(networks[[this_net]]), datAnno$transcript_id)])
    } 
  for(this_dataset in unique(rbp_targets$dataset)) {
    this_rbp = rbp_targets %>% filter(dataset == this_dataset) %>% mutate(target = paste0(RBP, "_", data.type, "_", regulation, "_", msORhs)) %>% dplyr::select(target) %>% unique()  %>% pull()
    target_genes = rbp_targets %>% filter(dataset == this_dataset) %>% dplyr::select(ENSG) %>% pull()
 
 if(grepl("Human",this_rbp)) {
   this_or = ORA(modGenes, target_genes, unique(datAnno$ensg), unique(datAnno$ensg))
 } else {
  this_or = ORA(modGenes, target_genes, unique(datAnno$ensg), human_mouse_bg)
 }
 df_fisher_rbp = rbind(df_fisher_rbp, data.frame(net = this_net, mod = this_mod, dataset = this_dataset, target = this_rbp, t(this_or)))
  }
}}
  

df_fisher_rbp$OR = as.numeric(df_fisher_rbp$OR)
  df_fisher_rbp$Fisher.p[df_fisher_rbp$OR<1] = 1
  df_fisher_rbp$Fisher.p = p.adjust(as.numeric(df_fisher_rbp$Fisher.p),'fdr')
  df_fisher_rbp$label = signif(df_fisher_rbp$OR,2)
  df_fisher_rbp$label[df_fisher_rbp$Fisher.p>.001] = ''
  df_fisher_rbp<-df_fisher_rbp %>% left_join(data.frame(mod.num = 1:100, mod=labels2colors(1:100)))
Joining, by = "mod"
  df_fisher_rbp$mod.num = factor(df_fisher_rbp$mod.num,levels=c(100:1))

  ggplot(df_fisher_rbp,aes(y=(mod.num), x=target, label=label, fill=-log10(Fisher.p))) + geom_tile() + facet_wrap(~net, scales='free') + geom_text(size=2)+ scale_fill_gradient(low='white',high='red') + theme(axis.text.x = element_text(angle=90,hjust=1))

# ggplot(df_fisher_rbp,aes(y=reorder(paste0(net, ".M", mod.num, "_", target),  -Fisher.p), x=-log10(Fisher.p), label=label, fill=net)) + 
#   geom_bar(stat='identity') + geom_text(size=2)+  theme(axis.text.x = element_text(angle=90,hjust=1))

Annotate Individual Modules

IsoExpr Network

if(FALSE) {
this_datExpr = datExpr.isoExpr
this_net = net.isoExpr
this_net$module = paste0("M", net.isoExpr$cut2$labels, ".", labels2colors(net.isoExpr$cut2$labels))
this_net$module.number = net.isoExpr$cut2$labels
this_net$colors = labels2colors(net.isoExpr$cut2$labels)
fileBaseNet="data/working/WGCNA/final/datExpr.isoExpr_batchCorrected_92k"
  

#Generate MEs and kME table for Network
MEs = moduleEigengenes(t(this_datExpr), colors=this_net$module.number)
kMEtable = signedKME(t(this_datExpr), datME =MEs$eigengenes,corFnc = "bicor")

this_anno = datAnno[match(rownames(this_datExpr), datAnno$transcript_id),]
this_anno$module = this_net$module
this_anno$module.color = this_net$colors
this_anno$module.number = this_net$module.number
idx = grep("^TALON", this_anno$transcript_name)
this_anno$transcript_name[idx] = paste0(this_anno$gene_name[idx], '_', this_anno$transcript_name[idx])

cell_anno = as.data.frame(matrix('grey', nrow=nrow(this_anno), ncol = length(unique(celltypemarkers$Cluster))))
colnames(cell_anno) = sort(unique(celltypemarkers$Cluster))
for(this_cell in unique(celltypemarkers$Cluster)) {
    marker_genes = celltypemarkers %>% filter(Cluster == this_cell) %>% dplyr::select(Ensembl) %>% pull()
    cell_anno[this_anno$ensg %in% marker_genes, this_cell] = "red"
}
  
tidyMods = tibble(transcript_id = rownames(kMEtable), module=this_net$module, module.color = this_net$colors, module.number=this_net$module.number, kMEtable)
tidyMods <- tidyMods %>% pivot_longer(-c(transcript_id,module,module.color,module.number), names_to = "kME_to_module", values_to = 'kME') %>% mutate(kMEtoMod = gsub("kME", "", kME_to_module)) %>% filter(module!=0, module != "grey", kMEtoMod !='grey', kMEtoMod != 0) %>% dplyr::select(-kME_to_module)
tidyMods <- tidyMods %>% left_join(this_anno)
tidyMods %>% group_by(module) %>% filter(kMEtoMod == module.number) %>% slice_max(kME,n=10)


pdf(file=paste0(fileBaseNet, ".pdf"),width=15,height=8)
plotDendroAndColors(this_net$tree, colors=cbind((this_net$colors),cell_anno), dendroLabels = F)


ggplot(df_fisher.celltype %>% filter(net=="isoExpr"), aes(x=cell,y=factor(mod,levels=c(100:1)), label=label, fill=-log10(Fisher.p))) + geom_tile() + scale_fill_gradient(low='white', high='red') + geom_text(size=3) + theme(axis.text.x = element_text(angle=-45,hjust=0)) + labs(x="",y="")



for(i in 1:(ncol(MEs$eigengenes))) {
  print(i)
  this_mod = gsub("ME", "", colnames(MEs$eigengenes)[i])
    mod_genes = tidyMods %>% filter(module.number==this_mod,kMEtoMod==this_mod) %>% arrange(-kME) %>% dplyr::select(ensg) %>% pull()
  if(!this_mod %in% c("grey", 0, "M0.grey")) {
  this_plot = data.frame(datMeta, eig=MEs$eigengenes[,i])
  g1 = ggplot(this_plot, aes(x=Region, y=eig, color=Subject)) + geom_point() + ggtitle(paste0("Module ",this_mod, ": ", labels2colors(as.numeric(this_mod)), " n=", length(mod_genes)))
  g2 = tidyMods %>% filter(module.number==this_mod, kMEtoMod==this_mod) %>% slice_max(kME,n=25) %>% ggplot(aes(x=reorder(transcript_name, kME), y=kME, fill=novelty)) + geom_bar(stat='identity') + coord_flip() + labs(x="") + scale_fill_manual(values = colorVector)
  
  
  g3=ggplot(df_fisher.celltype %>% filter(net=="isoExpr",mod==this_mod),aes(x=reorder(cell, -Fisher.p), y=-log10(Fisher.p), fill=OR)) + geom_bar(stat='identity') + 
    coord_flip() + labs(x="") + geom_hline(yintercept = -log10(.05),lty=2,col='red')
  
  # Add pathway enrichments
  path = gprofiler2::gost(query=mod_genes,ordered_query = T,correction_method = 'fdr',
                        custom_bg = unique(datAnno$ensg), sources = c("GO","KEGG", "REACTOME"))
  if(!is.null(path)) {
    df_path = as_tibble(path$result)
    df_path <- df_path %>% filter(term_size > 5, term_size < 1000)
  g4 <- df_path %>% group_by(source) %>% slice_min(p_value,n=5,with_ties = F) %>% ungroup() %>%
    ggplot(aes(x=reorder(term_name, -p_value), y=-log10(p_value), fill=source)) + geom_bar(stat='identity') + coord_flip() + theme_bw() + labs(x="")  + 
    facet_grid(source~., space = 'free', scales='free') + theme(legend.position = 'none')
  } else {
    print("No pathway enrichment for this module")
    g4=NULL
  }
  
  # RBP
   g5=ggplot(df_fisher_rbp %>% filter(net=="net.isoExpr",mod.num==this_mod,Fisher.p<.05),aes(x=reorder(target, -Fisher.p), y=-log10(Fisher.p), fill=OR)) + geom_bar(stat='identity',position = position_dodge2()) + 
    coord_flip() + labs(x="") + geom_hline(yintercept = -log10(.05),lty=2,col='red')+ ggtitle("RBP Enrichment")
   
    # Rare Var
   g6=ggplot(df_logit.gene %>% filter(net=="isoExpr",mod==this_mod),aes(x=reorder(set, -Q), y=-log10(Q), fill=OR)) + geom_bar(stat='identity',position = position_dodge2()) + 
    coord_flip() + labs(x="") + geom_hline(yintercept = -log10(.05),lty=2,col='red')+ ggtitle("Rare Var Enrichment")
   
     # GWAS
   g7=ggplot(df_gwas %>% filter(net=="isoExpr",mod==this_mod),aes(x=reorder(gwas, -fdr), y=-log10(fdr), fill=Zscore)) + geom_bar(stat='identity',position = position_dodge2()) + scale_fill_gradient2()+
    coord_flip() + labs(x="") + geom_hline(yintercept = -log10(.05),lty=2,col='red') + ggtitle("GWAS Enrichment")
   

gridExtra::grid.arrange(grobs=list(g1,g2,g3,g4,g5,g6,g7),layout_matrix=rbind(c(1,3,5),c(2,3,5), c(2,4,7), c(6,4,7)))
  }
}
dev.off()
}

IsoUsage Network

if(FALSE) {
this_datExpr = datExpr.isoFr
this_net = net.isoFr
this_net$module = paste0("M", this_net$module.number, ".", labels2colors(this_net$module.number))
fileBaseNet="data/working/WGCNA/final/datExpr.localIF_batchCorrected_103k"
  

#Generate MEs and kME table for Network
MEs = moduleEigengenes(t(this_datExpr), colors=this_net$module.number)
kMEtable = signedKME(t(this_datExpr), datME =MEs$eigengenes,corFnc = "bicor")

this_anno = datAnno[match(rownames(this_datExpr), datAnno$transcript_id),]
idx = grep("^TALON", this_anno$transcript_name)
this_anno$transcript_name[idx] = paste0(this_anno$gene_name[idx], '_', this_anno$transcript_name[idx])

cell_anno = as.data.frame(matrix('grey', nrow=nrow(this_anno), ncol = length(unique(celltypemarkers$Cluster))))
colnames(cell_anno) = sort(unique(celltypemarkers$Cluster))
for(this_cell in unique(celltypemarkers$Cluster)) {
    marker_genes = celltypemarkers %>% filter(Cluster == this_cell) %>% dplyr::select(Ensembl) %>% pull()
    cell_anno[this_anno$ensg %in% marker_genes, this_cell] = "red"
}
  
tidyMods = tibble(transcript_id = rownames(kMEtable), module=this_net$module, module.color = this_net$colors, module.number=this_net$module.number, kMEtable)
tidyMods <- tidyMods %>% pivot_longer(-c(transcript_id,module,module.color,module.number), names_to = "kME_to_module", values_to = 'kME') %>% mutate(kMEtoMod = gsub("kME", "", kME_to_module)) %>% filter(module!=0, module != "grey", kMEtoMod !='grey', kMEtoMod != 0) %>% dplyr::select(-kME_to_module)
tidyMods <- tidyMods %>% left_join(this_anno)
tidyMods %>% group_by(module) %>% filter(kMEtoMod == module) %>% slice_max(kME,n=10)


pdf(file=paste0(fileBaseNet, ".pdf"),width=15,height=8)
plotDendroAndColors(this_net$tree, colors=cbind((this_net$colors),cell_anno), dendroLabels = F)

ggplot(df_fisher.celltype %>% filter(net=="isoUsage"), aes(x=cell,y=factor(mod,levels=c(100:1)), label=label, fill=-log10(Fisher.p))) + geom_tile() + scale_fill_gradient(low='white', high='red') + geom_text(size=3) + theme(axis.text.x = element_text(angle=-45,hjust=0)) + labs(x="",y="")



for(i in 1:(ncol(MEs$eigengenes))) {
  print(i)
  this_mod = gsub("ME", "", colnames(MEs$eigengenes)[i])
  mod_genes = tidyMods %>% filter(module.number==this_mod,kMEtoMod==this_mod) %>% arrange(-kME) %>% dplyr::select(ensg) %>% pull()
  if(!this_mod %in% c("grey", 0, "M0.grey")) {
  this_plot = data.frame(datMeta, eig=MEs$eigengenes[,i])
  g1 = ggplot(this_plot, aes(x=Region, y=eig, color=Subject)) + geom_point() + ggtitle(paste0("Module ",this_mod, ": ", labels2colors(as.numeric(this_mod)), " n=", length(mod_genes)))
  g2 = tidyMods %>% filter(module.number==this_mod, kMEtoMod==this_mod) %>% slice_max(kME,n=25) %>% ggplot(aes(x=reorder(transcript_name, kME), y=kME, fill=novelty)) + geom_bar(stat='identity') + coord_flip() + labs(x="") + scale_fill_manual(values = colorVector)
  
  
  g3=ggplot(df_fisher.celltype %>% filter(net=="isoExpr",mod==this_mod),aes(x=reorder(cell, -Fisher.p), y=-log10(Fisher.p), fill=OR)) + geom_bar(stat='identity') + 
    coord_flip() + labs(x="") + geom_hline(yintercept = -log10(.05),lty=2,col='red')
  
  # Add pathway enrichments
  path = gprofiler2::gost(query=mod_genes,ordered_query = T,correction_method = 'fdr',
                        custom_bg = unique(datAnno$ensg), sources = c("GO","KEGG", "REACTOME"))
  if(!is.null(path)) {
    df_path = as_tibble(path$result)
    df_path <- df_path %>% filter(term_size > 5, term_size < 1000)
  g4 <- df_path %>% group_by(source) %>% slice_min(p_value,n=5,with_ties = F) %>% ungroup() %>%
    ggplot(aes(x=reorder(term_name, -p_value), y=-log10(p_value), fill=source)) + geom_bar(stat='identity') + coord_flip() + theme_bw() + labs(x="")  + 
    facet_grid(source~., space = 'free', scales='free') + theme(legend.position = 'none')
  } else {
    print("No pathway enrichment for this module")
    g4=NULL
  }
  
  # RBP
   g5=ggplot(df_fisher_rbp %>% filter(net=="net.isoFr",mod.num==this_mod,Fisher.p<.05),aes(x=reorder(target, -Fisher.p), y=-log10(Fisher.p), fill=OR)) + geom_bar(stat='identity',position = position_dodge2()) + 
    coord_flip() + labs(x="") + geom_hline(yintercept = -log10(.05),lty=2,col='red')+ ggtitle("RBP Enrichment")
   
    # Rare Var
   g6=ggplot(df_logit.gene %>% filter(net=="isoUsage",mod==this_mod),aes(x=reorder(set, -Q), y=-log10(Q), fill=OR)) + geom_bar(stat='identity',position = position_dodge2()) + 
    coord_flip() + labs(x="") + geom_hline(yintercept = -log10(.05),lty=2,col='red')+ ggtitle("Rare Var Enrichment")
   
     # GWAS
   g7=ggplot(df_gwas %>% filter(net=="isoUsage",mod==this_mod),aes(x=reorder(gwas, -fdr), y=-log10(fdr), fill=Zscore)) + geom_bar(stat='identity',position = position_dodge2()) + scale_fill_gradient2()+
    coord_flip() + labs(x="") + geom_hline(yintercept = -log10(.05),lty=2,col='red') + ggtitle("GWAS Enrichment")
   

gridExtra::grid.arrange(grobs=list(g1,g2,g3,g4,g5,g6,g7),layout_matrix=rbind(c(1,3,5),c(2,3,5), c(2,4,7), c(6,4,7)))
  }
}
dev.off()
}

Combined Network Plots

Network Cluster

df1 = data.frame(net.isoFr$MEs$eigengenes)
colnames(df1) = paste0("isoUsage_", gsub("ME", "", colnames(df1)))

df2 = data.frame(net.isoExpr$MEs$eigengenes)
colnames(df2) = paste0("isoExpr_",  gsub("ME", "", colnames(df2)))

df3 = data.frame(net.geneExpr$MEs$eigengenes)
colnames(df3) = paste0("geneExpr_",  gsub("ME", "", colnames(df3)))

df = cbind(df1,df2,df3)
tree = hclust(as.dist(1-bicor(df)),method = 'average')


# Hierarchically cluster the sample-eigenegene correlations
# and save the dendrogram representation
dist_mat <- as.dist(1-bicor((df)))
clust <- hclust(dist_mat,method = 'average')
dend <- as.dendrogram(clust)

# Define the mapping between network names and colors
labelColors = list(
    "geneExpr" = "#d62828",
    "isoExpr" = "#219ebc",
    "isoUsage" = "#ff9914")

# Iterate over the dendrogram leafs and manually change the node
# parameters according to the label name (the column name)
colLab <- function(n) {
    if (is.leaf(n)) {
        
        # Grab the attributes of the leaf/node
        a <- attributes(n)
        
        # Print the name so that later on you can arrange the heatmap
        # to match the order in the dendrogram
        cat(as.character(a$label))
        cat(", ")
        
        # Extract the name of the network (this regex extracts dev_gene from dev_gene_blahblah)
        net_name <- str_extract(a$label, "[:alnum:]+")
        
        # Assign the text color according to the mapping in labelColors
        if (net_name %in% names(labelColors)) {
            labCol <- labelColors[[net_name]] 
        } else {
            labCol <- "#5603ad"
        }
        
        # Assign the node color to match the WGCNA color (saddlebrown, turquoise, etc.)
        mod_color <- str_match(a$label,"[^_]+_(.*)")[2]
        
        # Add the new node attributes onto the existing node attributes
        attr(n, "nodePar") <- list(a$nodePar,
                                 cex = 1,
                                 pch = 15,
                                 lab.col = labCol, # This is the color of the leaf label (the text)
                                 col = mod_color)  # This is the color of the leaf (the square)
        }
    return(n)
}

# Apply the iteration function onto our saved dendrogram
new_dendro <- dendrapply(dend, colLab)
isoExpr_salmon, isoExpr_purple, isoUsage_orange, isoExpr_darkgrey, isoExpr_mediumpurple3, isoExpr_palevioletred3, isoExpr_floralwhite, isoExpr_green, isoUsage_white, isoExpr_skyblue, isoExpr_darkorange2, isoUsage_darkgrey, isoExpr_sienna3, isoUsage_green, isoUsage_brown, isoUsage_grey, isoExpr_paleturquoise, isoExpr_royalblue, isoUsage_pink, isoExpr_plum1, isoExpr_steelblue, isoExpr_magenta, isoExpr_white, isoUsage_royalblue, isoExpr_darkmagenta, isoExpr_plum2, isoUsage_darkred, isoUsage_salmon, isoExpr_lightcyan1, isoExpr_greenyellow, isoExpr_saddlebrown, isoExpr_darkslateblue, isoExpr_orangered4, isoUsage_purple, isoUsage_saddlebrown, isoExpr_lightyellow, isoExpr_darkturquoise, isoExpr_skyblue3, isoUsage_lightcyan, isoUsage_darkturquoise, isoUsage_lightgreen, geneExpr_cyan, geneExpr_darkturquoise, geneExpr_lightyellow, geneExpr_lightcyan, geneExpr_blue, geneExpr_grey, geneExpr_lightgreen, geneExpr_darkgreen, geneExpr_midnightblue, geneExpr_darkred, geneExpr_magenta, isoExpr_black, isoExpr_darkorange, isoExpr_yellowgreen, isoExpr_darkgreen, isoExpr_orange, isoExpr_ivory, isoExpr_violet, isoExpr_darkolivegreen, isoExpr_lightcyan, isoUsage_magenta, isoExpr_salmon4, isoExpr_lightgreen, isoExpr_tan, isoUsage_black, isoUsage_tan, isoUsage_greenyellow, isoExpr_brown, isoUsage_skyblue, isoExpr_brown4, isoExpr_red, isoExpr_cyan, isoExpr_pink, isoExpr_thistle2, isoExpr_thistle1, isoUsage_midnightblue, isoExpr_bisque4, isoUsage_red, isoUsage_cyan, isoUsage_darkorange, isoExpr_grey60, isoUsage_darkgreen, geneExpr_purple, geneExpr_greenyellow, geneExpr_yellow, geneExpr_turquoise, isoUsage_blue, isoExpr_grey, isoExpr_turquoise, geneExpr_royalblue, isoUsage_turquoise, isoExpr_blue, geneExpr_salmon, geneExpr_black, geneExpr_brown, isoUsage_grey60, isoExpr_lightsteelblue1, isoExpr_midnightblue, isoExpr_darkred, isoUsage_lightyellow, isoExpr_yellow, isoUsage_yellow, geneExpr_green, geneExpr_tan, geneExpr_grey60, geneExpr_pink, geneExpr_red, 
# Plot the resulting dendrogram
options(repr.plot.width = 30, repr.plot.height = 5, repr.plot.res = 600)
par(cex=0.8, mar = c(10,2,2,2))
plot(new_dendro,
        main = "Clustering of module eigengenes (bicor)",
        xlab = "", sub = "")

Module UMAP

library(Seurat)
Attaching SeuratObject
Attaching sp
library(tidyverse)
library(cowplot)
library(patchwork)

Attaching package: 'patchwork'
The following object is masked from 'package:cowplot':

    align_plots
library(igraph)

Attaching package: 'igraph'
The following objects are masked from 'package:dplyr':

    as_data_frame, groups, union
The following objects are masked from 'package:purrr':

    compose, simplify
The following object is masked from 'package:tidyr':

    crossing
The following object is masked from 'package:tibble':

    as_data_frame
The following objects are masked from 'package:stats':

    decompose, spectrum
The following object is masked from 'package:base':

    union
library(hdWGCNA) # https://smorabit.github.io/hdWGCNA/#installation
                 # options(timeout = 600)


this_datExpr = datExpr.isoFr
this_net = net.isoFr
this_net$colors = labels2colors(net.isoFr$cut2$labels)
names(this_net$colors) = names(net.isoFr$cut2$labels)
MEs = moduleEigengenes(t(this_datExpr), colors=this_net$colors)
kMEtable = signedKME(t(this_datExpr), datME =MEs$eigengenes,corFnc = "bicor")
Warning in bicor(datExpr, datME, , use = "p"): bicor: zero MAD in variable 'x'.
Pearson correlation was used for individual columns with zero (or missing) MAD.
this_anno = datAnno[match(rownames(this_datExpr), datAnno$transcript_id),]
this_anno$module = this_net$module
this_anno$module.color = this_net$colors
this_anno$module.number = this_net$module.number
idx = grep("^TALON", this_anno$transcript_name)
this_anno$transcript_name[idx] = paste0(this_anno$gene_name[idx], '_', this_anno$transcript_name[idx])

modules_to_plot = c("turquoise", "blue", "brown", "pink")
#modules_to_plot = unique(this_net$colors)

tidyMods = tibble(transcript_id = rownames(kMEtable), module=this_net$colors,
                kMEtable)
tidyMods <- tidyMods %>% pivot_longer(-c(transcript_id,module), names_to = "kME_to_module", values_to = 'kME') %>% mutate(kMEtoMod = gsub("kME", "", kME_to_module)) %>% filter(module!=0, module != "grey", kMEtoMod !='grey', kMEtoMod != 0) %>% dplyr::select(-kME_to_module)
tidyMods <- tidyMods %>% left_join(datAnno)
Joining, by = "transcript_id"
hubs100 <- tidyMods %>% group_by(module) %>% filter(module %in% modules_to_plot, kMEtoMod == module) %>% slice_max(kME,n=100) %>% dplyr::select(transcript_id) %>% pull()
Adding missing grouping variables: `module`
hubs25 <- tidyMods %>% group_by(module) %>% filter(module %in% modules_to_plot, kMEtoMod == module) %>% slice_max(kME,n=25) %>% dplyr::select(transcript_id) %>% pull()
Adding missing grouping variables: `module`
hubs5 <- tidyMods %>% group_by(module) %>% filter(module %in% modules_to_plot, kMEtoMod == module) %>% slice_max(kME,n=5) %>% dplyr::select(transcript_id) %>% pull()
Adding missing grouping variables: `module`
isoTOM = TOMsimilarityFromExpr(t(this_datExpr[hubs25,]),networkType = 'signed', power = 14)
TOM calculation: adjacency..
..will not use multithreading.
 Fraction of slow calculations: 0.000000
..connectivity..
..matrix multiplication (system BLAS)..
..normalization..
..done.
colnames(isoTOM) = rownames(isoTOM) = hubs25

df=uwot::umap(isoTOM,n_neighbors = 25,metric = 'cosine',spread = 20,min_dist = .4)
plot(df)

umap_df <- as.data.frame(df)
colnames(umap_df) <- c("UMAP1", "UMAP2")
umap_df$gene <- hubs25
umap_df$module <- this_net$colors[umap_df$gene]
umap_df$color <- net.isoExpr$cut2$labels[umap_df$gene]
umap_df$hub <- ifelse(
    umap_df$gene %in% as.character(unlist(hubs5)), 'hub', 'other'
)
umap_df <- umap_df %>% left_join(tidyMods %>% filter(module == kMEtoMod) %>% dplyr::select(gene=transcript_id, module, kME), by = c("module", "gene"))
umap_df <- umap_df %>% left_join(datAnno %>% dplyr::select(gene_name, transcript_id), by=c("gene" = "transcript_id"))

mods <- levels(factor(umap_df$module))
mods <- mods[mods != 'grey']

ggplot(umap_df, aes(x=UMAP1, y=UMAP2)) +
  geom_point(
   color=umap_df$module, # color each point by WGCNA module
   size=umap_df$kME # size of each point based on intramodular connectivity
  ) +
  umap_theme()

subset_TOM <- isoTOM[umap_df$gene, umap_df$gene[umap_df$hub == 'hub']]
g1 = graph.adjacency(isoTOM[umap_df$gene, umap_df$gene], mode='undirected', diag=F,weighted = T)
layoutMat <- layout.fruchterman.reingold(g1)
cairo_pdf(file='output/figures/Fig4/IsoUsage_hairballs.pdf', width=20,height=20,family='serif')
plot.igraph(g1, layout=as.matrix(umap_df[,c(1:2)]),
            vertex.label = paste0(umap_df$gene_name,"_", substr(umap_df$gene,1,15)),
            vertex.family='serif',
            edge.curved=0, edge.alpha=.5,
            edge.color = ifelse(E(g1)$weight > .25, "grey", NA),
            edge.width = E(g1)$weight,
            vertex.label.cex=0.5, vertex.color = umap_df$module,
            vertex.size=umap_df$kME^5*4)
dev.off()
quartz_off_screen 
                2 
  selected_modules <- data.frame(gene_name = umap_df$gene, module = this_net$module.number[umap_df$gene],color= this_net$colors[umap_df$gene])
  selected_modules <- cbind(selected_modules, umap_df[,c('UMAP1', 'UMAP2', 'hub', 'kME')])

  selected_modules$label <- ifelse(selected_modules$hub %in% "hub", selected_modules$gene_name, '')
  selected_modules$fontcolor <- ifelse(selected_modules$color == 'black', 'gray50', 'black')

  # set frome color
  # same color as module for all genes, black outline for the selected hub genes
  selected_modules$framecolor <- ifelse(selected_modules$hub %in% "hub", 'black', selected_modules$color)




edge_df <- subset_TOM %>% reshape2::melt()
  print(dim(edge_df))
[1] 2000    3
    edge_df$color <- future.apply::future_sapply(1:nrow(edge_df), function(i){
      gene1 = as.character(edge_df[i,'Var1'])
      gene2 = as.character(edge_df[i,'Var2'])

      col1 <- selected_modules[selected_modules$gene_name == gene1, 'color']
      col2 <- selected_modules[selected_modules$gene_name == gene2, 'color']

      if(col1 == col2){
        col = col1
      } else{
        col = 'grey90'
      }
      col
  })

  edge_df <- edge_df %>% subset(color != 'grey90')


  # subset edges:
  groups <- unique(edge_df$color)

    # get top strongest edges
    temp <- do.call(rbind, lapply(groups, function(cur_group){
      cur_df <- edge_df %>% subset(color == cur_group)
      n_edges <- nrow(cur_df)
      cur_df %>% dplyr::top_n(round(n_edges * .5), wt=value)
    }))

  edge_df <- temp
  print(dim(edge_df))
[1] 248   4
    edge_df <- edge_df %>% group_by(color) %>% mutate(value=hdWGCNA::scale01(value))

  edge_df$color <- sapply(1:nrow(edge_df), function(i){
    a = edge_df$value[i]
    #if(edge_df$value[i] < 0.05){a=0.05}
    alpha(edge_df$color[i], alpha=a)
  })


  g <- igraph::graph_from_data_frame(
    edge_df,
    directed=FALSE,
    vertices=selected_modules
  )

  l <- igraph::layout_with_fr(g,)
plot(
    g, layout=l,
    edge.color=adjustcolor(E(g)$color, alpha.f=.25),
    vertex.size=V(g)$size,
    edge.curved=0,
    edge.width=0.5,
    vertex.color=V(g)$color,
    vertex.frame.color=V(g)$color,
    vertex.label=V(g)$label,
    vertex.label.family='Helvetica', #vertex.label.font=vertex_df$font,
    vertex.label.font = 3,
    vertex.label.color = V(g)$fontcolor,
    vertex.label.cex=.5
  )

hub_list = hubs5
names(hub_list) = this_net$colors[hubs5]
hub_labels <- as.character(unlist(hub_list))

g<- igraph::graph_from_data_frame(edge_df, directed=F)
plot(g, layout= as.matrix(umap_df[,c(1:2)]))

IsoExpr.M11 Hairball

this_datExpr = datExpr.isoExpr
this_net = net.isoExpr
this_net$colors = labels2colors(this_net$cut2$labels)
names(this_net$colors) = names(this_net$cut2$labels)
MEs = moduleEigengenes(t(this_datExpr), colors=this_net$colors)
kMEtable = signedKME(t(this_datExpr), datME =MEs$eigengenes,corFnc = "bicor")

this_anno = datAnno[match(rownames(this_datExpr), datAnno$transcript_id),]
this_anno$module = this_net$module
this_anno$module.color = this_net$colors
this_anno$module.number = this_net$module.number
idx = grep("^TALON", this_anno$transcript_name)
this_anno$transcript_name[idx] = paste0(this_anno$gene_name[idx], '_', this_anno$transcript_name[idx])

modules_to_plot = c("greenyellow")
#modules_to_plot = unique(this_net$colors)

tidyMods = tibble(transcript_id = rownames(kMEtable), module=this_net$colors,
                kMEtable)
tidyMods <- tidyMods %>% pivot_longer(-c(transcript_id,module), names_to = "kME_to_module", values_to = 'kME') %>% mutate(kMEtoMod = gsub("kME", "", kME_to_module)) %>% filter(module!=0, module != "grey", kMEtoMod !='grey', kMEtoMod != 0) %>% dplyr::select(-kME_to_module)
tidyMods <- tidyMods %>% left_join(datAnno)
Joining, by = "transcript_id"
hubs100 <- tidyMods %>% group_by(module) %>% filter(module %in% modules_to_plot, kMEtoMod == module) %>% slice_max(kME,n=100) %>% dplyr::select(transcript_id) %>% pull()
Adding missing grouping variables: `module`
hubs25 <- tidyMods %>% group_by(module) %>% filter(module %in% modules_to_plot, kMEtoMod == module) %>% slice_max(kME,n=25) %>% dplyr::select(transcript_id) %>% pull()
Adding missing grouping variables: `module`
hubs5 <- tidyMods %>% group_by(module) %>% filter(module %in% modules_to_plot, kMEtoMod == module) %>% slice_max(kME,n=5) %>% dplyr::select(transcript_id) %>% pull()
Adding missing grouping variables: `module`
isoTOM = TOMsimilarityFromExpr(t(this_datExpr[hubs25,]),networkType = 'signed', power = 14)
TOM calculation: adjacency..
..will not use multithreading.
 Fraction of slow calculations: 0.000000
..connectivity..
..matrix multiplication (system BLAS)..
..normalization..
..done.
colnames(isoTOM) = rownames(isoTOM) = hubs25

df=uwot::umap(isoTOM,n_neighbors = 25,metric = 'cosine',spread = 20,min_dist = .4)
plot(df)

umap_df <- as.data.frame(df)
colnames(umap_df) <- c("UMAP1", "UMAP2")
umap_df$gene <- hubs25
umap_df$module <- this_net$colors[umap_df$gene]
umap_df$color <- net.isoExpr$cut2$labels[umap_df$gene]
umap_df$hub <- ifelse(
    umap_df$gene %in% as.character(unlist(hubs5)), 'hub', 'other'
)
umap_df <- umap_df %>% left_join(tidyMods %>% filter(module == kMEtoMod) %>% dplyr::select(gene=transcript_id, module, kME), by = c("module", "gene"))
umap_df <- umap_df %>% left_join(datAnno %>% dplyr::select(gene_name, transcript_id), by=c("gene" = "transcript_id"))

mods <- levels(factor(umap_df$module))
mods <- mods[mods != 'grey']

ggplot(umap_df, aes(x=UMAP1, y=UMAP2)) +
  geom_point(
   color=umap_df$module, # color each point by WGCNA module
   size=umap_df$kME # size of each point based on intramodular connectivity
  ) +
  umap_theme()

subset_TOM <- isoTOM[umap_df$gene, umap_df$gene[umap_df$hub == 'hub']]
g1 = graph.adjacency(isoTOM[umap_df$gene, umap_df$gene], mode='undirected', diag=F,weighted = T)
layoutMat <- layout.fruchterman.reingold(g1)
cairo_pdf(file='output/figures/Fig4/IsoExpr_hairballs.pdf', width=20,height=20,family='serif')
plot.igraph(g1, layout=as.matrix(umap_df[,c(1:2)]),
            vertex.label = paste0(umap_df$gene_name,"_", substr(umap_df$gene,1,15)),
            vertex.family='serif',
            edge.curved=0, edge.alpha=.5,
            edge.color = ifelse(E(g1)$weight > .25, "grey", NA),
            edge.width = E(g1)$weight,
            vertex.label.cex=0.5, vertex.color = umap_df$module,
            vertex.size=umap_df$kME^5*4)
dev.off()
quartz_off_screen 
                2