suppressPackageStartupMessages({
library(IsoformSwitchAnalyzeR)
library(rtracklayer)
library(DESeq2)
library(ggVennDiagram)
library(tidyverse)
library(readr)
})
Figure 3 - IsoformSwitchAnalyzeR
packageVersion('IsoformSwitchAnalyzeR')
[1] '1.16.0'
= c(
colorVector "Known" = "#009E73",
"ISM" = "#0072B2",
"ISM_Prefix" = "#005996",
"ISM_Suffix" = "#378bcc",
"NIC" = "#D55E00",
"NNC" = "#E69F00",
"Other" = "#000000"
)= colorVector[-2] colorVector_ismSplit
Build switch list
Make design
= tribble(
myDesign ~sampleID, ~condition, ~donor,
"VZ_209", "VZ", "209",
"VZ_334", "VZ", "334",
"VZ_336", "VZ", "336",
"CP_209", "CP", "209",
"CP_334", "CP", "334",
"CP_336", "CP", "336",
%>%
) ::mutate(
dplyr::across(c(condition, donor), as_factor)
dplyr )
Extract Expresssion
= read_table("data/cp_vz_0.75_min_7_recovery_talon_abundance_filtered.tsv.gz") cts
── Column specification ────────────────────────────────────────────────────────
cols(
.default = col_double(),
annot_gene_id = col_character(),
annot_transcript_id = col_character(),
annot_gene_name = col_character(),
annot_transcript_name = col_character(),
gene_novelty = col_character(),
transcript_novelty = col_character(),
ISM_subtype = col_character()
)
ℹ Use `spec()` for the full column specifications.
= cts %>%
cts mutate(
VZ_209 = rowSums(across(matches("209_.*_VZ"))),
VZ_334 = rowSums(across(matches("334_.*_VZ"))),
VZ_336 = rowSums(across(matches("336_.*_VZ"))),
CP_209 = rowSums(across(matches("209_.*_CP"))),
CP_334 = rowSums(across(matches("334_.*_CP"))),
CP_336 = rowSums(across(matches("336_.*_CP"))),
.keep = "unused"
%>%
) ::select(!c("gene_ID", "transcript_ID", "annot_transcript_name")) %>%
dplyr::rename(
dplyrgene_id = "annot_gene_id",
transcript_id = "annot_transcript_id",
gene_name = "annot_gene_name"
%>%
) mutate(
gene_novelty = as.factor(gene_novelty),
transcript_novelty = as.factor(transcript_novelty),
ISM_subtype = ISM_subtype %>% na_if("None") %>% as.factor()
)= cts %>%
talonExpression ::select(transcript_id, starts_with(c("VZ", "CP"))) %>%
dplyr::rename(isoform_id = "transcript_id")
dplyr talonExpression
# A tibble: 214,516 × 7
isoform_id VZ_209 VZ_334 VZ_336 CP_209 CP_334 CP_336
<chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 ENST00000494149.2_2 0 0 2 0 0 0
2 ENST00000424587.2 0 0 0 0 0 2
3 ENST00000445840.1 1 1 0 0 0 0
4 ENST00000457540.1_1 0 1 1 0 0 0
5 ENST00000414273.1_1 0 0 1 0 0 0
6 ENST00000655765.1_2 0 2 0 2 1 2
7 ENST00000457084.1_2 0 2 0 0 0 0
8 ENST00000591702.1_2 1 0 2 2 0 0
9 ENST00000644482.1_1 3 0 0 0 0 0
10 ENST00000445118.7_3 1 0 0 8 0 0
# … with 214,506 more rows
Create pre-filtered switchAnalyzeRlist
IsoformSwitchAnalyzeR will make assumptions based on whether file extension is .gtf(.gz) or .gff(.gz)… was necessary to symlink _corrected.gtf.cds.gff
.
= "data/working/talonSwitchList_preFilter.rds"
rdata_path if(!file.exists(rdata_path)) {
<- importRdata(
talonSwitchList isoformCountMatrix = talonExpression,
designMatrix = myDesign,
isoformExonAnnoation = 'data/cp_vz_0.75_min_7_recovery_talon.gtf.gz',
isoformNtFasta = 'data/sqanti/cp_vz_0.75_min_7_recovery_talon_corrected.fasta.gz',
addAnnotatedORFs = FALSE,
fixStringTieAnnotationProblem = FALSE # otherwise will mess up gene_ids
)<- addORFfromGTF(
talonSwitchList switchAnalyzeRlist = talonSwitchList,
pathToGTF = 'data/sqanti/cp_vz_0.75_min_7_recovery_talon_corrected.gtf.cds.gtf.gz'
)<- preFilter(
talonSwitchList switchAnalyzeRlist = talonSwitchList,
geneExpressionCutoff = 1, # default
isoformExpressionCutoff = 0, # default
IFcutoff = 0.01, # default
removeSingleIsoformGenes = TRUE, # default
reduceToSwitchingGenes = FALSE, # default (we didn't run DEXSeq yet)
keepIsoformInAllConditions = TRUE # we only have 2 conditions so doesn't matter
)saveRDS(talonSwitchList, file = rdata_path)
else {
} = readRDS(rdata_path)
talonSwitchList
}summary(talonSwitchList)
This switchAnalyzeRlist list contains:
102319 isoforms from 10809 genes
1 comparison from 2 conditions (in total 6 samples)
Feature analyzed:
[1] "ntSequence, ORFs"
switchPlot(
talonSwitchList,gene='KMT2E'
)
Warning in switchPlot(talonSwitchList, gene = "KMT2E"): We recomend
running the isoform switching analysis before doing the transcript plot.
See ?detectIsoformSwitching for more details
Warning: `guides(<scale> = FALSE)` is deprecated. Please use `guides(<scale> = "none")` instead.
`guides(<scale> = FALSE)` is deprecated. Please use `guides(<scale> = "none")` instead.
`guides(<scale> = FALSE)` is deprecated. Please use `guides(<scale> = "none")` instead.
Run DESeq2
DTE
= talonExpression %>% filter(isoform_id %in% talonSwitchList$isoformFeatures$isoform_id)
cts_preFilter cts_preFilter
# A tibble: 102,319 × 7
isoform_id VZ_209 VZ_334 VZ_336 CP_209 CP_334 CP_336
<chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 ENST00000669922.1_2 4 10 4 6 9 10
2 ENST00000659124.1_2 4 0 0 9 0 0
3 ENST00000441765.6_3 1 0 0 0 1 0
4 ENST00000668541.1_2 0 0 0 5 1 5
5 ENST00000327044.7_3 283 672 708 804 820 665
6 ENST00000428771.6_3 0 1 0 0 2 0
7 ENST00000304952.10_3 0 25 41 3 5 16
8 ENST00000481869.1_2 0 6 4 2 2 6
9 ENST00000484667.2_2 0 4 5 0 0 9
10 ENST00000379370.7_2 4 12 7 9 34 8
# … with 102,309 more rows
= DESeqDataSetFromMatrix(
dds as.data.frame(cts_preFilter),
as.data.frame(myDesign),
~ donor + condition,
tidy = T
)
converting counts to integer mode
# takes 30sec
system.time({
= DESeq(dds)
dds })
estimating size factors
estimating dispersions
gene-wise dispersion estimates
mean-dispersion relationship
final dispersion estimates
fitting model and testing
user system elapsed
23.712 0.397 24.398
= DESeq2::results(dds)
DTE_results DTE_results
log2 fold change (MLE): condition CP vs VZ
Wald test p-value: condition CP vs VZ
DataFrame with 102319 rows and 6 columns
baseMean log2FoldChange lfcSE stat pvalue
<numeric> <numeric> <numeric> <numeric> <numeric>
ENST00000669922.1_2 6.794078 0.3497446 0.926990 0.3772904 0.7059578
ENST00000659124.1_2 2.340796 0.1825893 1.972493 0.0925678 0.9262469
ENST00000441765.6_3 0.406806 -0.0633991 4.033013 -0.0157200 0.9874577
ENST00000668541.1_2 1.695927 3.8310438 2.135802 1.7937258 0.0728569
ENST00000327044.7_3 615.853668 0.3351470 0.261315 1.2825405 0.1996531
... ... ... ... ... ...
TALONT003163865 1.27255 2.91192 2.53106 1.150474 0.249949
TALONT003172883 1.32847 2.46363 2.50056 0.985233 0.324510
TALONT003175107 1.61344 3.01946 2.33696 1.292045 0.196342
TALONT003196342 1.10300 2.77499 2.66225 1.042347 0.297251
TALONT003225592 1.41593 1.80728 2.62236 0.689179 0.490711
padj
<numeric>
ENST00000669922.1_2 0.925055
ENST00000659124.1_2 NA
ENST00000441765.6_3 NA
ENST00000668541.1_2 NA
ENST00000327044.7_3 0.570881
... ...
TALONT003163865 NA
TALONT003172883 NA
TALONT003175107 NA
TALONT003196342 NA
TALONT003225592 NA
= DTE_results %>%
DTE_results as_tibble(rownames = "isoform_id") %>%
mutate(padj = replace_na(padj, 1))
DGE
Mike note – lets calculate DGE for all genes (expressed) not just those in talonSwitchList_part1, which filters out genes with only 1 isoform, etc Should be ~25k
= read_table("data/cp_vz_talon_abundance.tsv.gz") %>%
cts_gene mutate(
VZ_209 = rowSums(across(matches("209_.*_VZ"))),
VZ_334 = rowSums(across(matches("334_.*_VZ"))),
VZ_336 = rowSums(across(matches("336_.*_VZ"))),
CP_209 = rowSums(across(matches("209_.*_CP"))),
CP_334 = rowSums(across(matches("334_.*_CP"))),
CP_336 = rowSums(across(matches("336_.*_CP"))),
.keep = "unused"
%>%
) group_by(annot_gene_id, annot_gene_name, gene_novelty) %>%
summarize(across(starts_with(c("VZ", "CP")), sum), .groups = "drop") %>%
::rename(
dplyrgene_id = "annot_gene_id",
gene_name = "annot_gene_name"
%>%
) mutate(gene_novelty = as.factor(gene_novelty))
── Column specification ────────────────────────────────────────────────────────
cols(
.default = col_double(),
annot_gene_id = col_character(),
annot_transcript_id = col_character(),
annot_gene_name = col_character(),
annot_transcript_name = col_character(),
gene_novelty = col_character(),
transcript_novelty = col_character(),
ISM_subtype = col_character()
)
ℹ Use `spec()` for the full column specifications.
= cts_gene %>%
cts_gene_preFilter filter(gene_id %in% unique(cts$gene_id)) %>% # Filtering for those with at least one detected isoform
::select(gene_id, starts_with(c("VZ", "CP")))
dplyr cts_gene_preFilter
# A tibble: 24,554 × 7
gene_id VZ_209 VZ_334 VZ_336 CP_209 CP_334 CP_336
<chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 ENSG00000000003.15_4 443 993 812 335 483 305
2 ENSG00000000419.12_5 30 119 108 388 123 311
3 ENSG00000000457.14_6 149 300 222 192 149 148
4 ENSG00000000460.17_6 103 423 275 99 132 76
5 ENSG00000000938.13_5 0 3 10 14 4 11
6 ENSG00000000971.16_3 13 17 23 21 26 9
7 ENSG00000001036.14_4 206 243 415 163 105 205
8 ENSG00000001084.13_9 49 73 51 54 65 44
9 ENSG00000001167.14_3 184 401 249 436 525 286
10 ENSG00000001460.18_6 5 9 15 44 21 42
# … with 24,544 more rows
= DESeqDataSetFromMatrix(
dds_gene as.data.frame(cts_gene_preFilter),
as.data.frame(myDesign),
~ donor + condition,
tidy = T
)
converting counts to integer mode
# takes 10sec
system.time({
= DESeq(dds_gene)
dds_gene })
estimating size factors
estimating dispersions
gene-wise dispersion estimates
mean-dispersion relationship
final dispersion estimates
fitting model and testing
user system elapsed
7.720 0.246 7.983
= DESeq2::results(dds_gene)
DGE_results DGE_results
log2 fold change (MLE): condition CP vs VZ
Wald test p-value: condition CP vs VZ
DataFrame with 24554 rows and 6 columns
baseMean log2FoldChange lfcSE stat pvalue
<numeric> <numeric> <numeric> <numeric> <numeric>
ENSG00000000003.15_4 547.45338 -1.127697 0.224974 -5.01257 5.37067e-07
ENSG00000000419.12_5 168.65442 1.554264 0.394700 3.93784 8.22188e-05
ENSG00000000457.14_6 189.49516 -0.588598 0.272137 -2.16287 3.05513e-02
ENSG00000000460.17_6 172.14571 -1.381243 0.295422 -4.67548 2.93261e-06
ENSG00000000938.13_5 6.44186 1.487234 1.237223 1.20207 2.29335e-01
... ... ... ... ... ...
TALONG000171040 7.30690 6.13371 1.58942 3.85908 0.000113814
TALONG000171269 1.51623 3.77994 2.59371 1.45735 0.145019484
TALONG000180651 3.39053 4.85165 1.95562 2.48088 0.013106017
TALONG000182469 2.31534 4.58042 2.20513 2.07717 0.037785814
TALONG000183977 1.57984 3.87094 2.55344 1.51597 0.129526613
padj
<numeric>
ENSG00000000003.15_4 6.36749e-06
ENSG00000000419.12_5 6.41440e-04
ENSG00000000457.14_6 1.04347e-01
ENSG00000000460.17_6 3.06182e-05
ENSG00000000938.13_5 4.59846e-01
... ...
TALONG000171040 0.000856288
TALONG000171269 NA
TALONG000180651 0.053168102
TALONG000182469 0.123652258
TALONG000183977 NA
= DGE_results %>%
DGE_results as_tibble(rownames = "gene_id") %>%
mutate(padj = replace_na(padj, 1))
Switch analysis part 1
= "data/working/talonSwitchList_part1.rds"
rdata_path if (!file.exists(rdata_path)) {
### DEXSeq DTU - takes 15min
<- isoformSwitchTestDEXSeq(
talonSwitchList_part1 switchAnalyzeRlist = talonSwitchList,
reduceToSwitchingGenes = FALSE
)
### Add DTE/DGE to switchList
= match(talonSwitchList_part1$isoformFeatures$isoform_id, DTE_results$isoform_id)
idx $isoformFeatures$iso_q_value = DTE_results$padj[idx]
talonSwitchList_part1
= match(talonSwitchList_part1$isoformFeatures$gene_id, DGE_results$gene_id)
idx $isoformFeatures$gene_q_value = DGE_results$padj[idx]
talonSwitchList_part1
### Extract AA sequences
$aaSequence = NULL
talonSwitchList_part1= talonSwitchList_part1$isoformFeatures
isoformFeatures_part1 $isoformFeatures = isoformFeatures_part1 %>%
talonSwitchList_part1as_tibble() %>%
group_by(gene_id) %>%
mutate(
isoform_switch_q_value = if_else(any(
# our actual filtering criteria - genes with DTU, DTE, or DGE
< 0.05 & dIF > 0.1) | iso_q_value < 0.05 | gene_q_value < 0.05
(isoform_switch_q_value 0, 1),
), dIF = 1
%>%
) ungroup() %>%
as.data.frame()
<- extractSequence(
talonSwitchList_part1 switchAnalyzeRlist = talonSwitchList_part1,
pathToOutput = "data/working/IsoformSwitchAnalyzeR/",
extractNTseq = TRUE,
extractAAseq = TRUE,
removeShortAAseq = TRUE,
removeLongAAseq = FALSE,
onlySwitchingGenes = TRUE
)$isoformFeatures = isoformFeatures_part1
talonSwitchList_part1
### Save to file
saveRDS(talonSwitchList_part1, file = rdata_path)
else {
} = readRDS(rdata_path)
talonSwitchList_part1
}summary(talonSwitchList_part1)
This switchAnalyzeRlist list contains:
102319 isoforms from 10809 genes
1 comparison from 2 conditions (in total 6 samples)
Switching features:
Comparison Isoforms Switches Genes
1 VZ vs CP 1860 1073 1237
Feature analyzed:
[1] "Isoform Switch Identification, ntSequence, ORFs, aaSequence"
WARNING: extractSequence()
silently caches its result in switchList$aaSequence
. If this object exists, extractSequence()
will return the same result, no matter if you adjust alpha
, dIFcutoff
, or even re-run isoformSwitchTestDEXSeq()
.
What are the exact isoforms extractSequence()
will return sequences for? (Slightly fewer if removeShortAAseq = TRUE
)
= talonSwitchList_part1$orfAnalysis %>% as_tibble() %>%
orf_isoforms drop_na(orfTransciptStart) %>%
pull(isoform_id)
$isoformFeatures %>% as_tibble() %>%
talonSwitchList_part1group_by(gene_id) %>% filter(any(isoform_switch_q_value < 0.05 & abs(dIF) > 0.1)) %>% ungroup() %>%
filter(isoform_id %in% orf_isoforms)
# A tibble: 12,026 × 30
iso_ref gene_…¹ isofo…² gene_id condi…³ condi…⁴ gene_…⁵ gene_…⁶ iso_b…⁷
<chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr>
1 isoComp_0000… geneCo… ENST00… ENSG00… VZ CP TSPAN6 protei… protei…
2 isoComp_0000… geneCo… TALONT… ENSG00… VZ CP TSPAN6 protei… <NA>
3 isoComp_0000… geneCo… TALONT… ENSG00… VZ CP TSPAN6 protei… <NA>
4 isoComp_0000… geneCo… TALONT… ENSG00… VZ CP TSPAN6 protei… <NA>
5 isoComp_0000… geneCo… TALONT… ENSG00… VZ CP TSPAN6 protei… <NA>
6 isoComp_0000… geneCo… TALONT… ENSG00… VZ CP TSPAN6 protei… <NA>
7 isoComp_0000… geneCo… TALONT… ENSG00… VZ CP TSPAN6 protei… <NA>
8 isoComp_0000… geneCo… ENST00… ENSG00… VZ CP C1orf1… protei… protei…
9 isoComp_0000… geneCo… ENST00… ENSG00… VZ CP C1orf1… protei… protei…
10 isoComp_0000… geneCo… ENST00… ENSG00… VZ CP C1orf1… protei… protei…
# … with 12,016 more rows, 21 more variables: gene_overall_mean <dbl>,
# gene_value_1 <dbl>, gene_value_2 <dbl>, gene_stderr_1 <dbl>,
# gene_stderr_2 <dbl>, gene_log2_fold_change <dbl>, gene_q_value <dbl>,
# iso_overall_mean <dbl>, iso_value_1 <dbl>, iso_value_2 <dbl>,
# iso_stderr_1 <dbl>, iso_stderr_2 <dbl>, iso_log2_fold_change <dbl>,
# iso_q_value <dbl>, IF_overall <dbl>, IF1 <dbl>, IF2 <dbl>, dIF <dbl>,
# isoform_switch_q_value <dbl>, gene_switch_q_value <dbl>, PTC <lgl>, and …
Table S3
What genes does IsoformSwitchAnalyzeR think are significant?
extractTopSwitches(
filterForConsequences = FALSE, n=Inf
talonSwitchList_part1, %>% as_tibble() )
# A tibble: 1,237 × 7
gene_ref gene_id gene_…¹ condi…² condi…³ gene_sw…⁴ Rank
<chr> <chr> <chr> <chr> <chr> <dbl> <int>
1 geneComp_00001085 ENSG00000073584.20… SMARCE1 VZ CP 1.03e-211 1
2 geneComp_00000969 ENSG00000070087.14… PFN2 VZ CP 6.32e-118 2
3 geneComp_00008384 ENSG00000155849.15… ELMO1 VZ CP 3.74e-116 3
4 geneComp_00000525 ENSG00000044115.21… CTNNA1 VZ CP 3.84e-103 4
5 geneComp_00005804 ENSG00000133884.10… DPF2 VZ CP 4.94e- 95 5
6 geneComp_00020536 ENSG00000263001.6_6 GTF2I VZ CP 9.35e- 94 6
7 geneComp_00001542 ENSG00000087274.17… ADD1 VZ CP 2.19e- 85 7
8 geneComp_00003418 ENSG00000111667.13… USP5 VZ CP 6.38e- 83 8
9 geneComp_00004817 ENSG00000124783.14… SSR1 VZ CP 2.45e- 80 9
10 geneComp_00000441 ENSG00000033627.16… ATP6V0… VZ CP 4.61e- 74 10
# … with 1,227 more rows, and abbreviated variable names ¹gene_name,
# ²condition_1, ³condition_2, ⁴gene_switch_q_value
$isoformFeatures %>% as_tibble() %>%
talonSwitchList_part1group_by(gene_ref, gene_id, gene_name, condition_1, condition_2, gene_switch_q_value) %>%
filter(any(isoform_switch_q_value < 0.05 & abs(dIF) > 0.1)) %>%
summarize() %>%
arrange(gene_switch_q_value)
`summarise()` has grouped output by 'gene_ref', 'gene_id', 'gene_name',
'condition_1', 'condition_2'. You can override using the `.groups` argument.
# A tibble: 1,237 × 6
# Groups: gene_ref, gene_id, gene_name, condition_1, condition_2 [1,237]
gene_ref gene_id gene_name conditio…¹ condi…² gene_sw…³
<chr> <chr> <chr> <chr> <chr> <dbl>
1 geneComp_00001085 ENSG00000073584.20_8 SMARCE1 VZ CP 1.03e-211
2 geneComp_00000969 ENSG00000070087.14_5 PFN2 VZ CP 6.32e-118
3 geneComp_00008384 ENSG00000155849.15_4 ELMO1 VZ CP 3.74e-116
4 geneComp_00000525 ENSG00000044115.21_7 CTNNA1 VZ CP 3.84e-103
5 geneComp_00005804 ENSG00000133884.10_4 DPF2 VZ CP 4.94e- 95
6 geneComp_00020536 ENSG00000263001.6_6 GTF2I VZ CP 9.35e- 94
7 geneComp_00001542 ENSG00000087274.17_6 ADD1 VZ CP 2.19e- 85
8 geneComp_00003418 ENSG00000111667.13_4 USP5 VZ CP 6.38e- 83
9 geneComp_00004817 ENSG00000124783.14_6 SSR1 VZ CP 2.45e- 80
10 geneComp_00000441 ENSG00000033627.16_4 ATP6V0A1 VZ CP 4.61e- 74
# … with 1,227 more rows, and abbreviated variable names ¹condition_1,
# ²condition_2, ³gene_switch_q_value
What transcripts does IsoformSwitchAnalyzeR think are significant?
extractTopSwitches(
extractGenes = FALSE, filterForConsequences = FALSE, n=Inf
talonSwitchList_part1, %>% as_tibble() )
Warning in .fun(piece, ...): Less than n genes genes with significant switches
were found. Returning those.
# A tibble: 1,860 × 12
iso_ref gene_…¹ isofo…² gene_id gene_…³ condi…⁴ condi…⁵ IF1 IF2 dIF
<chr> <chr> <chr> <chr> <chr> <chr> <chr> <dbl> <dbl> <dbl>
1 isoComp_0… geneCo… ENST00… ENSG00… SMARCE1 VZ CP 0.57 0.164 -0.405
2 isoComp_0… geneCo… ENST00… ENSG00… PFN2 VZ CP 0.25 0.021 -0.228
3 isoComp_0… geneCo… ENST00… ENSG00… ELMO1 VZ CP 0.404 0.024 -0.38
4 isoComp_0… geneCo… ENST00… ENSG00… CTNNA1 VZ CP 0.541 0.129 -0.413
5 isoComp_0… geneCo… ENST00… ENSG00… DPF2 VZ CP 0.082 0.553 0.47
6 isoComp_0… geneCo… ENST00… ENSG00… GTF2I VZ CP 0.128 0.017 -0.111
7 isoComp_0… geneCo… TALONT… ENSG00… ADD1 VZ CP 0.31 0.059 -0.251
8 isoComp_0… geneCo… ENST00… ENSG00… CTNNA1 VZ CP 0.124 0.32 0.197
9 isoComp_0… geneCo… ENST00… ENSG00… USP5 VZ CP 0.314 0.693 0.379
10 isoComp_0… geneCo… ENST00… ENSG00… USP5 VZ CP 0.561 0.185 -0.376
# … with 1,850 more rows, 2 more variables: isoform_switch_q_value <dbl>,
# Rank <int>, and abbreviated variable names ¹gene_ref, ²isoform_id,
# ³gene_name, ⁴condition_1, ⁵condition_2
$isoformFeatures %>% as_tibble() %>%
talonSwitchList_part1filter(isoform_switch_q_value < 0.05 & abs(dIF) > 0.1) %>%
::select(iso_ref, gene_ref, isoform_id, gene_id, gene_name, condition_1, condition_2, IF1, IF2, dIF, isoform_switch_q_value) %>%
dplyrarrange(isoform_switch_q_value)
# A tibble: 1,860 × 11
iso_ref gene_…¹ isofo…² gene_id gene_…³ condi…⁴ condi…⁵ IF1 IF2 dIF
<chr> <chr> <chr> <chr> <chr> <chr> <chr> <dbl> <dbl> <dbl>
1 isoComp… geneCo… ENST00… ENSG00… SMARCE1 VZ CP 0.570 0.164 -0.405
2 isoComp… geneCo… ENST00… ENSG00… PFN2 VZ CP 0.250 0.0213 -0.228
3 isoComp… geneCo… ENST00… ENSG00… ELMO1 VZ CP 0.404 0.0237 -0.380
4 isoComp… geneCo… ENST00… ENSG00… CTNNA1 VZ CP 0.541 0.129 -0.413
5 isoComp… geneCo… ENST00… ENSG00… DPF2 VZ CP 0.0823 0.553 0.470
6 isoComp… geneCo… ENST00… ENSG00… GTF2I VZ CP 0.128 0.0167 -0.111
7 isoComp… geneCo… TALONT… ENSG00… ADD1 VZ CP 0.310 0.059 -0.251
8 isoComp… geneCo… ENST00… ENSG00… CTNNA1 VZ CP 0.124 0.320 0.197
9 isoComp… geneCo… ENST00… ENSG00… USP5 VZ CP 0.314 0.693 0.379
10 isoComp… geneCo… ENST00… ENSG00… USP5 VZ CP 0.561 0.185 -0.376
# … with 1,850 more rows, 1 more variable: isoform_switch_q_value <dbl>, and
# abbreviated variable names ¹gene_ref, ²isoform_id, ³gene_name,
# ⁴condition_1, ⁵condition_2
Create our Table S3
Note: isoformSwitchAnalyzer uses both q-value (<.05) and effect size cutoff |dIF>.1| for DTU calling. Lets remove the dIF effect size cutoff here so that we can directly compare DGE, DTE, and DTU genes in the Venn Diagram below
= talonSwitchList_part1$isoformFeatures %>%
tableS3 as_tibble() %>%
::select(isoform_id, gene_id, gene_name, condition_1, condition_2) %>%
dplyrleft_join(
$isoformSwitchAnalysis %>% dplyr::select(isoform_id, dIF, pvalue, padj)
talonSwitchList_part1%>%
) ::rename(
dplyrDTU_dIF = "dIF",
DTU_pval = "pvalue",
DTU_qval = "padj"
%>%
) mutate(
DTU = DTU_qval < 0.05 # & abs(DTU_dIF) > 0.1
%>%
) left_join(
%>% dplyr::select(isoform_id, log2FoldChange, pvalue, padj)
DTE_results %>%
) ::rename(
dplyrDTE_log2FC = "log2FoldChange",
DTE_pval = "pvalue",
DTE_qval = "padj"
%>%
) mutate(
DTE = DTE_qval < 0.05
%>%
) left_join(
%>% dplyr::select(gene_id, log2FoldChange, pvalue, padj)
DGE_results %>%
) ::rename(
dplyrDGE_log2FC = "log2FoldChange",
DGE_pval = "pvalue",
DGE_qval = "padj"
%>%
) mutate(
DGE = DGE_qval < 0.05
)
Joining, by = "isoform_id"
Joining, by = "isoform_id"
Joining, by = "gene_id"
tableS3
# A tibble: 102,319 × 17
isoform_id gene_id gene_…¹ condi…² condi…³ DTU_dIF DTU_pval DTU_qval DTU
<chr> <chr> <chr> <chr> <chr> <dbl> <dbl> <dbl> <lgl>
1 ENST0000037… ENSG00… TSPAN6 VZ CP -0.347 4.89e-14 9.84e-12 TRUE
2 TALONT00074… ENSG00… TSPAN6 VZ CP 0.140 1.42e- 8 1.23e- 6 TRUE
3 TALONT00074… ENSG00… TSPAN6 VZ CP 0.196 1.36e- 6 7.89e- 5 TRUE
4 TALONT00074… ENSG00… TSPAN6 VZ CP 0.00667 6.29e- 1 9.24e- 1 FALSE
5 TALONT00074… ENSG00… TSPAN6 VZ CP -0.00633 4.95e- 1 8.74e- 1 FALSE
6 TALONT00074… ENSG00… TSPAN6 VZ CP -0.00867 1.89e- 1 6.47e- 1 FALSE
7 TALONT00074… ENSG00… TSPAN6 VZ CP 0.017 1.48e- 3 3.08e- 2 TRUE
8 ENST0000037… ENSG00… DPM1 VZ CP -0.0317 8.43e- 1 9.76e- 1 FALSE
9 ENST0000037… ENSG00… DPM1 VZ CP -0.0273 2.34e- 1 7.00e- 1 FALSE
10 ENST0000037… ENSG00… DPM1 VZ CP 0.0993 1.48e- 1 5.89e- 1 FALSE
# … with 102,309 more rows, 8 more variables: DTE_log2FC <dbl>, DTE_pval <dbl>,
# DTE_qval <dbl>, DTE <lgl>, DGE_log2FC <dbl>, DGE_pval <dbl>,
# DGE_qval <dbl>, DGE <lgl>, and abbreviated variable names ¹gene_name,
# ²condition_1, ³condition_2
%>% write_tsv("output/tables/TableS3_v3.tsv.gz")
tableS3
= tableS3 %>%
tableS3b_geneLevel group_by(gene_name, gene_id) %>%
summarize(
DTU = any(DTU), DTE = any(DTE), DGE = any(DGE),
DTU_qval_min = min(DTU_qval), DTU_pval_min = min(DTU_pval),
DTE_qval_min = min(DTE_qval), DTE_pval_min = min(DTE_pval),
DGE_pval = min(DGE_pval), DGE_qval = min(DGE_qval)
)
`summarise()` has grouped output by 'gene_name'. You can override using the
`.groups` argument.
%>% write_tsv("output/tables/TableS3b_geneLevel.tsv.gz") tableS3b_geneLevel
Note: log2fc in switchlist are based on the calculated RPKM values, not raw counts, so they don’t agree with DESEq2 log2fc columns.
Sanity check: The isoform_switch_q_value column in switchList\(isoformFeatures comes from the padj column in switchList\)isoformSwitchAnalysis (which is the DEXSeq results). gene_switch_q_value is just the minimum padj of any isoform for that gene.
left_join(
$isoformFeatures %>% as_tibble(),
talonSwitchList_part1$isoformSwitchAnalysis %>% as_tibble(),
talonSwitchList_part1by = "isoform_id"
%>%
) filter(isoform_switch_q_value != padj)
# A tibble: 0 × 39
# … with 39 variables: iso_ref.x <chr>, gene_ref.x <chr>, isoform_id <chr>,
# gene_id <chr>, condition_1.x <chr>, condition_2.x <chr>, gene_name <chr>,
# gene_biotype <chr>, iso_biotype <chr>, gene_overall_mean <dbl>,
# gene_value_1 <dbl>, gene_value_2 <dbl>, gene_stderr_1 <dbl>,
# gene_stderr_2 <dbl>, gene_log2_fold_change <dbl>, gene_q_value <dbl>,
# iso_overall_mean <dbl>, iso_value_1 <dbl>, iso_value_2 <dbl>,
# iso_stderr_1 <dbl>, iso_stderr_2 <dbl>, iso_log2_fold_change <dbl>, …
left_join(
$isoformFeatures %>% as_tibble(),
talonSwitchList_part1$isoformSwitchAnalysis %>% as_tibble(),
talonSwitchList_part1by = "isoform_id"
%>%
) group_by(gene_id) %>%
filter(gene_switch_q_value != min(padj))
# A tibble: 0 × 39
# Groups: gene_id [0]
# … with 39 variables: iso_ref.x <chr>, gene_ref.x <chr>, isoform_id <chr>,
# gene_id <chr>, condition_1.x <chr>, condition_2.x <chr>, gene_name <chr>,
# gene_biotype <chr>, iso_biotype <chr>, gene_overall_mean <dbl>,
# gene_value_1 <dbl>, gene_value_2 <dbl>, gene_stderr_1 <dbl>,
# gene_stderr_2 <dbl>, gene_log2_fold_change <dbl>, gene_q_value <dbl>,
# iso_overall_mean <dbl>, iso_value_1 <dbl>, iso_value_2 <dbl>,
# iso_stderr_1 <dbl>, iso_stderr_2 <dbl>, iso_log2_fold_change <dbl>, …
Fig3
Fig3a: Volcano Plots
$novelty2 = as.character(cts$transcript_novelty)
cts$novelty2[which(cts$novelty2=="ISM" & cts$ISM_subtype=="Prefix")] = "ISM_Prefix"
cts$novelty2[which(cts$novelty2=="ISM" & cts$ISM_subtype=="Suffix")] = "ISM_Suffix"
cts$novelty2[cts$novelty2 %in% c("Antisense", "Genomic", "Intergenic", "ISM")] = "Other"
cts$novelty2 = factor(cts$novelty2,levels=c("Known", "ISM_Prefix", "ISM_Suffix", "NIC", "NNC", "Other"))
cts
= talonSwitchList_part1$isoformFeatures %>%
data_to_label1 filter((-log10(isoform_switch_q_value) > 50 & (abs(dIF) > .1)) |
abs(dIF) > .5 & isoform_switch_q_value < 1e-10))
(= talonSwitchList_part1$isoformFeatures %>%
data_to_label2 filter(isoform_switch_q_value < .05, gene_name %in% c("SRRM1", "PTBP2", "ELAVL2", "ELAVL4", "RBFOX2","NF1", "TSC2", "UBE3A", "KMT2E", "KMT5B", "SMARCD3", "SMARCA1", "FOXP2", "GRIA3", "VAMP1", "GAD1", "NLGN4X", "NRXN1")) %>% group_by(gene_name) %>% slice_max(n = 1, abs(dIF))
<- bind_rows(data_to_label1,data_to_label2)
data_to_label
= ggplot(data=talonSwitchList_part1$isoformFeatures %>% left_join(cts %>% dplyr::select(isoform_id=transcript_id, novelty=novelty2)),
Fig3a aes(x=dIF, y=-log10(isoform_switch_q_value))) +
geom_point(
aes(color=novelty), # default cutoff
size=2, alpha=.5) +
geom_hline(yintercept = -log10(0.05), linetype='dashed',color='red') + # default cutoff
labs(x='difference in isoform fraction (dIF)', y='-log10 ( Isoform Switch Q Value )') +
theme_bw() + xlim(-1,1) + scale_color_manual(values=colorVector_ismSplit) +
::geom_text_repel(data = data_to_label %>% filter(dIF < 0),aes(label=gene_name, segment.size = .1),size=3,force = 10, max.overlaps = 50,nudge_y = 10, nudge_x = -.2) +
ggrepel::geom_text_repel(data = data_to_label %>% filter(dIF > 0),aes(label=gene_name, segment.size = .1),size=3,force = 10, max.overlaps = 50,nudge_y = 10, nudge_x = .2) + theme(legend.position = 'none') + geom_vline(xintercept = 0,lty=1,color='darkgrey') ggrepel
Joining, by = "isoform_id"
Fig3a
Warning: Removed 4 rows containing missing values (geom_point).
Warning: Removed 1 rows containing missing values (geom_text_repel).
ggsave(Fig3a, file="output/figures/Fig3/Fig3a.pdf",width=6,height=4)
Warning: Removed 4 rows containing missing values (geom_point).
Removed 1 rows containing missing values (geom_text_repel).
= ggplot(data=talonSwitchList_part1$isoformFeatures %>% left_join(cts %>% dplyr::select(isoform_id=transcript_id, novelty=novelty2)),
Fig3a.fg aes(x=dIF, y=-log10(isoform_switch_q_value))) +
geom_point(
aes(color=novelty), color=NA, # default cutoff
size=2, alpha=.5) +
geom_hline(yintercept = -log10(0.05), linetype='dashed',color='red') + # default cutoff
labs(x='difference in isoform fraction (dIF)', y='-log10 ( Isoform Switch Q Value )') +
theme_bw() + xlim(-1,1) + scale_color_manual(values=colorVector_ismSplit) +
::geom_text_repel(data = data_to_label %>% filter(dIF < 0),aes(label=gene_name, segment.size = .1),size=3,force = 10, max.overlaps = 50,nudge_y = 10, nudge_x = -.2) +
ggrepel::geom_text_repel(data = data_to_label %>% filter(dIF > 0),aes(label=gene_name, segment.size = .1),size=3,force = 10, max.overlaps = 50,nudge_y = 10, nudge_x = .2)+theme(legend.position = 'none') + geom_vline(xintercept = 0,lty=1,color='darkgrey') ggrepel
Joining, by = "isoform_id"
Fig3a.fg
Warning: Removed 102319 rows containing missing values (geom_point).
Removed 1 rows containing missing values (geom_text_repel).
ggsave(Fig3a.fg, file="output/figures/Fig3/Fig3a_fg.pdf",width=6,height=4)
Warning: Removed 102319 rows containing missing values (geom_point).
Removed 1 rows containing missing values (geom_text_repel).
=ggplot(data=talonSwitchList_part1$isoformFeatures %>% left_join(cts %>% dplyr::select(isoform_id=transcript_id, novelty=novelty2)),
Fig3a.bg aes(x=dIF, y=-log10(isoform_switch_q_value))) +
geom_point(
aes(color=novelty), # default cutoff
size=2, alpha=.5) + scale_color_manual(values=colorVector_ismSplit) + theme_void() + theme(legend.position = 'none')
Joining, by = "isoform_id"
ggsave(Fig3a.bg, file="output/figures/Fig3/Fig3a_bg.jpg",width=6,height=4)
Warning: Removed 4 rows containing missing values (geom_point).
Fig3a_2: Pie Chart
= talonSwitchList_part1$isoformFeatures %>%
this_df left_join(cts %>% dplyr::select(isoform_id=transcript_id, novelty=novelty2)) %>%
filter(isoform_switch_q_value < .05) %>% group_by(novelty) %>% summarise(switches=n_distinct(isoform_id))
Joining, by = "isoform_id"
<-this_df %>% arrange(desc(novelty)) %>%
this_dfmutate(prop = switches / sum(switches) *100) %>%
mutate(ypos = cumsum(prop)- 0.5*prop )
= ggplot(this_df, aes(x="", y=prop, fill=novelty)) +
Fig3a_part2 geom_bar(stat="identity", width=1, color="white") +
coord_polar("y",start=0) +
theme_void() + theme(legend.position = 'none', plot.title = element_text(hjust=.5)) +
scale_fill_manual(values=colorVector_ismSplit) +
geom_text(aes(y=ypos, label = paste0(novelty,"\n(N=", switches, ")")), color = "white", size=5)
Fig3a_part2
ggsave(Fig3a_part2, file="output/figures/Fig3/Fig3a_part2.pdf",width=4,height=4)
Fig3b: DGE vs DTU plots
= talonSwitchList_part1$isoformFeatures %>%
this_df left_join(cts %>% dplyr::select(isoform_id=transcript_id, novelty=novelty2))
Joining, by = "isoform_id"
$gene_q_value = DGE_results$padj[match(this_df$gene_id, DGE_results$gene_id)]
this_df
= ggplot(data=this_df,
Fig3b aes(x=gene_log2_fold_change, y=dIF)) +
geom_point(
aes(color=abs(dIF) > 0.1 & isoform_switch_q_value < 0.05, # default cutoff
shape = abs(gene_log2_fold_change) > .1 & gene_q_value < .05),
size=1, alpha=.5,
+
) geom_hline(yintercept = 0, linetype='dashed') + # default cutoff
geom_vline(xintercept =0, linetype='dashed') + # default cutoff
labs(y='difference in isoform fraction (dIF)', x='Gene log2FC') +
theme_bw() + xlim(-5,5)+
scale_color_manual('Signficant\nIsoform Switch', values = c('black','red')) +
scale_shape_manual('Signficant\nDGE', values = c("FALSE"=1, "TRUE"=16)) +
::geom_text_repel(data = talonSwitchList_part1$isoformFeatures %>% filter(abs(dIF) > .5 & isoform_switch_q_value < 0.05), aes(label=gene_name),size=3, max.overlaps = 20)
ggrepel
Fig3b
Warning: Removed 31 rows containing missing values (geom_point).
Warning: Removed 1 rows containing missing values (geom_text_repel).
Warning: ggrepel: 4 unlabeled data points (too many overlaps). Consider
increasing max.overlaps
ggsave(Fig3b, file="output/figures/Fig3/Fig3b.pdf",width=8,height=5)
Warning: Removed 31 rows containing missing values (geom_point).
Warning: Removed 1 rows containing missing values (geom_text_repel).
Warning: ggrepel: 2 unlabeled data points (too many overlaps). Consider
increasing max.overlaps
## DGE Volcano plot colored by DTU
= DGE_results %>% left_join(cts_gene %>% dplyr::select(gene_id, gene_name)) %>%
this_df left_join(tableS3 %>% dplyr::select(gene_id, DTU) %>% group_by(gene_id) %>% summarise(DTU = any(DTU)))
Joining, by = "gene_id"
Joining, by = "gene_id"
$DTU[is.na(this_df$DTU)] = F
this_df$DTU = factor(this_df$DTU, levels=c("TRUE", "FALSE"))
this_dfggplot(this_df %>% filter(DTU==FALSE), aes(x=log2FoldChange, y=-log10(padj))) + geom_point(alpha=.25, color='grey') + geom_point(data=this_df %>% filter(DTU==TRUE), alpha=.5, color='red') + theme_bw()
Fig3c: Venn Diagrams
= tableS3 %>% group_by(gene_id) %>% summarise(DTE = any(DTE), DGE=any(DGE), DTU=any(DTU)) %>% dplyr::select(-gene_id)
gene_overlaps
# Sanity check: 2,679 of 10809 genes with DTU (iso_q_value< .05)
=extractTopSwitches(
switchesextractGenes = TRUE, dIFcutoff = 0, filterForConsequences = FALSE, n=Inf
talonSwitchList_part1, %>% as_tibble()
) table(gene_overlaps$DTU)
FALSE TRUE
8130 2679
# Gene-level DGE: 4475 of 24554 -- note different backgrounds
table(DGE_results$padj < .05)
FALSE TRUE
20079 4475
# Fisher's Exact Test
source(file = "code/fisher_overlap.R")
ORA(tableS3$gene_id[tableS3$DTU_qval<.05],DGE_results$gene_id[DGE_results$padj<.05], tableS3$gene_id, DGE_results$gene_id)
OR Fisher p -95%CI
"2.00059991005475" "1.21479676549559e-46" "1.81968255413102"
+95%CI Overlap Reference List
"2.19910780950278" "1010" "2898"
Input List Background % List Overlap
"2679" "10809" "34.9"
# 1411 isoform-switching genes without even nominally significant evidence of DGE
length(intersect(tableS3$gene_id[tableS3$DTU_qval<.05], DGE_results$gene_id[DGE_results$pvalue>.05]))
[1] 1411
# 1669 isoform-switching genes without significant evidence of DGE (using adjusted p-values)
length(intersect(tableS3$gene_id[tableS3$DTU_qval<.05], DGE_results$gene_id[DGE_results$padj>.05]))
[1] 1669
= ggVennDiagram(list(DTU = which(gene_overlaps$DTU),
Fig3c DGE = which(gene_overlaps$DGE),
DTE = which(gene_overlaps$DTE))) +
scale_fill_gradient(low="grey",high = "red")
Fig3c
ggsave(Fig3c, file="output/figures/Fig3/Fig3c.pdf",width=8,height=5)
Switch analysis part 2 (functional consequences)
IUPred and SignalP are difficult to run. Non-webserver IUPred2A/3 only takes a single sequence at a time, so requires a wrapper. Although “V5 is supported” for SignalP (I guess through the webserver?) v5.0 and v6.0 no longer seem to produce the expected output (-f summary
) of v4.1.
Currently, trying to import from the webtools_v3
crashes R.
= "data/working/talonSwitchList_part2.rds"
rdata_path if (!file.exists(rdata_path)) {
<- isoformSwitchAnalysisPart2(
talonSwitchList_part2 switchAnalyzeRlist = talonSwitchList_part1,
n = 10, # number of PDF plots to generate
removeNoncodinORFs = FALSE,
pathToCPC2resultFile = "data/working/IsoformSwitchAnalyzeR_webtools_v4/result_cpc2.txt",
pathToPFAMresultFile = "data/working/IsoformSwitchAnalyzeR_webtools_v4/result_pfam_scan.txt",
# might want to disable smoothing through `analyzeIUPred2A` since IUPred3 now
# performs smoothing:
pathToIUPred2AresultFile = "data/working/IsoformSwitchAnalyzeR_webtools_v4/result_iupred3.txt.gz",
pathToSignalPresultFile = "data/working/IsoformSwitchAnalyzeR_webtools_v4/result_signalP.txt",
pathToOutput = "output/IsoformSwitchAnalyzeR/",
outputPlots = T
)# todo - split this into individual functions because `analyzeSwitchConsequences`
# is still filtering on dIF in addition to qval
<- analyzeSwitchConsequences(talonSwitchList_part2, onlySigIsoforms = T, dIFcutoff = 0, consequencesToAnalyze = c('tss','tts','last_exon','isoform_length','exon_number','intron_structure','ORF_length', '5_utr_seq_similarity', '5_utr_length', '3_utr_seq_similarity', '3_utr_length','coding_potential','ORF_seq_similarity','NMD_status','domains_identified','signal_peptide_identified'))
talonSwitchList_part2
saveRDS(talonSwitchList_part2, file=rdata_path)
else {
} = readRDS(rdata_path)
talonSwitchList_part2 }
Plot a gene:
pdf(file="output/figures/Fig3/Fig3_SMARCC2.pdf", height=4, width=8)
switchPlot(
dIFcutoff = .1,logYaxis = TRUE,
talonSwitchList_part2,gene='SMARCC2'
)
Warning: `guides(<scale> = FALSE)` is deprecated. Please use `guides(<scale> = "none")` instead.
`guides(<scale> = FALSE)` is deprecated. Please use `guides(<scale> = "none")` instead.
`guides(<scale> = FALSE)` is deprecated. Please use `guides(<scale> = "none")` instead.
dev.off()
quartz_off_screen
2
pdf(file="output/figures/Fig4/FigS4_NF1.pdf", height=4, width=6)
switchPlot(
dIFcutoff = .1,logYaxis = TRUE,
talonSwitchList_part2,gene='NF1'
)
Warning: `guides(<scale> = FALSE)` is deprecated. Please use `guides(<scale> = "none")` instead.
`guides(<scale> = FALSE)` is deprecated. Please use `guides(<scale> = "none")` instead.
`guides(<scale> = FALSE)` is deprecated. Please use `guides(<scale> = "none")` instead.
dev.off()
quartz_off_screen
2
switchPlot(
talonSwitchList_part2,gene='TALONG000088362'
)
Warning: `guides(<scale> = FALSE)` is deprecated. Please use `guides(<scale> = "none")` instead.
`guides(<scale> = FALSE)` is deprecated. Please use `guides(<scale> = "none")` instead.
`guides(<scale> = FALSE)` is deprecated. Please use `guides(<scale> = "none")` instead.
Functional Analysis of Isoform Switches:
pdf(file='output/figures/Fig3/Fig3e.pdf', width=7,height=5)
<- extractConsequenceEnrichment(
switch_consequences dIFcutoff = 0,countGenes = F,
talonSwitchList_part2,returnResult = T # if TRUE returns a data.frame with the summary statistics
)dev.off()
quartz_off_screen
2
write_tsv(switch_consequences, file="output/tables/TableS3_switchConsequences_summary_v2.tsv")
write_tsv(talonSwitchList_part2$switchConsequence, file="output/tables/TableS3_switchConsequences_data.tsv")
= extractConsequenceSummary(
consequence includeCombined = T,
talonSwitchList_part2, dIFcutoff=0, returnResult = T)
extractSplicingEnrichment(
dIFcutoff = 0,onlySigIsoforms = T,
talonSwitchList_part2,countGenes = F,
returnResult = TRUE # if TRUE returns a data.frame with the summary statistics
)
condition_1 condition_2 AStype nUp nDown propUp
1 VZ CP A3 gain (paired with A3 loss) 603 472 0.5609302
2 VZ CP A5 gain (paired with A5 loss) 526 466 0.5302419
3 VZ CP ATSS gain (paired with ATSS loss) 868 679 0.5610860
4 VZ CP ATTS gain (paired with ATTS loss) 690 544 0.5591572
5 VZ CP ES (paired with EI) 776 1153 0.4022810
6 VZ CP IR gain (paired with IR loss) 266 265 0.5009416
7 VZ CP MEE gain (paired with MEE loss) 4 3 0.5714286
8 VZ CP MES (paired with MEI) 364 587 0.3827550
propUpCiLo propUpCiHi propUpPval propUpQval Significant Comparison
1 0.5306659 0.5908602 7.202013e-05 1.152322e-04 TRUE VZ\nvs\nCP
2 0.4986297 0.5616742 6.097909e-02 8.130545e-02 FALSE VZ\nvs\nCP
3 0.5359374 0.5860023 1.705724e-06 4.548597e-06 TRUE VZ\nvs\nCP
4 0.5309374 0.5870945 3.592843e-05 7.185686e-05 TRUE VZ\nvs\nCP
5 0.3803020 0.4245576 8.845503e-18 7.076402e-17 TRUE VZ\nvs\nCP
6 0.4575691 0.5443036 1.000000e+00 1.000000e+00 FALSE VZ\nvs\nCP
7 0.1840516 0.9010117 1.000000e+00 1.000000e+00 FALSE VZ\nvs\nCP
8 0.3517411 0.4144974 4.775090e-13 1.910036e-12 TRUE VZ\nvs\nCP
extractSplicingSummary(talonSwitchList_part2,
splicingToAnalyze = 'all',dIFcutoff = 0,
onlySigIsoforms = T,asFractionTotal = F)
Alternative TSS isoform switches
=talonSwitchList_part2$switchConsequence %>% as_tibble() %>%
TSS_switches filter(featureCompared == 'tss', isoformsDifferent==TRUE) %>%
::select(gene_name, isoformUpregulated, isoformDownregulated) %>%
dplyrleft_join(talonSwitchList_part2$isoformFeatures %>% dplyr::select(isoformUpregulated=isoform_id, dIF, isoform_switch_q_value))
Joining, by = "isoformUpregulated"
# Top 10 TSS switches by isoform fraction
%>% arrange(-abs(dIF)) TSS_switches
# A tibble: 2,452 × 5
gene_name isoformUpregulated isoformDownregulated dIF isoform_switch_q_…¹
<chr> <chr> <chr> <dbl> <dbl>
1 S100A16 ENST00000368705.2_2 ENST00000368706.9_3 0.811 5.86e- 3
2 CPVL TALONT000753261 ENST00000265394.10_3 0.769 7.02e-16
3 ZNF358 ENST00000596712.1_3 ENST00000597229.2_2 0.766 7.57e- 4
4 PLAUR ENST00000340093.8_5 TALONT000368020 0.697 5.55e- 8
5 ELMO1 ENST00000396045.7_2 ENST00000310758.8_2 0.654 1.67e-39
6 ELMO1 ENST00000396045.7_2 ENST00000448602.5_2 0.654 1.67e-39
7 ELMO1 ENST00000396045.7_2 TALONT000761865 0.654 1.67e-39
8 BIN1 ENST00000316724.10_4 ENST00000348750.8_3 0.624 1.51e- 5
9 BIN1 ENST00000316724.10_4 ENST00000393040.7_3 0.624 1.51e- 5
10 BIN1 ENST00000316724.10_4 ENST00000409400.1_4 0.624 1.51e- 5
# … with 2,442 more rows, and abbreviated variable name ¹isoform_switch_q_value
for(top_genes in TSS_switches %>% arrange(-abs(dIF)) %>% dplyr::select(gene_name) %>%
unique() %>% head(10) %>% pull()) {
switchPlot(
talonSwitchList_part2,gene=top_genes)
}
Warning: `guides(<scale> = FALSE)` is deprecated. Please use `guides(<scale> = "none")` instead.
`guides(<scale> = FALSE)` is deprecated. Please use `guides(<scale> = "none")` instead.
`guides(<scale> = FALSE)` is deprecated. Please use `guides(<scale> = "none")` instead.
`guides(<scale> = FALSE)` is deprecated. Please use `guides(<scale> = "none")` instead.
Warning: `guides(<scale> = FALSE)` is deprecated. Please use `guides(<scale> = "none")` instead.
`guides(<scale> = FALSE)` is deprecated. Please use `guides(<scale> = "none")` instead.
`guides(<scale> = FALSE)` is deprecated. Please use `guides(<scale> = "none")` instead.
Warning: `guides(<scale> = FALSE)` is deprecated. Please use `guides(<scale> = "none")` instead.
`guides(<scale> = FALSE)` is deprecated. Please use `guides(<scale> = "none")` instead.
`guides(<scale> = FALSE)` is deprecated. Please use `guides(<scale> = "none")` instead.
Warning: `guides(<scale> = FALSE)` is deprecated. Please use `guides(<scale> = "none")` instead.
`guides(<scale> = FALSE)` is deprecated. Please use `guides(<scale> = "none")` instead.
`guides(<scale> = FALSE)` is deprecated. Please use `guides(<scale> = "none")` instead.
Warning: `guides(<scale> = FALSE)` is deprecated. Please use `guides(<scale> = "none")` instead.
`guides(<scale> = FALSE)` is deprecated. Please use `guides(<scale> = "none")` instead.
`guides(<scale> = FALSE)` is deprecated. Please use `guides(<scale> = "none")` instead.
Warning: `guides(<scale> = FALSE)` is deprecated. Please use `guides(<scale> = "none")` instead.
`guides(<scale> = FALSE)` is deprecated. Please use `guides(<scale> = "none")` instead.
`guides(<scale> = FALSE)` is deprecated. Please use `guides(<scale> = "none")` instead.
Warning: `guides(<scale> = FALSE)` is deprecated. Please use `guides(<scale> = "none")` instead.
`guides(<scale> = FALSE)` is deprecated. Please use `guides(<scale> = "none")` instead.
`guides(<scale> = FALSE)` is deprecated. Please use `guides(<scale> = "none")` instead.
Warning: `guides(<scale> = FALSE)` is deprecated. Please use `guides(<scale> = "none")` instead.
`guides(<scale> = FALSE)` is deprecated. Please use `guides(<scale> = "none")` instead.
`guides(<scale> = FALSE)` is deprecated. Please use `guides(<scale> = "none")` instead.
Warning: `guides(<scale> = FALSE)` is deprecated. Please use `guides(<scale> = "none")` instead.
`guides(<scale> = FALSE)` is deprecated. Please use `guides(<scale> = "none")` instead.
`guides(<scale> = FALSE)` is deprecated. Please use `guides(<scale> = "none")` instead.
Warning: `guides(<scale> = FALSE)` is deprecated. Please use `guides(<scale> = "none")` instead.
`guides(<scale> = FALSE)` is deprecated. Please use `guides(<scale> = "none")` instead.
# Top 10 TSS switches by q-value
%>% arrange(isoform_switch_q_value) TSS_switches
# A tibble: 2,452 × 5
gene_name isoformUpregulated isoformDownregulated dIF isoform_switch_q_…¹
<chr> <chr> <chr> <dbl> <dbl>
1 CTNNA1 ENST00000540387.5_1 ENST00000302763.12_2 0.197 1.04e-83
2 PKM TALONT000323027 TALONT000323086 0.453 9.72e-73
3 TPM1 ENST00000334895.10_2 ENST00000358278.7_3 0.475 4.54e-65
4 TPM1 ENST00000334895.10_2 ENST00000558868.5_1 0.475 4.54e-65
5 TPM1 ENST00000334895.10_2 ENST00000560959.5_1 0.475 4.54e-65
6 KIF2A ENST00000407818.7_2 ENST00000401507.7_2 0.276 1.96e-54
7 KIF2A ENST00000407818.7_2 TALONT000664196 0.276 1.96e-54
8 ABAT ENST00000396600.6_1 ENST00000268251.13_2 0.317 4.11e-51
9 ABAT ENST00000396600.6_1 ENST00000425191.6_1 0.317 4.11e-51
10 ABAT ENST00000396600.6_1 TALONT000412117 0.317 4.11e-51
# … with 2,442 more rows, and abbreviated variable name ¹isoform_switch_q_value
for(top_genes in TSS_switches %>% arrange(isoform_switch_q_value) %>% dplyr::select(gene_name) %>%
unique() %>% head(10) %>% pull()) {
switchPlot(
talonSwitchList_part2,gene=top_genes)
}
Warning: `guides(<scale> = FALSE)` is deprecated. Please use `guides(<scale> = "none")` instead.
`guides(<scale> = FALSE)` is deprecated. Please use `guides(<scale> = "none")` instead.
`guides(<scale> = FALSE)` is deprecated. Please use `guides(<scale> = "none")` instead.
`guides(<scale> = FALSE)` is deprecated. Please use `guides(<scale> = "none")` instead.
Warning: `guides(<scale> = FALSE)` is deprecated. Please use `guides(<scale> = "none")` instead.
`guides(<scale> = FALSE)` is deprecated. Please use `guides(<scale> = "none")` instead.
`guides(<scale> = FALSE)` is deprecated. Please use `guides(<scale> = "none")` instead.
Warning: `guides(<scale> = FALSE)` is deprecated. Please use `guides(<scale> = "none")` instead.
`guides(<scale> = FALSE)` is deprecated. Please use `guides(<scale> = "none")` instead.
`guides(<scale> = FALSE)` is deprecated. Please use `guides(<scale> = "none")` instead.
Warning: `guides(<scale> = FALSE)` is deprecated. Please use `guides(<scale> = "none")` instead.
`guides(<scale> = FALSE)` is deprecated. Please use `guides(<scale> = "none")` instead.
`guides(<scale> = FALSE)` is deprecated. Please use `guides(<scale> = "none")` instead.
Warning: `guides(<scale> = FALSE)` is deprecated. Please use `guides(<scale> = "none")` instead.
`guides(<scale> = FALSE)` is deprecated. Please use `guides(<scale> = "none")` instead.
`guides(<scale> = FALSE)` is deprecated. Please use `guides(<scale> = "none")` instead.
Warning: `guides(<scale> = FALSE)` is deprecated. Please use `guides(<scale> = "none")` instead.
`guides(<scale> = FALSE)` is deprecated. Please use `guides(<scale> = "none")` instead.
`guides(<scale> = FALSE)` is deprecated. Please use `guides(<scale> = "none")` instead.
Warning: `guides(<scale> = FALSE)` is deprecated. Please use `guides(<scale> = "none")` instead.
`guides(<scale> = FALSE)` is deprecated. Please use `guides(<scale> = "none")` instead.
`guides(<scale> = FALSE)` is deprecated. Please use `guides(<scale> = "none")` instead.
Warning: `guides(<scale> = FALSE)` is deprecated. Please use `guides(<scale> = "none")` instead.
`guides(<scale> = FALSE)` is deprecated. Please use `guides(<scale> = "none")` instead.
`guides(<scale> = FALSE)` is deprecated. Please use `guides(<scale> = "none")` instead.
Warning: `guides(<scale> = FALSE)` is deprecated. Please use `guides(<scale> = "none")` instead.
`guides(<scale> = FALSE)` is deprecated. Please use `guides(<scale> = "none")` instead.
`guides(<scale> = FALSE)` is deprecated. Please use `guides(<scale> = "none")` instead.
Warning: `guides(<scale> = FALSE)` is deprecated. Please use `guides(<scale> = "none")` instead.
`guides(<scale> = FALSE)` is deprecated. Please use `guides(<scale> = "none")` instead.
write_tsv(TSS_switches, file="output/tables/TableS3_tss_switches.tsv")
Plot interesting disease genes
= "output/figures/switch_plots/switch_plots_diseaseGenes.pdf"
pdf_path if(!file.exists(pdf_path)) {
= read.csv('ref/ASD+SCZ+DDD_2022.csv')
disease_genes = extractTopSwitches(talonSwitchList_part2, filterForConsequences = T, n = Inf)
interesting_genes = interesting_genes$gene_name
interesting_genes = interesting_genes[interesting_genes %in% disease_genes$Gene]
interesting_genes
pdf(file=pdf_path)
for(this_gene in interesting_genes) {
switchPlot(
talonSwitchList_part2,gene=this_gene
)print(this_gene)
}dev.off()
}
Plot genes with top switching novel isoforms
= "output/figures/switch_plots/Fig3_switch_plots_novelIsoforms.pdf"
pdf_path if(!file.exists(pdf_path)) {
= read.csv('ref/ASD+SCZ+DDD_2022.csv')
disease_genes = talonSwitchList_part2$isoformFeatures %>% left_join(cts %>% dplyr::select(isoform_id=transcript_id, novelty=novelty2))
switches <- switches %>% filter(novelty!="Known" & isoform_switch_q_value < 10e-15 ) %>% dplyr::arrange(isoform_switch_q_value) %>% dplyr::select(gene_name) %>% pull() %>% unique()
interesting_genes
= unique(c(interesting_genes[1:10], interesting_genes[interesting_genes%in% disease_genes$Gene]))
interesting_genes
interesting_genes
pdf(file = pdf_path)
for(this_gene in interesting_genes) {
switchPlot(
talonSwitchList_part2,gene=this_gene
)print(this_gene)
}dev.off()
}
this is a disgusting hack
<- function(x) str_extract(x, "^[^\\.]*")
strp
= function(switchList) {
strippedSwitchList $isoformFeatures$isoform_id <- strp(switchList$isoformFeatures$isoform_id)
switchList"exons"]]@elementMetadata@listData[["isoform_id"]] <- strp(switchList[["exons"]]@elementMetadata@listData[["isoform_id"]])
switchList[[$isoformCountMatrix$isoform_id <- strp(switchList$isoformCountMatrix$isoform_id)
switchList$isoformRepExpression$isoform_id <- strp(switchList$isoformRepExpression$isoform_id)
switchList$orfAnalysis$isoform_id <- strp(switchList$orfAnalysis$isoform_id)
switchList$isoformSwitchAnalysis$isoform_id <- strp(switchList$isoformSwitchAnalysis$isoform_id)
switchList$domainAnalysis$isoform_id <- strp(switchList$domainAnalysis$isoform_id)
switchList$idrAnalysis$isoform_id <- strp(switchList$idrAnalysis$isoform_id)
switchList$signalPeptideAnalysis$isoform_id <- strp(switchList$signalPeptideAnalysis$isoform_id)
switchList$AlternativeSplicingAnalysis$isoform_id <- strp(switchList$AlternativeSplicingAnalysis$isoform_id)
switchList
$switchConsequence$isoformUpregulated <- strp(switchList$switchConsequence$isoformUpregulated)
switchList$switchConsequence$isoformDownregulated <- strp(switchList$switchConsequence$isoformDownregulated)
switchListreturn(switchList)
}
= strippedSwitchList(talonSwitchList_part2) talonSwitchList_part2_strp
theme_set(theme_gray(base_size = 18))
theme_update(
axis.text = element_text(color="black"),
plot.title = element_text(size = rel(1), hjust = 0.5)
)
export at 8 x 6in
switchPlot(talonSwitchList_part2_strp, gene = "KMT2E")
Warning: `guides(<scale> = FALSE)` is deprecated. Please use `guides(<scale> = "none")` instead.
`guides(<scale> = FALSE)` is deprecated. Please use `guides(<scale> = "none")` instead.
`guides(<scale> = FALSE)` is deprecated. Please use `guides(<scale> = "none")` instead.
switchPlot(talonSwitchList_part2_strp, gene = "UBE3A")
Warning: `guides(<scale> = FALSE)` is deprecated. Please use `guides(<scale> = "none")` instead.
`guides(<scale> = FALSE)` is deprecated. Please use `guides(<scale> = "none")` instead.
`guides(<scale> = FALSE)` is deprecated. Please use `guides(<scale> = "none")` instead.