Figure 5 - Seurat Clustering

Author

Ashok Patowary

library(Seurat)
Attaching SeuratObject
Attaching sp
library(tidyverse)
── Attaching packages
───────────────────────────────────────
tidyverse 1.3.2 ──
✔ ggplot2 3.3.6      ✔ purrr   0.3.4 
✔ tibble  3.1.8      ✔ dplyr   1.0.10
✔ tidyr   1.2.0      ✔ stringr 1.5.0 
✔ readr   2.1.2      ✔ forcats 0.5.2 
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()
library(patchwork)
library(sctransform)
library(cowplot)

Attaching package: 'cowplot'

The following object is masked from 'package:patchwork':

    align_plots
library(data.table)

Attaching package: 'data.table'

The following objects are masked from 'package:dplyr':

    between, first, last

The following object is masked from 'package:purrr':

    transpose
library(RColorBrewer)
#rm(list=ls())
#isoT_7k.data <- data.frame(fread("zcat Isoform_counts_7189_allSingleCells_4seurat.tsv.gz", header=TRUE, sep="auto"), row.names = 1)

#With new Id
isoT_7k.data <- data.frame(fread("data/AllBarcode_newId_with_geneID.txt.gz", header=TRUE, sep="auto"), row.names = 1)
dim(isoT_7k.data)
[1] 143490   7189
isoT7k<- CreateSeuratObject(counts = isoT_7k.data, project = "7kTranscript", min.cells = 3, min.features = 85)
Warning: Feature names cannot have underscores ('_'), replacing with dashes
('-')
isoT7k
An object of class Seurat 
90595 features across 4924 samples within 1 assay 
Active assay: RNA (90595 features, 0 variable features)
head(isoT7k@meta.data)
                 orig.ident nCount_RNA nFeature_RNA
End_ACGGCATAGTAC        End        230          190
End_ACTGACTGATCC        End        162          141
End_ACTGGCACCGAG        End        150          120
End_AGCTAGCCCATG        End        277          219
End_AGTTTAGCGGTC        End       1207          871
End_ATCCGACCATCA        End        132           98
# Compute percent mito and ribo ratio
isoT7k[["percent.mt"]] <- PercentageFeatureSet(isoT7k, pattern = "^MT-")
isoT7k[["percent.RP"]] <- PercentageFeatureSet(isoT7k, pattern = "^RP")

####DropOut
dropouts <- Matrix::colSums(isoT7k@assays$RNA@data == 0)/nrow(isoT7k@assays$RNA)
isoT7k[['dropouts']] <- dropouts

head(isoT7k@meta.data)
                 orig.ident nCount_RNA nFeature_RNA percent.mt percent.RP
End_ACGGCATAGTAC        End        230          190  0.4347826   24.78261
End_ACTGACTGATCC        End        162          141  1.2345679   29.62963
End_ACTGGCACCGAG        End        150          120  0.6666667   13.33333
End_AGCTAGCCCATG        End        277          219  6.4981949   22.38267
End_AGTTTAGCGGTC        End       1207          871  3.0654515   13.42171
End_ATCCGACCATCA        End        132           98  2.2727273   12.87879
                  dropouts
End_ACGGCATAGTAC 0.9979028
End_ACTGACTGATCC 0.9984436
End_ACTGGCACCGAG 0.9986754
End_AGCTAGCCCATG 0.9975826
End_AGTTTAGCGGTC 0.9903858
End_ATCCGACCATCA 0.9989183
# Add number of genes per UMI for each cell to metadata
isoT7k$log10GenesPerUMI <- log10(isoT7k$nFeature_RNA) / log10(isoT7k$nCount_RNA)
head(isoT7k@meta.data)
                 orig.ident nCount_RNA nFeature_RNA percent.mt percent.RP
End_ACGGCATAGTAC        End        230          190  0.4347826   24.78261
End_ACTGACTGATCC        End        162          141  1.2345679   29.62963
End_ACTGGCACCGAG        End        150          120  0.6666667   13.33333
End_AGCTAGCCCATG        End        277          219  6.4981949   22.38267
End_AGTTTAGCGGTC        End       1207          871  3.0654515   13.42171
End_ATCCGACCATCA        End        132           98  2.2727273   12.87879
                  dropouts log10GenesPerUMI
End_ACGGCATAGTAC 0.9979028        0.9648671
End_ACTGACTGATCC 0.9984436        0.9727108
End_ACTGGCACCGAG 0.9986754        0.9554660
End_AGCTAGCCCATG 0.9975826        0.9582246
End_AGTTTAGCGGTC 0.9903858        0.9540225
End_ATCCGACCATCA 0.9989183        0.9390034
# Create metadata dataframe
metadataisoT7k <- isoT7k@meta.data

# Add cell IDs to metadata
metadataisoT7k$cells <- str_split(rownames(metadataisoT7k), pattern = "_", simplify = T)[,2]

# Rename columns
metadataisoT7k <- metadataisoT7k %>%
  dplyr::rename(CellType = orig.ident,
                nUMI = nCount_RNA,
                nGene = nFeature_RNA)

##Create method column
metadataisoT7k$method <- "LongRead"
#Donor Innformation
donor<-read.table("ref/polioudakis_neuron2020/TableS3_Cell_metadata.csv", header=TRUE, row.names=1)
df<- as.data.frame.matrix(donor)

# Add additional innfo into the metadata
#metadataisoT7k$DonorID=df$DonorID[match(metadataisoT7k$cells,rownames(df))]
metadataisoT7k$Layer=df$Layer[match(metadataisoT7k$cells,rownames(df))]
metadataisoT7k$Donor=df$Donor[match(metadataisoT7k$cells,rownames(df))]
metadataisoT7k$Gestation_week=df$Gestation_week[match(metadataisoT7k$cells,rownames(df))]

# Add metadata back to Seurat object
isoT7k@meta.data<- metadataisoT7k
head(isoT7k@meta.data)
                 CellType nUMI nGene percent.mt percent.RP  dropouts
End_ACGGCATAGTAC      End  230   190  0.4347826   24.78261 0.9979028
End_ACTGACTGATCC      End  162   141  1.2345679   29.62963 0.9984436
End_ACTGGCACCGAG      End  150   120  0.6666667   13.33333 0.9986754
End_AGCTAGCCCATG      End  277   219  6.4981949   22.38267 0.9975826
End_AGTTTAGCGGTC      End 1207   871  3.0654515   13.42171 0.9903858
End_ATCCGACCATCA      End  132    98  2.2727273   12.87879 0.9989183
                 log10GenesPerUMI        cells   method Layer Donor
End_ACGGCATAGTAC        0.9648671 ACGGCATAGTAC LongRead    CP   370
End_ACTGACTGATCC        0.9727108 ACTGACTGATCC LongRead    CP   370
End_ACTGGCACCGAG        0.9554660 ACTGGCACCGAG LongRead    GZ   372
End_AGCTAGCCCATG        0.9582246 AGCTAGCCCATG LongRead    CP   371
End_AGTTTAGCGGTC        0.9540225 AGTTTAGCGGTC LongRead    CP   371
End_ATCCGACCATCA        0.9390034 ATCCGACCATCA LongRead    CP   370
                 Gestation_week
End_ACGGCATAGTAC             18
End_ACTGACTGATCC             18
End_ACTGGCACCGAG             18
End_AGCTAGCCCATG             17
End_AGTTTAGCGGTC             17
End_ATCCGACCATCA             18
####Some Quality Check
#VlnPlot(isoT7k, features = c("nGene", "nUMI", "percent.mt", "percent.RP", "dropouts"), ncol = 5, group.by="method", pt.size=0.25)
VlnPlot(isoT7k, features = c("nGene", "nUMI", "percent.mt"), ncol = 3, group.by="method", pt.size=0.25)

FeatureScatter(isoT7k, feature1 = "nUMI", feature2 = "percent.mt", group.by="method")

FeatureScatter(isoT7k, feature1 = "nUMI", feature2 = "nGene", group.by="method")

isoT7k_filter <- subset(x = isoT7k, 
                        subset= (nGene >= 85) & 
                          (nGene < 2000)  &
                          (percent.mt < 7))
####Some Quality Check after Filtering
VlnPlot(isoT7k_filter, features = c("nGene", "nUMI", "percent.mt", "percent.RP", "dropouts"), ncol = 5, group.by="method", pt.size=0.25)

# Output a logical vector for every gene on whether the more than zero counts per cell
# Extract counts
counts <- GetAssayData(object = isoT7k_filter, slot = "counts")

# Output a logical vector for every gene on whether the more than zero counts per cell
nonzero <- counts > 0

# Filter Isoforms
keep_genes <- Matrix::rowSums(nonzero) >= 3

# Only keeping those filtered isoforms 
filtered_counts <- counts[keep_genes, ]

# Reassign to filtered Seurat object
isoT7k_filter <- CreateSeuratObject(filtered_counts, meta.data = isoT7k_filter@meta.data)
######Filter Known Cells
isoT4k_sub <- subset(x = isoT7k_filter,
                        subset= CellType != "Unknown" ) 

# Output a logical vector for every gene on whether the more than zero counts per cell
# Extract counts
counts <- GetAssayData(object = isoT4k_sub, slot = "counts")

# Output a logical vector for every gene on whether the more than zero counts per cell
nonzero <- counts > 0

# Filter non expresses isoforms
keep_genes <- Matrix::rowSums(nonzero) >= 0

# Only keeping filtered isoforms
filtered_counts <- counts[keep_genes, ]

# Reassign to filtered Seurat object and Running seurat

isoT4k_filter <- CreateSeuratObject(filtered_counts, meta.data = isoT4k_sub@meta.data) %>%
  SCTransform(
    verbose = FALSE, 
    assay = 'RNA',
    variable.features.n = 10000,
    #return.only.var.genes = FALSE,
    new.assay.name = 'SCT', 
    vars.to.regress = "percent.mt" #,
    #n_genes=10000
  ) %>%
  RunPCA( )
PC_ 1 
Positive:  STMN2-ENST00000220876, MAP1B-TALONT000671508, TUBB3-ENST00000315491, NEUROD6-ENST00000297142, NEUROD2-ENST00000302584, RTN1-ENST00000342503, NSG2-ENST00000303177, GAP43-ENST00000305124, GPM6A-ENST00000393658, TUBB-ENST00000327892 
       NEFM-ENST00000221166, SOX11-ENST00000322002, SOX4-ENST00000244745, SLA-ENST00000338087, INA-ENST00000369849, SYT4-ENST00000255224, TUBB2A-ENST00000333628, NEFL-TALONT000583190, CALM1-ENST00000356978, ENC1-ENST00000302351 
       CRYM-TALONT000431241, AC004158.1-ENST00000564508, PFN2-ENST00000239940, ANKRD18CP-TALONT000751502, MLLT3-TALONT000705935, LMO3-ENST00000354662, DLX6-AS1-TALONT000801216, ATP1B1-ENST00000367815, LMO3-ENST00000441439, BASP1-ENST00000322611 
Negative:  VIM-ENST00000224237, FOS-ENST00000303562, PTN-ENST00000348225, JUN-ENST00000371222, EGR1-ENST00000239938, HES1-ENST00000232424, ID4-ENST00000378700, SOX2-ENST00000325404, CCN1-ENST00000451137, PEA15-ENST00000360472 
       FOS-ENST00000554617, FABP5-ENST00000297258, PTN-TALONT000822332, HSPA1B-ENST00000375650, SFRP1-ENST00000220772, HSPA1A-ENST00000375651, SOX9-ENST00000245479, GPM6B-ENST00000454189, PTN-TALONT000822343, SFRP1-TALONT000603415 
       IER2-ENST00000588173, SPARC-ENST00000231061, FABP7-ENST00000368444, TUBA1B-ENST00000336023, HMGB2-ENST00000296503, H4C3-ENST00000377803, HMGN2-ENST00000361427, TTYH1-ENST00000376530, FGFBP3-ENST00000311575, JUNB-ENST00000302754 
PC_ 2 
Positive:  H4C3-ENST00000377803, HMGB2-ENST00000296503, TUBA1B-ENST00000336023, HMGN2-ENST00000361427, UBE2C-ENST00000356455, H1-3-ENST00000244534, PPP1R17-ENST00000342032, SOX4-ENST00000244745, HES6-ENST00000272937, KIFC1-ENST00000428849 
       KPNA2-ENST00000330459, PENK-ENST00000451791, CCND2-ENST00000261254, EOMES-ENST00000449599, ARL6IP1-ENST00000304414, ENC1-ENST00000302351, CCNB1-ENST00000256442, HES6-ENST00000409160, PLK1-ENST00000300093, GADD45G-ENST00000252506 
       NNAT-ENST00000649451, SSTR2-ENST00000357585, IGFBPL1-ENST00000377694, ENC1-ENST00000618628, CDKN2C-ENST00000396148, ANKRD18CP-TALONT000751502, CADM1-ENST00000616271, SMOC1-ENST00000361956, NHLH1-ENST00000302101, POU3F2-ENST00000328345 
Negative:  FABP7-ENST00000368444, STMN2-ENST00000220876, TUBB2A-ENST00000333628, MAP1B-TALONT000671508, PTN-ENST00000348225, VIM-ENST00000224237, NEFM-ENST00000221166, FOS-ENST00000303562, TUBB2B-ENST00000259818, RTN1-ENST00000342503 
       CALM1-ENST00000356978, TUBB3-ENST00000315491, NEFL-TALONT000583190, GAP43-ENST00000305124, JUN-ENST00000371222, HSPA1A-ENST00000375651, HSPA1B-ENST00000375650, PEA15-ENST00000360472, INA-ENST00000369849, FGFBP3-ENST00000311575 
       SYT4-ENST00000255224, PTN-TALONT000822332, TTYH1-ENST00000376530, HOPX-ENST00000337881, NEUROD2-ENST00000302584, LMO4-ENST00000370542, NEUROD6-ENST00000297142, PTN-TALONT000822343, FOS-ENST00000554617, GPM6B-ENST00000454189 
PC_ 3 
Positive:  SOX4-ENST00000244745, DLX6-AS1-TALONT000801216, FOS-ENST00000303562, ENC1-ENST00000302351, EGR1-ENST00000239938, DLX2-ENST00000234198, BTG1-ENST00000256015, SST-ENST00000287641, JUN-ENST00000371222, ANKRD18CP-TALONT000751502 
       DLX5-ENST00000648378, FOS-ENST00000554617, CITED2-ENST00000367651, IER2-ENST00000588173, CCN1-ENST00000451137, SOX2-OT-ENST00000596250, DLX6-AS1-ENST00000458352, CADM1-ENST00000616271, PLS3-ENST00000355899, SCGN-ENST00000377961 
       GAD2-ENST00000428517, HES1-ENST00000232424, DLX1-ENST00000361725, RASD1-ENST00000225688, RBP1-ENST00000232219, SOX2-OT-ENST00000600801, MAF-ENST00000393350, ENC1-ENST00000618628, JUNB-ENST00000302754, MAFB-ENST00000373313 
Negative:  H4C3-ENST00000377803, FABP7-ENST00000368444, TUBA1B-ENST00000336023, HMGB2-ENST00000296503, NEFM-ENST00000221166, MAP1B-TALONT000671508, NEFL-TALONT000583190, TUBB-ENST00000327892, TUBB2A-ENST00000333628, UBE2C-ENST00000356455 
       STMN2-ENST00000220876, CALM1-ENST00000356978, PTN-ENST00000348225, GAP43-ENST00000305124, H1-3-ENST00000244534, HMGN2-ENST00000361427, TUBB2B-ENST00000259818, KPNA2-ENST00000330459, VIM-ENST00000224237, ARL6IP1-ENST00000304414 
       LMO4-ENST00000370542, TUBB3-ENST00000315491, CCNB1-ENST00000256442, SYT4-ENST00000255224, KIFC1-ENST00000428849, NPY-ENST00000242152, NEUROD6-ENST00000297142, PEA15-ENST00000360472, PTN-TALONT000822332, INA-ENST00000369849 
PC_ 4 
Positive:  DLX6-AS1-TALONT000801216, DLX2-ENST00000234198, SST-ENST00000287641, DLX5-ENST00000648378, H4C3-ENST00000377803, SOX2-OT-ENST00000596250, DLX6-AS1-ENST00000458352, NSG2-ENST00000303177, CXCR4-ENST00000241393, SCGN-ENST00000377961 
       GAD2-ENST00000428517, PFN2-ENST00000239940, CRYM-TALONT000431241, PDE4DIP-TALONT000551834, DLX1-ENST00000361725, SOX2-OT-ENST00000600801, PLS3-ENST00000355899, LMO3-ENST00000354662, RBP1-ENST00000232219, MAF-ENST00000393350 
       CALB2-TALONT000509228, LMO3-ENST00000441439, SOX2-ENST00000325404, C11orf96-ENST00000617612, ATP1B1-ENST00000367815, MEG3-ENST00000451743, CALB2-ENST00000302628, PCDH9-ENST00000377861, MEG3-ENST00000522771, AC004158.1-ENST00000564508 
Negative:  ENC1-ENST00000302351, ANKRD18CP-TALONT000751502, NEUROD6-ENST00000297142, NEUROD2-ENST00000302584, RASD1-ENST00000225688, POU3F2-ENST00000328345, EEF1A1-ENST00000309268, SLA-ENST00000338087, HS3ST1-ENST00000002596, SOX11-ENST00000322002 
       JUN-ENST00000371222, SDCBP-ENST00000260130, HES6-ENST00000409160, ENC1-ENST00000618628, HES6-ENST00000272937, GPM6A-ENST00000393658, SLC17A6-ENST00000263160, IGFBPL1-ENST00000377694, PPP1R17-ENST00000342032, EPHB6-TALONT000824230 
       CCND2-ENST00000261254, NRN1-ENST00000244766, NHLH1-ENST00000302101, SOX4-ENST00000244745, FOS-ENST00000303562, MLLT3-TALONT000705935, PENK-ENST00000451791, SSTR2-ENST00000357585, EGR1-ENST00000239938, GADD45G-ENST00000252506 
PC_ 5 
Positive:  EGR1-ENST00000239938, FOS-ENST00000303562, CCN1-ENST00000451137, IER2-ENST00000588173, FOS-ENST00000554617, H4C3-ENST00000377803, HES1-ENST00000232424, CCN2-ENST00000367976, CRYM-TALONT000431241, JUN-ENST00000371222 
       LMO3-ENST00000354662, NR4A1-ENST00000394825, LMO3-ENST00000441439, CRYAB-ENST00000650687, MAP1B-TALONT000671508, CALM1-ENST00000356978, GAP43-ENST00000305124, NEFM-ENST00000221166, JUNB-ENST00000302754, SPARC-ENST00000231061 
       STMN2-ENST00000220876, H1-3-ENST00000244534, NEFL-TALONT000583190, ATP1B1-ENST00000367815, ZFP36-ENST00000597629, NSG2-ENST00000303177, BCYRN1-ENST00000418539, LMO3-TALONT000528879, BTG2-ENST00000290551, PCDH9-ENST00000377861 
Negative:  PTN-ENST00000348225, FABP7-ENST00000368444, PEA15-ENST00000360472, PTN-TALONT000822332, HOPX-ENST00000337881, HSPA1B-ENST00000375650, ENC1-ENST00000302351, PTN-TALONT000822343, SOX4-ENST00000244745, HSPA1A-ENST00000375651 
       FABP5-ENST00000297258, GPM6B-ENST00000454189, DLX6-AS1-TALONT000801216, SFRP1-ENST00000220772, SFRP1-TALONT000603415, TUBB2B-ENST00000259818, ANKRD18CP-TALONT000751502, CXCR4-ENST00000241393, TTYH1-ENST00000376530, VIM-ENST00000224237 
       FGFBP3-ENST00000311575, PPP1R17-ENST00000342032, DLX2-ENST00000234198, CADM1-ENST00000616271, CRH-ENST00000276571, SDCBP-ENST00000260130, RASD1-ENST00000225688, DLX5-ENST00000648378, SST-ENST00000287641, CCND2-ENST00000261254 
####Some Quality Check after Filtering
VlnPlot(isoT4k_filter, features = c("nGene", "nUMI", "percent.mt", "percent.RP", "dropouts"), ncol = 5, group.by="method", pt.size=0.25)

# Plot PCA
PCAPlot(isoT4k_filter ) 

#FeaturePlot(isoT4k_filter,  reduction='pca', features=c("nGene", "nUMI",  "percent.mt", "percent.RP"), ncol = 2)
####Elbow Plot
ElbowPlot(isoT4k_filter, ndims = 50)

# Run UMAP
isoT4k_filter<- RunUMAP(isoT4k_filter,
                        n.neighbors = 15,
                        min.dist = 0.15,
                        dims = 1:26)
Warning: The default method for RunUMAP has changed from calling Python UMAP via reticulate to the R-native UWOT using the cosine metric
To use Python UMAP via reticulate, set umap.method to 'umap-learn' and metric to 'correlation'
This message will be shown once per session
16:09:21 UMAP embedding parameters a = 1.414 b = 0.9488
16:09:21 Read 4167 rows and found 26 numeric columns
16:09:21 Using Annoy for neighbor search, n_neighbors = 15
16:09:21 Building Annoy index with metric = cosine, n_trees = 50
0%   10   20   30   40   50   60   70   80   90   100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
16:09:21 Writing NN index file to temp file /var/folders/qr/3swxhlmx2xgb5qkpdbhqkkd80000gp/T//RtmpPj1I38/file14997514f7fc2
16:09:22 Searching Annoy index using 1 thread, search_k = 1500
16:09:22 Annoy recall = 100%
16:09:24 Commencing smooth kNN distance calibration using 1 thread with target n_neighbors = 15
16:09:24 Initializing from normalized Laplacian + noise (using irlba)
16:09:24 Commencing optimization for 500 epochs, with 87210 positive edges
16:09:29 Optimization finished
# Determine the K-nearest neighbor graph
isoT4k_filter <- FindNeighbors(object = isoT4k_filter, 
                               reduction = "pca",
                               features = VariableFeatures(object = isoT4k_filter),
                               dims = 1:26,
                               nn.method = "annoy") # annoy or rann
Computing nearest neighbor graph
Computing SNN
isoT4k_filter <- FindClusters(isoT4k_filter, verbose = FALSE,
                              resolution = seq(1, 2, 0.1))
Idents(isoT4k_filter) <- "SCT_snn_res.1.6"
Idents(isoT4k_filter) <- isoT4k_filter$SCT_snn_res.1.6
new.cluster.ids <- c("ExM", "ExN-ExM", "ExM-U", "ExN-1", "ExN-2", "In",
                     "vRG-ExN", "oRG", "IP", "ExDp", "ExN-3", "vRG", "PgS", "PgG2M", "Mic/Per/End")
names(new.cluster.ids) <- levels(isoT4k_filter)
isoT4k_filter <- RenameIdents(isoT4k_filter, new.cluster.ids)
isoT4k_filter@meta.data$isoCellType <- Idents(isoT4k_filter)
Idents(isoT4k_filter) <- isoT4k_filter$isoCellType
write.table(isoT4k_filter@meta.data, file='output/tables/TableS5A.txt', quote=FALSE, sep='\t', col.names = TRUE)

##used the old metadata file
#ap4<-read.table("meta_2212022_v3.txt", header=T)
#df4<- as.data.frame.matrix(ap4)
#isoT4k_filter@meta.data<- df4
#Idents(isoT4k_filter) <- isoT4k_filter$isoCellType
p1<- DimPlot(isoT4k_filter, reduction = "umap",label = TRUE, pt.size = 0.5, label.size = 3.5,repel=TRUE) +
            NoLegend() + 
            ggtitle('Cells Clustered by Isoform Expression') +
            theme(plot.title = element_text(size = 10, face = "bold")) +
            ggeasy::easy_center_title() &
            theme(text = element_text(), #face = "bold"
            axis.text=element_text(size=6), #angle=45, hjust=1,
            axis.title = element_text(size=8 ),   #face="bold"),
            axis.title.y.right = element_text(size = 2),
            legend.text=element_text(size=8),
            legend.title=element_text(size=8),
            axis.line = element_line(size=0.5))


p2<- DimPlot(isoT4k_filter, reduction = "umap",label = TRUE, group.by = "CellType", pt.size = 0.5, label.size = 3.5, repel=TRUE) + 
            NoLegend() + 
            ggtitle('Cells Clustered by Gene Expression') +
            theme(plot.title = element_text(size = 10, face = "bold")) +
            ggeasy::easy_center_title() &
            theme(text = element_text(), #face = "bold"
            axis.text=element_text(size=6), #angle=45, hjust=1,
            axis.title = element_text(size=8 ),   #face="bold"),
            axis.title.y.right = element_text(size = 2),
            legend.text=element_text(size=8),
            legend.title=element_text(size=8),
            axis.line = element_line(size=0.5))

p1 + p2

ggsave("output/figures/Fig5/Fig5_UMAP.pdf", width = 8, height = 10/3)
# Select the RNA counts slot to be the default assay
Idents(isoT4k_filter) <- isoT4k_filter$isoCellType
DefaultAssay(isoT4k_filter) <- "RNA"

# Normalize RNA data for visualization purposes
isoT4k_filter <- NormalizeData(isoT4k_filter, normalization.method = "LogNormalize", scale.factor = 100000, verbose = FALSE)

#Scal Data
all.genes <- rownames(isoT4k_filter)
isoT4k_filter <- ScaleData(isoT4k_filter, features = all.genes)
Centering and scaling data matrix
if (!file.exists("data/working/isoT4k_filter.rds")) {
  saveRDS(isoT4k_filter, "data/working/isoT4k_filter.rds")
}