We will be analyzing the CITE-seq human PBMC data. This data can be installed with SeuratData.
# Install Seurat v4.0.0 and SeuratData if they are not already available.
# `||` short-circuits, so packageVersion() is only consulted once Seurat is
# known to be installed (packageVersion() errors for a missing package).
if (!requireNamespace("Seurat", quietly = TRUE) ||
    utils::packageVersion("Seurat") < "4.0.0") {
  remotes::install_version("Seurat", version = "4.0.0")
}
# SeuratData is installed from its GitHub repository.
if (!requireNamespace("SeuratData", quietly = TRUE)) {
  remotes::install_github("satijalab/seurat-data")
}
# Find highly variable features/genes ("vst" selection method)
cbmc <- FindVariableFeatures(
  cbmc,
  selection.method = "vst",
  verbose = FALSE
)

# Raw RNA counts, restricted to the variable features selected above
data_rna <- cbmc@assays$RNA@counts[VariableFeatures(cbmc), ]

# Build the RNA-modality FuseNet object from those counts
cbmc_rna <- InitiateFuseNet(
  raw_data = data_rna,
  project_name = "RNA",
  normalization = "cosine",
  pca_dims = 20,
  kernel = "gaussian",
  k = 15
)

# Sketching step (3000-cell sketch, 20 PCA dims, k = 15)
cbmc_rna <- GeomSketch(
  object = cbmc_rna,
  geom_pca_dims = 20,
  geom_size = 3000,
  sketch_n_pca = 20,
  sketch_k = 15
)

# Time the FuseNet run: snapshot proc.time() before and after, print the delta
ptm <- proc.time()
cbmc_rna <- RunFuseNet(
  object = cbmc_rna,
  n_iters = 100,
  pca_dims = 20,
  k = 15,
  ratio = 0.5,
  norm_type = "l1"
)
ptm1 <- proc.time()
print(ptm1 - ptm)
## user system elapsed
## 21.310 11.901 51.186
# Raw ADT (protein) counts; all features are used, no variable-feature filter
data_prt <- cbmc@assays$ADT@counts

# Build the protein-modality FuseNet object (no pca_dims passed here)
cbmc_prt <- InitiateFuseNet(
  raw_data = data_prt,
  project_name = "Protein",
  normalization = "cosine",
  kernel = "gaussian",
  k = 15,
  verbose = FALSE
)

# Sketching step (3000-cell sketch, 3 PCA dims for the geometric sketch)
# NOTE(review): sketch_n_pca = 0 presumably skips PCA on the sketch - confirm
cbmc_prt <- GeomSketch(
  object = cbmc_prt,
  geom_size = 3000,
  geom_pca_dims = 3,
  sketch_n_pca = 0,
  sketch_k = 15
)

# Time the FuseNet run: snapshot proc.time() before and after, print the delta
ptm <- proc.time()
cbmc_prt <- RunFuseNet(
  object = cbmc_prt,
  n_iters = 100,
  ratio = 0.5,
  k = 15,
  norm_type = "l1"
)
ptm1 <- proc.time()
print(ptm1 - ptm)
## user system elapsed
## 14.273 8.310 36.338
We see that the mouse cells, which were used as spike-in controls, were defined almost exclusively by the RNA modality (RNA weight > 0.8). This is because no anti-mouse antibodies were used in the study.
In general, lymphocytes were well defined in the ADT modality (protein weights > 0.6). In particular, CD8+ and CD4+ naive and memory T cells were better defined by the ADT data. On the other hand, dendritic cells (DCs and pDCs) were better defined by the RNA data, owing to the absence of their surface markers from the ADT data.
We can perform UMAP dimensional reduction or clustering on the fused data for downstream analysis.
# Two-dimensional UMAP computed from the fused distances
# NOTE(review): `fused` is not defined in this section; presumably the output
# of an earlier fusion step - confirm
umap.dims <- uwot::umap(
  X = fused$fused_dist,
  n_components = 2
)

# Label rows by cell and columns by UMAP axis
dimnames(umap.dims) <- list(
  Cells(cbmc),
  c("UMAP_1", "UMAP_2")
)

# Attach the embedding to the Seurat object as the "umap" reduction,
# associated with both the RNA and ADT assays
cbmc@reductions$umap <- CreateDimReducObject(
  embeddings = umap.dims,
  key = "UMAP_",
  assay = c("RNA", "ADT")
)
# Explicitly (re)assign the embeddings slot
# NOTE(review): likely redundant with the constructor call above - confirm
cbmc@reductions$umap@cell.embeddings <- umap.dims

# Plot the fused UMAP, coloured and labelled by the RNA-based annotations
DimPlot(
  object = cbmc,
  group.by = "rna_annotations",
  reduction = "umap",
  label = TRUE,
  repel = TRUE,
  label.box = TRUE
)
