scRNA-seq data analysis using Monocle3 combined with Seurat3

single cell and single learning

Posted by Chevy on September 17, 2019

scRNA-seq data analysis using Monocle3 combined with Seurat3

徐春晖

2019-09-17


基本步骤

[TOC]


Tips: 从Seurat3 pbmc构建数据到Monocle3

#Extract count data, phenotype data, and feature data from the Seurat Object.
counts.data <- as(as.matrix(Seurat.object@assays$RNA@data), 'sparseMatrix')
pheno.data <- new('AnnotatedDataFrame', data = Seurat.object@meta.data)
feature.data <- data.frame(gene_short_name = row.names(counts.data), row.names = row.names(counts.data))
feature.data <- new('AnnotatedDataFrame', data = feature.data)

#Construct a CellDataSet.
cds <- newCellDataSet(counts.data, 
                      phenoData = pheno.data, 
                      featureData = feature.data)

Step1: data read-in

# ===================================================================data readin
filter_umi <- 450
name <- character()
temp <- list.files(path = "../Raw_data/",pattern="*_gene_exon_tagged.dge.txt.gz")
temp

for (s in 1:length(temp)) {    
    name[i] <- unlist(strsplit(temp[i],"_out_gene_exon_tagged.dge.txt.gz"))[1]
    message(paste(name[i], "is loading"))
    
    tmpvalue<-read.table(paste0("../Raw_data/", temp[i]), 
                         sep = "\t", quote = "", row.names = 1, header = T)
    message(paste(name[i], "is loaded, now is adding name"))
    
    colnames(tmpvalue) <- paste0(name[i], "-", colnames(tmpvalue))
    message(paste0(name[i], "'s name added, now filtering ", filter_umi))
    
    tmpvalue <- tmpvalue[,colSums(tmpvalue) >= filter_umi]
    message(paste(name[i], "cells above", filter_umi, "filtered"))
    
    assign(name[i], tmpvalue)
    rm(tmpvalue)
}
message("data loading done, and strat merge counts file")

# ==============================================================UMI summary
summary <- NULL
for (s in 1:length(temp)) {
    summary <- cbind(summary, as.matrix(summary(colSums(get(name[s])))))
}
colnames(summary) <- paste0(name, "'s UMI")
summary

# ===============================================================data merge
dge <- get(name[1])
for(p in 2:length(temp)) {
    dge_temp <- get(name[p])
    dge_merge <- dplyr::full_join(dge %>% mutate(name = rownames(dge)), 
                                  dge_temp %>% mutate(name = rownames(dge_temp)))
    rownames(dge_merge) <- dge_merge$name
    dge <-  dplyr::select(dge_merge, -(name)) %>% na.replace(0)
    rm(dge_temp)
    rm(dge_merge
}
message("data merge done")

Step2: cds construction

cds <- new_cell_data_set(expression_data = as.matrix(dge),
                         cell_metadata = cell_metadata,
                         gene_metadata = gene_metadata)

Step3: normalization and scale and PCA

cds <- preprocess_cds(cds, 
                      method = "PCA", 
                      # use_genes = c(pbmc@assays$SCT@var.features),
                      num_dim = 50,
                      norm_method = "log", 
                      residual_mod

Step4: UMAP or tSNE

cds <- reduce_dimension(cds, 
                        cores = 4,
                        reduction_method = "UMAP", 
                        preprocess_method = "PCA")
                        
plot_cells(cds, 
           reduction_method = "UMAP", 
           color_cells_by = "Treat", 
           label_cell_groups = FALSE,
           cell_size = 1)

Step5: clustering cells

cds = cluster_cells(cds, 
                    reduction_method = "UMAP", 
                    k = 200)
plot_cells(cds, reduction_method = "UMAP", cell_size = 1)

Step6: trajectory construction

cds <- learn_graph(cds)
# 目前只支持针对UMAP结果做路径构建

step7: order pesudotime

cds <-  order_cells(cds)
plot_cells(cds,
           color_cells_by = "pseudotime",
           label_cell_groups = F,
           label_leaves = T,
           label_branch_points = T,
           graph_label_size = 4,
           cell_size = 2) 

问题

  • 如何将Seurat3的PCA结果(和normalized counts)替换cds对应的步骤结果,也就是Step3,S4对象实在是比较难理解
  • 因为trajectory只支持UMAP,如何取舍tSNE