How to analyze dnbc4tools output in R and Python
Load the filtered gene expression matrix using the Read10X function.
library(Seurat)
counts.data <- Read10X(data.dir = "/outs/filter_matrix")
You can load the data using either the h5ad file or the MEX matrix directory.
1. Read h5ad format (recommended):
import scanpy as sc
adata = sc.read_h5ad('/outs/filter_feature.h5ad')
2. Read matrix format:
import scanpy as sc
adata = sc.read_10x_mtx('/outs/filter_matrix')
Load the filtered peak matrix and create a Seurat object.
require(magrittr)
require(readr)
require(Matrix)
require(tidyr)
require(dplyr)
library(Signac)
library(Seurat)
# Function to read dnbc4tools ATAC output
read_signac_C4 <- function(mex_dir_path, fragments, singlecellmetadata){
mtx_path <- paste(mex_dir_path, "matrix.mtx.gz", sep = '/')
feature_path <- paste(mex_dir_path, "peaks.bed.gz", sep = '/')
barcode_path <- paste(mex_dir_path, "barcodes.tsv.gz", sep = '/')
features <- readr::read_tsv(feature_path, col_names = F) %>% tidyr::unite(feature)
barcodes <- readr::read_tsv(barcode_path, col_names = F) %>% tidyr::unite(barcode)
mtx <- Matrix::readMM(mtx_path) %>%
magrittr::set_rownames(features$feature) %>%
magrittr::set_colnames(barcodes$barcode)
metadata <- read.csv(
file = singlecellmetadata,
header = TRUE,
row.names = 1
)
chrom_assay <- CreateChromatinAssay(
counts = mtx,
sep = c("_", "_"),
fragments = fragments,
min.cells = 10,
min.features = 200
)
scATAC <- CreateSeuratObject(
counts = chrom_assay,
assay = "peaks",
meta.data = metadata
)
return(scATAC)
}
Create Arrow files from the fragments.tsv.gz file.
library(ArchR)
ArrowFiles <- createArrowFiles(
inputFiles = "/outs/fragments.tsv.gz",
sampleNames = "MySample",
filterTSS = 4,
filterFrags = 1000,
addTileMat = TRUE,
addGeneScoreMat = TRUE
)
Load the filtered peak matrix into an AnnData object.
import os
from scipy.sparse import csr_matrix
import anndata
import pandas as pd
import scipy.io
import scipy.sparse
def read_atac_C4(path):
mtx_path = os.path.join(path, "matrix.mtx.gz")
barcode_path = os.path.join(path, "barcodes.tsv.gz")
feature_path = os.path.join(path, "peaks.bed.gz")
obs = pd.read_csv(barcode_path, header=None, index_col=0, sep="\t")
obs.index.name = "barcode"
var = pd.read_csv(feature_path, header=None, sep="\t")
var.columns = ['chrom', 'chromStart', 'chromEnd']
var.index = var['chrom'].astype(str) + ':' + var['chromStart'].astype(str) + '-' + var['chromEnd'].astype(str)
var.index.name = "peak"
mtx = csr_matrix(scipy.io.mmread(mtx_path).T)
adata = anndata.AnnData(mtx, obs=obs, var=var)
adata.var_names_make_unique()
return adata
| File / Directory | Description | Analysis Type |
|---|---|---|
filter_matrix/ |
Filtered gene expression matrix (MEX format). | scRNA-seq |
filter_peak_matrix/ |
Filtered peak accessibility matrix (MEX format). | scATAC-seq |
filter_feature.h5ad |
Filtered matrix in AnnData format (Python-ready). | scRNA-seq |
fragments.tsv.gz |
Fragment file for ATAC analysis. | scATAC-seq |
singlecell.csv |
Cell metadata and QC metrics. | All |
For detailed output descriptions, see the Output Files Guide