GSE213469 Processing Pipeline

ATAC-seq code_examples 2 steps

Publication

Early transcriptional and epigenetic divergence of CD8+ T cells responding to acute versus chronic infection.

PLoS biology (2023) — PMID 36716323

Dataset

GSE213469

Open chromatin regions at the single cell level of CD8+ T cells from the spleens of LCMV-Armstrong or LCMV-Clone 13 infected mice

Warning: Pipeline descriptions and code snippets may be inferred or AI-generated. Use them only as a starting point to guide analysis, and validate before use.
  1. 1

    Alignment, cell barcode processing, umis processing, abundance measurements: Cellranger ATAC (version 1.2.0)

    Cell Ranger v1.2.0 GitHub
    $ Bash example
    # Install Cell Ranger ATAC (version 1.2.0) - download and extract the tarball
    # For example:
    # wget https://cf.10xgenomics.com/releases/cell-atac/cellranger-atac-1.2.0.tar.gz
    # tar -xzf cellranger-atac-1.2.0.tar.gz
    # export PATH=/path/to/cellranger-atac-1.2.0:$PATH
    
    # Download Cell Ranger ATAC reference data (e.g., GRCh38 for version 1.2.0)
    # For example:
    # wget https://cf.10xgenomics.com/releases/cell-atac/refdata-cellranger-atac-GRCh38-1.2.0.tar.gz
    # tar -xzf refdata-cellranger-atac-GRCh38-1.2.0.tar.gz
    # REF_PATH="$(pwd)/refdata-cellranger-atac-GRCh38-1.2.0"
    
    # Define variables
    SAMPLE_ID="my_atac_sample"
    FASTQ_DIR="/path/to/your/fastqs"
    REFERENCE_PATH="/path/to/refdata-cellranger-atac-GRCh38-1.2.0"
    
    # Run Cell Ranger ATAC count for alignment, cell barcode processing, UMI processing, and abundance measurements
    cellranger atac count \
        --id="${SAMPLE_ID}" \
        --fastqs="${FASTQ_DIR}" \
        --reference="${REFERENCE_PATH}"
  2. 2

    QC, cell filtering, dimension reduction: Signac (version 1.5.0)

    Signac v1.5.0 GitHub
    $ Bash example
    # Install R and necessary packages if not already present
    # R -e 'install.packages("devtools")'
    # R -e 'devtools::install_github("timoast/signac@release/1.5.0")'
    # R -e 'install.packages("Seurat")'
    # R -e 'install.packages("GenomeInfoDb")'
    # R -e 'install.packages("EnsDb.Hsapiens.v86")' # Example for hg38 annotation
    
    # Define input and output paths
    FRAGMENTS_FILE="input/fragments.tsv.gz"
    CELL_BARCODES_FILE="input/cell_barcodes.tsv"
    OUTPUT_DIR="output_signac"
    
    # Define reference genome (e.g., hg38)
    GENOME_ASSEMBLY="hg38"
    
    mkdir -p ${OUTPUT_DIR}
    
    # Create an R script to perform QC, cell filtering, and dimension reduction using Signac
    cat << 'EOF' > ${OUTPUT_DIR}/signac_analysis.R
    library(Signac)
    library(Seurat)
    library(GenomeInfoDb)
    library(EnsDb.Hsapiens.v86) # For hg38 annotation
    
    # --- Configuration ---
    fragments_file <- Sys.getenv("FRAGMENTS_FILE")
    cell_barcodes_file <- Sys.getenv("CELL_BARCODES_FILE")
    output_dir <- Sys.getenv("OUTPUT_DIR")
    genome_assembly <- Sys.getenv("GENOME_ASSEMBLY")
    
    # Load genome annotation
    if (genome_assembly == "hg38") {
      annotations <- Get  (EnsDb.Hsapiens.v86)
      seqlevelsStyle(annotations) <- 'UCSC'
      genome(annotations) <- "hg38"
    } else {
      stop("Unsupported genome assembly. Please add appropriate annotation package.")
    }
    
    # --- Data Loading ---
    # Create a ChromatinAssay object from the fragments file
    frags <- CreateFragmentObject(
      path = fragments_file,
      cells = read.table(cell_barcodes_file, header = FALSE)$V1
    )
    
    # Create a Seurat object
    chrom_assay <- CreateChromatinAssay(
      counts = frags,
      sep = c(":", "-"),
      genome = genome_assembly,
      fragments = frags,
      min.cells = 10,
      min.features = 200
    )
    
    # Add the assay to a Seurat object
    obj <- CreateSeuratObject(
      counts = chrom_assay,
      assay = "ATAC"
    )
    
    # Add annotations
    Annotation(obj) <- annotations
    
    # --- Quality Control ---
    # Compute QC metrics
    obj <- NucleosomeSignal(object = obj)
    obj <- TSSEnrichment(object = obj, fast = FALSE)
    obj <- CountFragments(object = obj)
    
    # Add blacklist ratio (requires blacklist regions for the specific genome)
    # For hg38, you might use: blacklist_hg38_unified <- blacklist_hg38_unified
    # obj$blacklist_ratio <- FractionCountsInRegion(object = obj, regions = blacklist_hg38_unified)
    
    # Plot QC metrics (optional, for visualization)
    # pdf(file.path(output_dir, "qc_plots.pdf"))
    # VlnPlot(obj, features = c("nCount_ATAC", "TSS.enrichment", "nucleosome_signal"), pt.size = 0.1, ncol = 3)
    # dev.off()
    
    # --- Cell Filtering ---
    # Filter cells based on QC metrics
    obj_filtered <- subset(
      x = obj,
      subset = nCount_ATAC < 100000 & 
               nCount_ATAC > 500 & 
               TSS.enrichment > 2 & 
               nucleosome_signal < 2 # & 
               # blacklist_ratio < 0.05 # Uncomment if blacklist ratio is calculated
    )
    
    # --- Normalization and Dimension Reduction ---
    # Call peaks (optional, but often done before dimension reduction for feature selection)
    # obj_filtered <- CallPeaks(obj_filtered, macs2.path = "/path/to/macs2") # Requires MACS2
    
    # Create a gene activity matrix (optional, for multi-modal analysis)
    # gene.activities <- GeneActivity(obj_filtered)
    # obj_filtered[['RNA']] <- CreateAssayObject(counts = gene.activities)
    
    # Normalize and scale data
    obj_filtered <- RunTFIDF(obj_filtered)
    obj_filtered <- FindTopFeatures(obj_filtered, min.cutoff = 'q0')
    obj_filtered <- RunSVD(obj_filtered)
    
    # Build a graph and cluster cells
    obj_filtered <- RunUMAP(obj_filtered, reduction = "lsi", dims = 2:30)
    obj_filtered <- FindNeighbors(obj_filtered, reduction = "lsi", dims = 2:30)
    obj_filtered <- FindClusters(obj_filtered, verbose = FALSE, algorithm = 3)
    
    # Save the processed Seurat object
    saveRDS(obj_filtered, file = file.path(output_dir, "signac_processed_seurat_object.rds"))
    
    # Save UMAP coordinates and cluster assignments
    write.csv(Embeddings(obj_filtered, reduction = "umap"), file = file.path(output_dir, "umap_coordinates.csv"))
    write.csv(obj_filtered@meta.data$seurat_clusters, file = file.path(output_dir, "cell_clusters.csv"))
    
    message("Signac analysis complete. Output saved to ", output_dir)
    EOF
    
    # Execute the R script
    FRAGMENTS_FILE="${FRAGMENTS_FILE}" CELL_BARCODES_FILE="${CELL_BARCODES_FILE}" OUTPUT_DIR="${OUTPUT_DIR}" GENOME_ASSEMBLY="${GENOME_ASSEMBLY}" Rscript ${OUTPUT_DIR}/signac_analysis.R
Raw Source Text
Alignment, cell barcode processing, umis processing, abundance measurements: Cellranger ATAC  (version 1.2.0)
QC, cell filtering, dimension reduction: Signac (version 1.5.0)
Assembly: mm10
Supplementary files format and content: Tab-separated values files and matrix files
← Back to Analysis