GSE205549 Processing Pipeline

ATAC-seq code_examples 3 steps

Publication

Small intestine and colon tissue-resident memory CD8<sup>+</sup> T cells exhibit molecular heterogeneity and differential dependence on Eomes.

Immunity (2023) — PMID 36580919

Dataset

GSE205549

Small intestine and colon tissue-resident memory CD8+ T cells exhibit transcriptional, epigenetic, and functional heterogeneity in concert with diffe…

Warning: Pipeline descriptions and code snippets may be inferred or AI-generated. Use them only as a starting point to guide analysis, and validate before use.
  1. 1

    Cell Ranger ATAC (v6.0.1) was used to process sequencing information and single cell barcodes.

    Cell Ranger ATAC v6.0.1
    $ Bash example
    # Run the Cell Ranger ATAC `count` pipeline on one sample's FASTQ files.
    #
    # Install Cell Ranger ATAC (download the release tarball from the 10x Genomics
    # website, unpack it, and add it to PATH).
    # NOTE(review): the methods text cites "v6.0.1"; confirm which cellranger-atac
    # release was actually used and adjust the file names below to match.
    # tar -xzf cellranger-atac-6.0.1.tar.gz
    # export PATH=/path/to/cellranger-atac-6.0.1:$PATH
    
    # Reference data. The GEO record for this dataset lists assembly mm10, so use
    # the 10x Genomics mouse mm10 ATAC reference rather than a human one.
    # wget https://cf.10xgenomics.com/supp/cell-atac/refdata-cellranger-atac-mm10-1.2.0.tar.gz
    # tar -xzf refdata-cellranger-atac-mm10-1.2.0.tar.gz
    REF_DIR="refdata-cellranger-atac-mm10-1.2.0"
    
    # Directory containing the FASTQ files generated by the sequencer.
    FASTQ_DIR="/path/to/your/fastq_files"
    
    # Run ID; Cell Ranger ATAC writes its results to ./${SAMPLE_ID}/outs.
    SAMPLE_ID="my_atac_sample"
    
    # The executable is `cellranger-atac` and the per-sample pipeline is the
    # `count` subcommand (`cellranger atac` is not a valid invocation).
    cellranger-atac count \
      --id="${SAMPLE_ID}" \
      --fastqs="${FASTQ_DIR}" \
      --reference="${REF_DIR}"
  2. 2

    Cellranger outputs were analyzed in R using Signac (v1.6.0)

    Signac v1.6.0
    $ Bash example
    # Install the R packages used below if not already present. BiocManager
    # resolves Signac's Bioconductor dependencies; hdf5r is needed to read the
    # .h5 peak matrix.
    # R -e "install.packages(c('BiocManager', 'hdf5r'), repos='https://cloud.r-project.org')"
    # R -e "BiocManager::install(c('Seurat', 'Signac'))"
    # Optional, to pin the version cited in the methods:
    # R -e "remotes::install_version('Signac', version = '1.6.0', repos='https://cloud.r-project.org')"
    
    # Write the R script that loads the Cell Ranger ATAC outputs into Signac
    cat << 'EOF' > analyze_cellranger_outputs.R
    # Build a Signac/Seurat object from Cell Ranger ATAC outputs, run LSI,
    # UMAP and clustering, and save the result as an RDS file.
    library(Signac, quietly = TRUE)
    library(Seurat, quietly = TRUE)
    
    # Cell Ranger ATAC `outs/` directory; override with the first CLI argument:
    #   Rscript analyze_cellranger_outputs.R /path/to/sample/outs
    cli_args <- commandArgs(trailingOnly = TRUE)
    outs_dir <- if (length(cli_args) > 0) cli_args[[1]] else "cellranger_output_dir/outs"
    
    # Input files (the same ones listed as supplementary files for the dataset).
    # fragments.tsv.gz must have its tabix index (.tbi) alongside it.
    peak_matrix_file <- file.path(outs_dir, "filtered_peak_bc_matrix.h5")
    metadata_file <- file.path(outs_dir, "singlecell.csv")
    fragment_file <- file.path(outs_dir, "fragments.tsv.gz")
    
    # Fail early with a clear message instead of deep inside a Signac call.
    input_files <- c(peak_matrix_file, metadata_file, fragment_file)
    missing_files <- input_files[!file.exists(input_files)]
    if (length(missing_files) > 0) {
      stop(
        "Missing Cell Ranger ATAC output(s): ",
        paste(missing_files, collapse = ", "),
        call. = FALSE
      )
    }
    
    # Peak-by-cell count matrix and per-barcode metadata
    counts <- Read10X_h5(filename = peak_matrix_file)
    metadata <- read.csv(metadata_file, header = TRUE, row.names = 1)
    
    # Fragments are attached through the ChromatinAssay, not via SetAssayData().
    # Peak names in the 10x matrix look like "chr1:3094-3560", hence `sep`.
    chrom_assay <- CreateChromatinAssay(
      counts = counts,
      sep = c(":", "-"),
      genome = "mm10",
      fragments = fragment_file
    )
    seurat_object <- CreateSeuratObject(
      counts = chrom_assay,
      assay = "ATAC",
      meta.data = metadata
    )
    
    # Normalization, dimensionality reduction and clustering.
    # LSI component 1 is skipped (dims = 2:30), as in the standard Signac workflow.
    seurat_object <- RunTFIDF(seurat_object)
    seurat_object <- FindTopFeatures(seurat_object, min.cutoff = "q0")
    seurat_object <- RunSVD(seurat_object)
    seurat_object <- RunUMAP(seurat_object, dims = 2:30, reduction = "lsi")
    seurat_object <- FindNeighbors(seurat_object, reduction = "lsi", dims = 2:30)
    seurat_object <- FindClusters(seurat_object, verbose = FALSE, algorithm = 3)
    
    # Save results
    saveRDS(seurat_object, file = "signac_analysis_results.rds")
    message("Signac analysis complete. Object saved to signac_analysis_results.rds")
    EOF
    
    # Execute the R script (optionally pass the Cell Ranger ATAC outs/ directory)
    Rscript analyze_cellranger_outputs.R
  3. 3

    Annotation, quality control, dimensional reduction, and UMAP creation were completed using the standard Signac workflow.

    Signac v1.6.0
    $ Bash example
    # Install R and the necessary packages if not already installed.
    # The dataset is mouse (assembly mm10), so the mouse EnsDb annotation
    # package is required; hdf5r is needed to read the .h5 peak matrix.
    # conda create -n signac_env r-base r-essentials -y
    # conda activate signac_env
    # R -q -e "install.packages(c('BiocManager', 'hdf5r'), repos = 'https://cloud.r-project.org')"
    # R -q -e "BiocManager::install(c('Seurat', 'Signac', 'EnsDb.Mmusculus.v79', 'GenomeInfoDb'))"
    
    # Create an R script for the Signac workflow
    cat << 'EOF' > run_signac_workflow.R
    # Standard Signac workflow: annotation, quality control, dimensional
    # reduction, UMAP and clustering for one Cell Ranger ATAC sample.
    library(Signac)
    library(Seurat)
    library(GenomeInfoDb)
    library(EnsDb.Mmusculus.v79) # Mouse gene annotation matching mm10
    
    # --- Configuration ---
    # Replace with actual input files and paths (Cell Ranger ATAC `outs/` files).
    peak_matrix_file <- "path/to/filtered_peak_bc_matrix.h5"
    fragment_file <- "path/to/fragments.tsv.gz" # needs its .tbi index alongside
    cell_barcodes_file <- "path/to/singlecell.csv"
    output_dir <- "signac_output"
    project_name <- "scATAC_Project"
    genome_assembly <- "mm10" # Assembly reported for this dataset
    
    # Create output directory (including parents) if it doesn't exist
    if (!dir.exists(output_dir)) {
      dir.create(output_dir, recursive = TRUE)
    }
    
    # The QC steps below read the fragment file, so a synthetic fallback object
    # cannot run this workflow. Stop early with a clear message instead.
    required_inputs <- c(peak_matrix_file, fragment_file, cell_barcodes_file)
    missing_inputs <- required_inputs[!file.exists(required_inputs)]
    if (length(missing_inputs) > 0) {
      stop(
        "Input file(s) not found: ",
        paste(missing_inputs, collapse = ", "),
        call. = FALSE
      )
    }
    
    # --- 1. Load Data and Create Seurat Object ---
    # Gene annotations for mm10, with UCSC-style ("chr1") sequence names so they
    # match the chromosome names used in the peak matrix and fragment file.
    annotations <- GetGRangesFromEnsDb(ensdb = EnsDb.Mmusculus.v79)
    seqlevelsStyle(annotations) <- "UCSC"
    genome(annotations) <- genome_assembly
    
    # Peak-by-cell counts and per-barcode metadata from Cell Ranger ATAC
    peak_counts <- Read10X_h5(filename = peak_matrix_file)
    metadata <- read.csv(cell_barcodes_file, header = TRUE, row.names = 1)
    
    # Create a ChromatinAssay object. Peak names in the 10x matrix look like
    # "chr1:3094-3560", hence `sep`.
    chrom_assay <- CreateChromatinAssay(
      counts = peak_counts,
      sep = c(":", "-"),
      fragments = fragment_file,
      genome = genome_assembly,
      min.cells = 10, # Keep peaks detected in at least 10 cells
      min.features = 200 # Keep cells with at least 200 detected peaks
    )
    
    # Create the Seurat object directly from the ChromatinAssay so the fragment
    # information stays attached to the "ATAC" assay.
    seurat_obj <- CreateSeuratObject(
      counts = chrom_assay,
      assay = "ATAC",
      project = project_name,
      meta.data = metadata
    )
    
    # Add annotations to the Seurat object
    Annotation(seurat_obj[["ATAC"]]) <- annotations
    
    # --- 2. Quality Control ---
    # Compute QC metrics (both read the fragment file)
    seurat_obj <- NucleosomeSignal(object = seurat_obj)
    seurat_obj <- TSSEnrichment(object = seurat_obj, fast = FALSE)
    
    # Visualize QC metrics (optional, for interactive analysis)
    # VlnPlot(object = seurat_obj, features = c('nCount_ATAC', 'nFeature_ATAC', 'TSS.enrichment', 'nucleosome_signal'), pt.size = 0.1, ncol = 4)
    # FragmentHistogram(object = seurat_obj, group.by = 'orig.ident')
    
    # Filter cells based on QC metrics.
    # NOTE(review): these cutoffs are placeholders, not values from the
    # publication; tune them against the QC plots for the actual data.
    seurat_obj <- subset(
      x = seurat_obj,
      subset = nCount_ATAC < 100000 &
        nCount_ATAC > 500 &
        nucleosome_signal < 2 &
        TSS.enrichment > 1
    )
    
    # --- 3. Normalization and Dimensional Reduction ---
    seurat_obj <- RunTFIDF(seurat_obj)
    seurat_obj <- FindTopFeatures(seurat_obj, min.cutoff = "q0")
    seurat_obj <- RunSVD(seurat_obj)
    
    # Visualize correlation of LSI components with sequencing depth (optional)
    # DepthCor(seurat_obj)
    
    # --- 4. UMAP Creation ---
    # LSI component 1 is skipped (dims = 2:30), as in the standard Signac workflow.
    seurat_obj <- RunUMAP(object = seurat_obj, reduction = "lsi", dims = 2:30)
    
    # Find clusters
    seurat_obj <- FindNeighbors(object = seurat_obj, reduction = "lsi", dims = 2:30)
    seurat_obj <- FindClusters(object = seurat_obj, verbose = FALSE, algorithm = 3)
    
    # Visualize UMAP (optional)
    # DimPlot(object = seurat_obj, label = TRUE, repel = TRUE) + NoLegend()
    
    # --- Save Results ---
    rds_path <- file.path(output_dir, paste0(project_name, "_processed_seurat_object.rds"))
    saveRDS(seurat_obj, file = rds_path)
    message("Processed Seurat object saved to ", rds_path)
    
    # Optional: Save UMAP coordinates and cluster assignments to CSV.
    # drop = FALSE keeps the data frame (and its cell-barcode row names);
    # without it the single column collapses to a bare vector.
    write.csv(
      Embeddings(seurat_obj, reduction = "umap"),
      file = file.path(output_dir, "umap_coordinates.csv")
    )
    write.csv(
      seurat_obj@meta.data[, "seurat_clusters", drop = FALSE],
      file = file.path(output_dir, "cell_clusters.csv")
    )
    
    EOF
    
    # Execute the R script
    Rscript run_signac_workflow.R
    
Raw Source Text
Cell Ranger ATAC (v6.0.1) was used to process sequencing information and single cell barcodes.
Cellranger outputs were analyzed in R using Signac (v1.6.0)
Annotation, quality control, dimensional reduction, and UMAP creation were completed using the standard Signac workflow.
Assembly: mm10
Supplementary files format and content: 10x Genomics output files used: filtered_peak_bc_matrix.h5, singlecell.csv, fragments.tsv.gz, peaks.bed
← Back to Analysis