GSE205942 Processing Pipeline

GSE code_examples 3 steps

Publication

Small intestine and colon tissue-resident memory CD8+ T cells exhibit molecular heterogeneity and differential dependence on Eomes.

Immunity (2023) — PMID 36580919

Dataset

GSE205942

Small intestine and colon tissue-resident memory CD8+ T cells exhibit transcriptional, epigenetic, and functional heterogeneity in concert with diffe…

Warning: Pipeline descriptions and code snippets may be inferred or AI-generated. Use them only as a starting point to guide analysis, and validate before use.
  1. 1

    Cell Ranger ATAC (v6.0.1) was used to process sequencing information and single cell barcodes.

    Cell Ranger ATAC v6.0.1
    $ Bash example
    # Install Cell Ranger ATAC (the source text for this dataset reports version 6.0.1).
    # Download the release tarball from the 10x Genomics website and add it to PATH,
    # or run it from a container image that bundles it.
    
    # Define variables (replace with actual paths and IDs)
    SAMPLE_ID="my_atac_sample"
    FASTQ_DIR="/path/to/your/fastqs"
    # The dataset's assembly is mm10, so use a mouse Cell Ranger ATAC reference
    # (example name shown; use the reference build that matches your install).
    REFERENCE_PATH="/path/to/refdata-cellranger-atac-mm10-1.2.0"
    
    # Run Cell Ranger ATAC to process sequencing information and single cell barcodes.
    # The executable is `cellranger-atac` and the pipeline entry point is the `count`
    # subcommand; outputs are written to ./${SAMPLE_ID}/outs.
    cellranger-atac count \
        --id="${SAMPLE_ID}" \
        --reference="${REFERENCE_PATH}" \
        --fastqs="${FASTQ_DIR}"
  2. 2

    Cellranger outputs were analyzed in R using Signac (v1.6.0)

    Signac v1.6.0 GitHub
    $ Bash example
    # Install Signac and Seurat if not already installed (both are on CRAN;
    # BiocManager also resolves their Bioconductor dependencies)
    # R -e 'if (!requireNamespace("BiocManager", quietly = TRUE)) install.packages("BiocManager")'
    # R -e 'BiocManager::install(c("Signac", "Seurat"))'
    
    # Define input and output paths
    CELLRANGER_OUTPUT_DIR="path/to/cellranger/outs" # e.g., /path/to/sample_id/outs
    SIGNAC_OUTPUT_DIR="path/to/signac_analysis"
    mkdir -p "${SIGNAC_OUTPUT_DIR}"
    
    # Create an R script for Signac analysis.
    # The heredoc delimiter is quoted so the shell does not expand anything inside the R code.
    cat << 'EOF' > "${SIGNAC_OUTPUT_DIR}/signac_analysis.R"
    # Build an initial Seurat object with a Signac ChromatinAssay from
    # Cell Ranger ATAC outputs and save it as an .rds file.
    #
    # Inputs (environment variables):
    #   CELLRANGER_OUTPUT_DIR  directory holding the Cell Ranger `outs` files
    #   SIGNAC_OUTPUT_DIR      directory where the .rds file is written
    library(Signac)
    library(Seurat)
    library(GenomeInfoDb) # Often needed for Signac/Seurat
    
    # Define input Cell Ranger ATAC output directory
    cellranger_output_dir <- Sys.getenv("CELLRANGER_OUTPUT_DIR")
    if (!nzchar(cellranger_output_dir)) {
      stop("CELLRANGER_OUTPUT_DIR environment variable not set.", call. = FALSE)
    }
    
    # Define output directory
    signac_output_dir <- Sys.getenv("SIGNAC_OUTPUT_DIR")
    if (!nzchar(signac_output_dir)) {
      stop("SIGNAC_OUTPUT_DIR environment variable not set.", call. = FALSE)
    }
    
    message("Loading Cell Ranger outputs from: ", cellranger_output_dir)
    
    # Load Cell Ranger ATAC data
    # Expected files in the output directory:
    #   filtered_peak_bc_matrix.h5     (Cell Ranger ATAC), or
    #   filtered_feature_bc_matrix.h5  (multiome-style output), or
    #   filtered_peak_bc_matrix/       (MEX: matrix.mtx, peaks.bed, barcodes.tsv)
    #   fragments.tsv.gz (+ .tbi index)
    #   singlecell.csv   (optional, per-barcode metrics)
    h5_candidates <- file.path(
      cellranger_output_dir,
      c("filtered_peak_bc_matrix.h5", "filtered_feature_bc_matrix.h5")
    )
    h5_found <- h5_candidates[file.exists(h5_candidates)]
    mex_dir <- file.path(cellranger_output_dir, "filtered_peak_bc_matrix")
    
    if (length(h5_found) > 0) {
      counts <- Read10X_h5(filename = h5_found[[1]])
    } else if (dir.exists(mex_dir)) {
      # The ATAC MEX directory stores features as peaks.bed, which Read10X()
      # does not understand, so assemble the sparse matrix by hand.
      counts <- methods::as(
        Matrix::readMM(file.path(mex_dir, "matrix.mtx")),
        "CsparseMatrix"
      )
      peaks <- read.table(
        file.path(mex_dir, "peaks.bed"),
        sep = "\t",
        header = FALSE,
        stringsAsFactors = FALSE
      )
      # Use the same "chr:start-end" naming that the HDF5 reader produces.
      rownames(counts) <- paste0(peaks[[1]], ":", peaks[[2]], "-", peaks[[3]])
      colnames(counts) <- readLines(file.path(mex_dir, "barcodes.tsv"))
    } else {
      stop(
        "Cell Ranger matrix file or directory not found: ",
        paste(c(h5_candidates, mex_dir), collapse = ", "),
        call. = FALSE
      )
    }
    
    # Path to the fragments file (the tabix index must sit next to it)
    fragment_path <- file.path(cellranger_output_dir, "fragments.tsv.gz")
    if (!file.exists(fragment_path)) {
      stop("Cell Ranger fragments file not found: ", fragment_path, call. = FALSE)
    }
    if (!file.exists(paste0(fragment_path, ".tbi"))) {
      stop("Fragments index not found: ", fragment_path, ".tbi", call. = FALSE)
    }
    
    # Path to the singlecell.csv file (optional, for metadata)
    singlecell_csv_path <- file.path(cellranger_output_dir, "singlecell.csv")
    cell_metrics <- NULL
    if (file.exists(singlecell_csv_path)) {
      cell_metrics <- read.csv(singlecell_csv_path, header = TRUE, row.names = 1)
    }
    
    # Multiome-style HDF5 files yield a named list of matrices; plain ATAC files
    # yield a single sparse matrix.
    if (is.list(counts) && "Peaks" %in% names(counts)) {
      peak_counts <- counts[["Peaks"]]
    } else {
      peak_counts <- counts
    }
    
    # Fragments can only be attached to a ChromatinAssay, so build one
    # instead of a plain Seurat assay. No cell/feature filtering is applied here.
    chrom_assay <- CreateChromatinAssay(
      counts = peak_counts,
      sep = c(":", "-"),
      fragments = fragment_path
    )
    
    seurat_object <- CreateSeuratObject(
      counts = chrom_assay,
      assay = "ATAC" # Name the assay "ATAC"
    )
    
    # Add cell metrics if available (rows are matched to cells by barcode)
    if (!is.null(cell_metrics)) {
      seurat_object <- AddMetaData(seurat_object, metadata = cell_metrics)
    }
    
    # Genome information (the dataset's assembly is mm10).
    # Setting it requires gene annotations, e.g. from EnsDb.Mmusculus.v79:
    # annotations <- GetGRangesFromEnsDb(ensdb = EnsDb.Mmusculus.v79::EnsDb.Mmusculus.v79)
    # seqlevelsStyle(annotations) <- "UCSC"
    # genome(annotations) <- "mm10"
    # Annotation(seurat_object) <- annotations
    
    # Initial QC / normalization steps of the standard Signac workflow
    # (NucleosomeSignal is cheap; TSSEnrichment needs the annotation above)
    # seurat_object <- NucleosomeSignal(object = seurat_object)
    # seurat_object <- TSSEnrichment(object = seurat_object, fast = FALSE)
    # seurat_object <- RunTFIDF(seurat_object)
    # seurat_object <- FindTopFeatures(seurat_object, min.cutoff = "q0")
    
    # Save the initial Seurat object
    rds_path <- file.path(signac_output_dir, "initial_seurat_object.rds")
    saveRDS(seurat_object, file = rds_path)
    
    message("Initial Seurat object saved to: ", rds_path)
    message("Signac analysis script finished. Further analysis steps can be added.")
    EOF
    
    # Execute the R script, passing both directories through the environment
    CELLRANGER_OUTPUT_DIR="${CELLRANGER_OUTPUT_DIR}" SIGNAC_OUTPUT_DIR="${SIGNAC_OUTPUT_DIR}" Rscript "${SIGNAC_OUTPUT_DIR}/signac_analysis.R"
  3. 3

    Annotation, quality control, dimensional reduction, and UMAP creation were completed using the standard Signac workflow.

    Signac v1.6.0 GitHub
    $ Bash example
    # Install R and necessary packages (commented out)
    # sudo apt-get update
    # sudo apt-get install -y r-base
    # R -e 'install.packages("BiocManager", repos="https://cloud.r-project.org")'
    # R -e 'BiocManager::install(c("Seurat", "Signac", "EnsDb.Mmusculus.v79"))' # Mouse annotation matching mm10
    
    # Create an R script for the Signac workflow.
    # The heredoc delimiter is quoted so the shell leaves the R code untouched.
    cat << 'EOF' > run_signac_workflow.R
    # Outline of the standard Signac workflow (annotation, QC, LSI, UMAP).
    # Only the annotation lookup below actually runs; the numbered steps are
    # commented templates to fill in with real input paths.
    
    # Load necessary libraries
    library(Seurat)
    library(Signac)
    library(GenomeInfoDb)
    library(EnsDb.Mmusculus.v79) # Mouse gene annotation; the dataset's assembly is mm10
    
    # --- Placeholder for input data and paths ---
    # Replace with actual paths to your 10x Genomics ATAC-seq data
    # For example:
    # input_matrix_path <- "path/to/filtered_peak_bc_matrix.h5"
    # input_fragments_path <- "path/to/fragments.tsv.gz"
    # input_metadata_path <- "path/to/singlecell.csv"
    # output_seurat_object_path <- "processed_seurat_object.rds"
    
    # --- Reference Data ---
    # Gene annotations for mm10, used by the annotation and TSS-enrichment steps.
    annotations <- GetGRangesFromEnsDb(ensdb = EnsDb.Mmusculus.v79)
    seqlevelsStyle(annotations) <- "UCSC" # Ensembl "1" -> UCSC "chr1" to match the peak names
    genome(annotations) <- "mm10" # Specify genome assembly
    
    # --- Standard Signac Workflow Steps (conceptual) ---
    # 1. Load data and create Seurat object
    #    (fragments must be attached through a ChromatinAssay)
    # counts <- Read10X_h5(filename = input_matrix_path)
    # metadata <- read.csv(input_metadata_path, header = TRUE, row.names = 1)
    # chrom_assay <- CreateChromatinAssay(
    #   counts = counts,
    #   sep = c(":", "-"),
    #   fragments = input_fragments_path
    # )
    # seurat_object <- CreateSeuratObject(
    #   counts = chrom_assay,
    #   assay = "ATAC",
    #   project = "ATAC_Project",
    #   meta.data = metadata
    # )
    
    # 2. Annotation
    # Annotation(seurat_object) <- annotations
    
    # 3. Quality Control
    #    (ATAC-specific metrics; thresholds are examples and must be tuned per dataset)
    # seurat_object <- NucleosomeSignal(object = seurat_object)
    # seurat_object <- TSSEnrichment(object = seurat_object, fast = FALSE)
    # seurat_object <- subset(
    #   x = seurat_object,
    #   subset = nCount_ATAC < 100000 & # Example upper bound
    #            nCount_ATAC > 500 &    # Example lower bound
    #            nucleosome_signal < 4 & # Example nucleosome signal threshold
    #            TSS.enrichment > 2      # Example TSS enrichment threshold
    # )
    
    # 4. Normalization and Dimensional Reduction (LSI)
    # seurat_object <- RunTFIDF(seurat_object)
    # seurat_object <- FindTopFeatures(seurat_object, min.cutoff = "q0")
    # seurat_object <- RunSVD(seurat_object)
    
    # 5. UMAP creation
    #    (the first LSI component usually tracks sequencing depth, hence dims = 2:30)
    # seurat_object <- RunUMAP(object = seurat_object, reduction = "lsi", dims = 2:30)
    
    # 6. Save processed object (optional)
    # saveRDS(seurat_object, file = output_seurat_object_path)
    
    print("Signac workflow steps conceptually outlined.")
    print("Please replace placeholder comments with actual data loading and adjust parameters as needed.")
    EOF
    
    # Execute the R script
    Rscript run_signac_workflow.R
Raw Source Text
Cell Ranger ATAC (v6.0.1) was used to process sequencing information and single cell barcodes.
Cellranger outputs were analyzed in R using Signac (v1.6.0)
Annotation, quality control, dimensional reduction, and UMAP creation were completed using the standard Signac workflow.
Assembly: mm10
Supplementary files format and content: 10x Genomics output files used: filtered_peak_bc_matrix.h5, singlecell.csv, fragments.tsv.gz, peaks.bed
← Back to Analysis