#!/usr/bin/env bash
# Bulk download script for GSE86791
# Generated from Yeo Lab Publications Database
# Total files: 22

# Directory that receives every downloaded file.
OUTDIR="GSE86791"

# Stop immediately if the output directory cannot be created or entered;
# otherwise all downloads below would be written into whatever directory
# the script happened to be started from.
mkdir -p -- "$OUTDIR" || {
  echo "ERROR: cannot create output directory '$OUTDIR'" >&2
  exit 1
}
cd -- "$OUTDIR" || {
  echo "ERROR: cannot change into output directory '$OUTDIR'" >&2
  exit 1
}

# --- GEO supplementary files ---
# Each file is written to "<name>.part" first and renamed only after wget
# reports success. Downloading straight to the final name would leave a
# truncated file behind after an interrupted transfer, and the "already
# present" check would then skip it on every later run.
geo_suppl_base="ftp://ftp.ncbi.nlm.nih.gov/geo/series/GSE86nnn/GSE86791/suppl"
for geo_file in \
  "GSE86791_RAW.tar" \
  "GSE86791_Table_S3_Host_APA_events.xlsx" \
  "GSE86791_Table_S6_AS_Changes_siCPEB1_CPEB1OE_HFFs_RNA-seq.xlsx" \
  "GSE86791_Table_S8_PolyA_Tail_Lengths_TAIL-seq.xlsx"; do
  # Same no-clobber behaviour as before: never re-fetch an existing file.
  if [[ -e "$geo_file" ]]; then
    echo "File '${geo_file}' already there; not retrieving." >&2
    continue
  fi
  if wget -O "${geo_file}.part" "${geo_suppl_base}/${geo_file}"; then
    mv -f -- "${geo_file}.part" "$geo_file"
  else
    echo "WARNING: failed to download ${geo_file}" >&2
    rm -f -- "${geo_file}.part"
  fi
done

# --- SRA run files ---
# Tip: use 'fasterq-dump' from SRA Toolkit for FASTQ conversion
#      with optional renaming to original submitted FASTQ names.
# Set to 1 to rename SRR outputs (e.g. SRR_1.fastq.gz -> sample_R1.fastq.gz).
# Defaults to 1; export RENAME_SRA_TO_ORIGINAL=0 (or any other value) before
# running the script to keep the accession-based file names instead.
RENAME_SRA_TO_ORIGINAL="${RENAME_SRA_TO_ORIGINAL:-1}"

#######################################
# Gzip the FASTQ file belonging to a path stem, if one exists.
# "<stem>.fastq" is tried first, then "<stem>.fq"; only the first file
# found is compressed. gzip -f overwrites an existing .gz of the same name.
# Arguments: $1 - path stem without extension (e.g. SRR4228549_1)
# Outputs:   path of the resulting .gz file on stdout (success only)
# Returns:   0 if a file was compressed,
#            1 if neither candidate file exists,
#            gzip's non-zero status if compression failed (nothing printed,
#            so callers never receive the name of a file that was not made)
#######################################
_compress_if_exists() {
  local stem="$1"
  local ext src
  for ext in fastq fq; do
    src="${stem}.${ext}"
    [ -f "$src" ] || continue
    gzip -f -- "$src" || return
    printf '%s\n' "${src}.gz"
    return 0
  done
  return 1
}

# Option A: Download via SRA Toolkit (recommended)
#######################################
# Dump one SRA run to FASTQ, gzip the outputs and optionally rename the
# two split read files.
# Globals:   RENAME_SRA_TO_ORIGINAL (read) - renaming happens only when "1"
# Arguments: $1 - SRA run accession (e.g. SRR4228549)
#            $2 - file name for the compressed "<run>_1" output
#            $3 - file name for the compressed "<run>_2" output
# Outputs:   warning on stderr when fasterq-dump exits non-zero
# Returns:   0 always (failures are reported, not fatal, so the remaining
#            runs are still processed)
#######################################
_fetch_sra_run() {
  local run="$1" r1_name="$2" r2_name="$3"
  local gz1 gz2

  # Carry on after a failed dump: FASTQ files that are already on disk are
  # still compressed and renamed below.
  fasterq-dump --split-files "$run" \
    || echo "WARNING: fasterq-dump failed for ${run}" >&2

  # _compress_if_exists prints the .gz path when it compressed something;
  # '|| true' turns "nothing to compress" into an empty string.
  gz1="$(_compress_if_exists "${run}_1" || true)"
  gz2="$(_compress_if_exists "${run}_2" || true)"
  # An unsplit "<run>.fastq" is compressed as well but keeps its
  # accession-based name; no target name is supplied for it.
  _compress_if_exists "$run" >/dev/null || true

  if [[ "$RENAME_SRA_TO_ORIGINAL" == "1" ]]; then
    if [[ -n "$gz1" ]]; then
      mv -f -- "$gz1" "$r1_name"
    fi
    if [[ -n "$gz2" ]]; then
      mv -f -- "$gz2" "$r2_name"
    fi
  fi
  return 0
}

#              run accession  read-1 target name           read-2 target name
_fetch_sra_run "SRR4228549" "Mock_R1_001.fastq.gz" "Mock_R2_001.fastq.gz"
_fetch_sra_run "SRR4228550" "HCMV_R1_001.fastq.gz" "HCMV_R2_001.fastq.gz"
_fetch_sra_run "SRR4228551" "siCPEB1_R1_001.fastq.gz" "siCPEB1_R2_001.fastq.gz"
_fetch_sra_run "SRR4228552" "GFP_OE_R1_001.fastq.gz" "GFP_OE_R2_001.fastq.gz"
_fetch_sra_run "SRR4228553" "CPEB1_OE1_R1_001.fastq.gz" "CPEB1_OE1_R2_001.fastq.gz"
_fetch_sra_run "SRR4228554" "CPEB1_OE2_R1_001.fastq.gz" "CPEB1_OE2_R2_001.fastq.gz"
_fetch_sra_run "SRR4228555" "uninf_R3.fastq.gz" "uninf_R5.fastq.gz"
_fetch_sra_run "SRR4228556" "inf_R3.fastq.gz" "inf_R5.fastq.gz"
_fetch_sra_run "SRR4228557" "infCPEB1_R3.fastq.gz" "infCPEB1_R5.fastq.gz"


# Option B: Direct download (larger .sra files)
# NOTE(review): this section runs unconditionally after Option A, so both
# the FASTQ conversion and the raw .sra download are performed.
#
# Each run is requested exactly once. The generated "Additional data files"
# list contained the same nine URLs as the Option B list, so a single loop
# covers both.
sra_base_url="https://sra-pub-run-odp.s3.amazonaws.com/sra"
for sra_run in \
  SRR4228549 SRR4228550 SRR4228551 \
  SRR4228552 SRR4228553 SRR4228554 \
  SRR4228555 SRR4228556 SRR4228557; do
  # --fail : on an HTTP error, do not write the server's error body into
  #          (or onto the end of) the output file.
  # -C -   : resume a partial file left behind by an earlier attempt.
  curl --fail -L -O -C - "${sra_base_url}/${sra_run}/${sra_run}" \
    || echo "WARNING: failed to download ${sra_run}" >&2
done

# --- Additional data files ---
# (none beyond the SRA runs fetched above)

# Final status line naming the directory the files were written to.
printf 'Download complete. Files saved to %s\n' "$OUTDIR"