From d50d6b99fca5402335bc4722cfa1741bd08509f9 Mon Sep 17 00:00:00 2001
From: Venkat Malladi <Venkat.Malladi@utsouthwestern.edu>
Date: Tue, 16 Aug 2016 14:09:56 -0500
Subject: [PATCH] Call peaks for H3K4me3.

---
 call-peaks-macs-single.sh | 169 ++++++++++++++++++++++++++++++++++++++
 1 file changed, 169 insertions(+)
 create mode 100644 call-peaks-macs-single.sh

diff --git a/call-peaks-macs-single.sh b/call-peaks-macs-single.sh
new file mode 100644
index 0000000..6e78c91
--- /dev/null
+++ b/call-peaks-macs-single.sh
@@ -0,0 +1,169 @@
+#!/bin/bash
+# call-peaks-macs.sh
+
+script_name="call-peaks-macs2.sh"
+script_ver="2.1.0"
+
+#Help function
+usage() {
+  echo "-h  --Help documentation for $script_name"
+  echo "-f  --File path to experiment tagAlign files."
+  echo "-s  --File path to control tagAlign files."
+  echo "-x  --File path to experiment cross-correlation scores."
+  echo "-r  --UCSC Reference genome (e.g. hg19, mm10)"
+  echo "-o  --Path to output directory"
+  echo "-v  --Version of script"
+  echo "Example: $script_name -f 'foo1.tagAlign.gz' -s 'con1.tagAlign.gz' -x 'foo1.cc' -r 'hg19' [-o '/path/to/output/dir/']"
+  exit 1
+}
+
+# Version function
+version(){
+    echo "$script_name $script_ver"
+    exit 1
+}
+
+
+# Peak calling function
+call_peak() {
+    # Establish variables
+
+    experiment=$1
+    control=$2
+    xcor_scores_input=$3
+    chrom_sizes=$4
+    genomesize=$5
+    out_dir=$6
+
+    #Extract the fragment length estimate from cross-correlation scores file
+    fraglen=`cat $xcor_scores_input |  grep "predicted" | cut -f6 -d ' '`
+    echo $fraglen
+
+    # Generate narrow peaks and preliminary signal tracks
+    base_fn=$(basename "${experiment}")
+    prefix=${base_fn%.tagAlign.gz}
+    macs2 callpeak -t $experiment -c $control -f BED -n $out_dir/$prefix -g $genomesize -p 1e-5 --nomodel --shift 0 --extsize $fraglen --keep-dup all -B --SPMR
+
+    # Generate fold enrichment signal tracks
+    macs2 bdgcmp -t $out_dir/$prefix\_treat_pileup.bdg -c $out_dir/$prefix\_control_lambda.bdg --outdir $out_dir -o $prefix\_FE.bdg -m FE
+
+    # Genearte bigWigs from bedgraph to support vizualization
+    bedtools slop -i $out_dir/$prefix\_FE.bdg -g /$chrom_sizes -b 0 | bedClip stdin $chrom_sizes $out_dir/$prefix.fc.signal.bedgraph
+    bedSort $out_dir/$prefix.fc.signal.bedgraph $out_dir/$prefix.sorted.fc.signal.bedgraph
+    bedGraphToBigWig $out_dir/$prefix.sorted.fc.signal.bedgraph $chrom_sizes $out_dir/$prefix.fc_signal.bw
+    rm $out_dir/$prefix.fc.signal.bedgraph
+    rm $out_dir/$prefix.sorted.fc.signal.bedgraph
+
+    # Generate fold enrichment signal tracks
+
+    # Compute sval = min(no. of reads in ChIP, no. of reads in control) / 1,000,000
+    experiment_reads=`gzip -dc $experiment | wc -l | cut -f1`
+    control_reads=`gzip -dc $control | wc -l | cut -f1`
+    if [ $experiment_reads -ge $control_reads ]; then
+        min=$control_reads
+    else
+        min=$experiment_reads
+    fi
+    sval=`echo $min/1000000 | bc -l`
+
+    macs2 bdgcmp -t $out_dir/$prefix\_treat_pileup.bdg -c $out_dir/$prefix\_control_lambda.bdg --outdir $out_dir -o $prefix\_ppois.bdg -m ppois -S $sval
+
+    # Genearte bigWigs from bedgraph to support vizualization
+    bedtools slop -i $out_dir/$prefix\_ppois.bdg -g $chrom_sizes -b 0 | bedClip stdin $chrom_sizes $out_dir/$prefix.pval.signal.bedgraph
+    bedSort $out_dir/$prefix.pval.signal.bedgraph $out_dir/$prefix.sorted.pval.signal.bedgraph
+    bedGraphToBigWig $out_dir/$prefix.sorted.pval.signal.bedgraph $chrom_sizes $out_dir/$prefix.pval_signal.bw
+    rm $out_dir/$prefix.pval.signal.bedgraph
+    rm $out_dir/$prefix.sorted.pval.signal.bedgraph
+}
+
+main(){
+
+    # Load required modules
+    module load python/2.7.x-anaconda
+    module load R/3.1.0-intel
+    module load macs/2.1.0-20151222
+    module load gcc/4.8.1
+    module load bedtools/2.17.0
+    module load UCSC_userApps/v317
+
+    # Parsing options
+    OPTIND=1 # Reset OPTIND
+    while getopts :f:s:x:r:o:hv opt
+        do
+            case $opt in
+                f) aln=$OPTARG;;
+                s) control=$OPTARG;;
+                x) aln_cross=$OPTARG;;
+                r) ucsc_reference=$OPTARG;;
+                o) out=$OPTARG;;
+                h) usage;;
+                v) version;;
+            esac
+        done
+
+    shift $(($OPTIND -1))
+
+    # Check for mandatory options
+    if [[ -z $aln ]] || [[ -z $control ]] || [[ -z $ucsc_reference ]] || [[ -z $aln_cross ]]; then
+        usage
+    fi
+
+    # Check if length of arguments in aln1, aln2 and exp are the same
+    array_aln=(${aln//[,| ]/ })
+    array_control=(${control//[,| ]/ })
+    array_aln_cross=(${aln_cross//[,| ]/ })
+    # Define the genome size to use
+    if [ $ucsc_reference = 'hg19' ]; then
+        genome_size='hs'
+    elif [ $ucsc_reference = 'mm10' ]; then
+        genome_size='mm'
+    elif [ $ucsc_reference = 'mm9' ]; then
+        genome_size='mm'
+    else
+        usage
+    fi
+
+    # Define the output directory, if none defined make the location relative to first file
+    if [ -z $out ]; then
+        one_parent=$(dirname "${aln}")
+        out_dir=$(dirname "${one_parent}")\/$script_name-$script_ver/
+    else
+        out_dir=$out\/$script_name-$script_ver
+    fi
+
+    if [ ! -d $out_dir ]; then
+        mkdir $out_dir
+    fi
+
+    # Align if file doesn't exist
+    if [ ! -e $out_dir/metadata.json ]; then
+
+        # split files
+        rep1=${aln}
+        con1=${control}
+        rep1_xcor=${aln_cross}
+
+        echo "* Downloading chrom.sizes..."
+        fetchChromSizes $ucsc_reference > $out_dir/chrom.sizes
+        chrom_sizes=$out_dir/chrom.sizes
+
+        # Call peaks Replicates 1 and 2
+        call_peak $rep1 $con1 $rep1_xcor $chrom_sizes $genome_size $out_dir
+
+        # Remove chrom sizes
+        rm $out_dir/chrom.sizes
+
+        # Get input and output files and then print out metadata.json file
+        input_files=("${array_aln[@]}" "${array_control[@]}" "${array_aln_cross[@]}" "$ucsc_reference")
+        printf -v input "\"%s\"," "${input_files[@]}"
+        input=${input%,}
+        output_file=($out_dir\/*)
+        printf -v output "\"%s\"," "${output_file[@]}"
+        output=${output%,}
+        printf '{"script name":"%s","script version":"%s", "input files": [%s], "output files": [%s]}' "$script_name" "$script_ver" "$input" "$output"  | python -m json.tool > $out_dir/metadata.json
+    else
+        echo "* Peaks have been called"
+    fi
+}
+
+main "$@"
-- 
GitLab