Skip to content
Snippets Groups Projects
Commit d50d6b99 authored by Venkat Malladi's avatar Venkat Malladi
Browse files

Call peaks for H3K4me3.

parent 43d157fc
Branches
No related merge requests found
#!/bin/bash
# call-peaks-macs.sh
script_name="call-peaks-macs2.sh"
script_ver="2.1.0"
#Help function
usage() {
echo "-h --Help documentation for $script_name"
echo "-f --File path to experiment tagAlign files."
echo "-s --File path to control tagAlign files."
echo "-x --File path to experiment cross-correlation scores."
echo "-r --UCSC Reference genome (e.g. hg19, mm10)"
echo "-o --Path to output directory"
echo "-v --Version of script"
echo "Example: $script_name -f 'foo1.tagAlign.gz' -s 'con1.tagAlign.gz' -x 'foo1.cc' -r 'hg19' [-o '/path/to/output/dir/']"
exit 1
}
# Version function
version(){
echo "$script_name $script_ver"
exit 1
}
# Peak calling function
call_peak() {
# Establish variables
experiment=$1
control=$2
xcor_scores_input=$3
chrom_sizes=$4
genomesize=$5
out_dir=$6
#Extract the fragment length estimate from cross-correlation scores file
fraglen=`cat $xcor_scores_input | grep "predicted" | cut -f6 -d ' '`
echo $fraglen
# Generate narrow peaks and preliminary signal tracks
base_fn=$(basename "${experiment}")
prefix=${base_fn%.tagAlign.gz}
macs2 callpeak -t $experiment -c $control -f BED -n $out_dir/$prefix -g $genomesize -p 1e-5 --nomodel --shift 0 --extsize $fraglen --keep-dup all -B --SPMR
# Generate fold enrichment signal tracks
macs2 bdgcmp -t $out_dir/$prefix\_treat_pileup.bdg -c $out_dir/$prefix\_control_lambda.bdg --outdir $out_dir -o $prefix\_FE.bdg -m FE
# Genearte bigWigs from bedgraph to support vizualization
bedtools slop -i $out_dir/$prefix\_FE.bdg -g /$chrom_sizes -b 0 | bedClip stdin $chrom_sizes $out_dir/$prefix.fc.signal.bedgraph
bedSort $out_dir/$prefix.fc.signal.bedgraph $out_dir/$prefix.sorted.fc.signal.bedgraph
bedGraphToBigWig $out_dir/$prefix.sorted.fc.signal.bedgraph $chrom_sizes $out_dir/$prefix.fc_signal.bw
rm $out_dir/$prefix.fc.signal.bedgraph
rm $out_dir/$prefix.sorted.fc.signal.bedgraph
# Generate fold enrichment signal tracks
# Compute sval = min(no. of reads in ChIP, no. of reads in control) / 1,000,000
experiment_reads=`gzip -dc $experiment | wc -l | cut -f1`
control_reads=`gzip -dc $control | wc -l | cut -f1`
if [ $experiment_reads -ge $control_reads ]; then
min=$control_reads
else
min=$experiment_reads
fi
sval=`echo $min/1000000 | bc -l`
macs2 bdgcmp -t $out_dir/$prefix\_treat_pileup.bdg -c $out_dir/$prefix\_control_lambda.bdg --outdir $out_dir -o $prefix\_ppois.bdg -m ppois -S $sval
# Genearte bigWigs from bedgraph to support vizualization
bedtools slop -i $out_dir/$prefix\_ppois.bdg -g $chrom_sizes -b 0 | bedClip stdin $chrom_sizes $out_dir/$prefix.pval.signal.bedgraph
bedSort $out_dir/$prefix.pval.signal.bedgraph $out_dir/$prefix.sorted.pval.signal.bedgraph
bedGraphToBigWig $out_dir/$prefix.sorted.pval.signal.bedgraph $chrom_sizes $out_dir/$prefix.pval_signal.bw
rm $out_dir/$prefix.pval.signal.bedgraph
rm $out_dir/$prefix.sorted.pval.signal.bedgraph
}
main(){
# Load required modules
module load python/2.7.x-anaconda
module load R/3.1.0-intel
module load macs/2.1.0-20151222
module load gcc/4.8.1
module load bedtools/2.17.0
module load UCSC_userApps/v317
# Parsing options
OPTIND=1 # Reset OPTIND
while getopts :f:s:x:r:o:hv opt
do
case $opt in
f) aln=$OPTARG;;
s) control=$OPTARG;;
x) aln_cross=$OPTARG;;
r) ucsc_reference=$OPTARG;;
o) out=$OPTARG;;
h) usage;;
v) version;;
esac
done
shift $(($OPTIND -1))
# Check for mandatory options
if [[ -z $aln ]] || [[ -z $control ]] || [[ -z $ucsc_reference ]] || [[ -z $aln_cross ]]; then
usage
fi
# Check if length of arguments in aln1, aln2 and exp are the same
array_aln=(${aln//[,| ]/ })
array_control=(${control//[,| ]/ })
array_aln_cross=(${aln_cross//[,| ]/ })
# Define the genome size to use
if [ $ucsc_reference = 'hg19' ]; then
genome_size='hs'
elif [ $ucsc_reference = 'mm10' ]; then
genome_size='mm'
elif [ $ucsc_reference = 'mm9' ]; then
genome_size='mm'
else
usage
fi
# Define the output directory, if none defined make the location relative to first file
if [ -z $out ]; then
one_parent=$(dirname "${aln}")
out_dir=$(dirname "${one_parent}")\/$script_name-$script_ver/
else
out_dir=$out\/$script_name-$script_ver
fi
if [ ! -d $out_dir ]; then
mkdir $out_dir
fi
# Align if file doesn't exist
if [ ! -e $out_dir/metadata.json ]; then
# split files
rep1=${aln}
con1=${control}
rep1_xcor=${aln_cross}
echo "* Downloading chrom.sizes..."
fetchChromSizes $ucsc_reference > $out_dir/chrom.sizes
chrom_sizes=$out_dir/chrom.sizes
# Call peaks Replicates 1 and 2
call_peak $rep1 $con1 $rep1_xcor $chrom_sizes $genome_size $out_dir
# Remove chrom sizes
rm $out_dir/chrom.sizes
# Get input and output files and then print out metadata.json file
input_files=("${array_aln[@]}" "${array_control[@]}" "${array_aln_cross[@]}" "$ucsc_reference")
printf -v input "\"%s\"," "${input_files[@]}"
input=${input%,}
output_file=($out_dir\/*)
printf -v output "\"%s\"," "${output_file[@]}"
output=${output%,}
printf '{"script name":"%s","script version":"%s", "input files": [%s], "output files": [%s]}' "$script_name" "$script_ver" "$input" "$output" | python -m json.tool > $out_dir/metadata.json
else
echo "* Peaks have been called"
fi
}
main "$@"
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment