diff --git a/gene-counts.sh b/gene-counts.sh new file mode 100644 index 0000000000000000000000000000000000000000..8b6855e5669e4cd86c9bda7884768b5e589fdde4 --- /dev/null +++ b/gene-counts.sh @@ -0,0 +1,93 @@ +#!/bin/bash +# gene-counts.sh + +script_name="gene-counts.sh" +script_ver="1.0.0" + +#Help function +usage() { + echo "-h Help documentation for $script_name" + echo "-a --Path to bam file" + echo "-b --Path to bam file2" + echo "-g --Path to the annotation file" + echo "-o --Path to output directory" + echo "-e --The experiment names." + echo "-v --Version of script" + echo "Example: $script_name -a 'foo_1.bam' -b 'foo_2.bam' -g '/path/to/gtf/gencode.v19.annotation.gtf' [-o '/path/to/output/dir/']" + exit 1 +} + +# Version function +version(){ + echo "$script_name $script_ver" + exit 1 +} + +main(){ + + + # Load required modules + module load RSEM/1.2.31 + module load python/2.7.x-anaconda + module load picard/1.127 + + # Parsing options + OPTIND=1 # Reset OPTIND + while getopts :a:b:s:g:e:o:vh opt + do + case $opt in + a) aln_file_2=$OPTARG;; + b) aln_file_1=$OPTARG;; + g) transcriptome=$OPTARG;; + e) exp=$OPTARG;; + o) out=$OPTARG;; + v) version;; + h) usage;; + esac + done + + shift $(($OPTIND -1)) + + # Check for mandatory options + if [[ -z $aln_file_1 ]] || [[ -z $aln_file_2 ]] || [[ -z $transcriptome ]] || [[ -z $exp ]]; then + usage + fi + + # Define the output directory, if none defined make the location relative to first file + if [ -z $out ]; then + one_parent=$(dirname "${aln_file_1}") + out_dir=$(dirname "${one_parent}")\/$script_name-$script_ver/ + else + out_dir=$out\/$script_name-$script_ver + fi + + if [ ! -d $out_dir ]; then + mkdir $out_dir + fi + + + # If filtered alignment file doesn't exist + if [ ! -e $out_dir/$exp.counts ]; then + + + java -Xmx2g -jar $PICARD/picard.jar MergeSamFiles OUTPUT=$out_dir/$exp.bam INPUT=$aln_file_1 INPUT=$aln_file_2 + + rsem-calculate-expression -p 32 --bam --estimate-rspd --calc-ci --seed 12345 --no-bam-output --ci-memory 30000 $out_dir/$exp.bam $transcriptome $out_dir/$output_fp + + rm $out_dir/$exp.bam + + # Get input and output files and then print out metadata.json file + input_files=("$aln_file_1", "$aln_file_2", "$transcriptome") + printf -v input "\"%s\"," "${input_files[@]}" + input=${input%,} + output_file=($out_dir/$output_fp*) + printf -v output "\"%s\"," "${output_file[@]}" + output=${output%,} + printf '{"script name":"%s","script version":"%s", "input files": [%s], "output files": [%s]}' "$script_name" "$script_ver" "$input" "$output" | python -m json.tool > $out_dir/$output_fp.metadata.json + + else + echo "* Counts have already been called for $raw_fn" + fi +} + +main "$@" diff --git a/make-normal-signal.sh b/make-normal-signal.sh new file mode 100755 index 0000000000000000000000000000000000000000..cad6caf5e410875eb389662cd72150eedcfbbe63 --- /dev/null +++ b/make-normal-signal.sh @@ -0,0 +1,93 @@ +#!/bin/bash +# make-normal-signal.sh + +script_name="make-normal-signal.sh" +script_ver="1.0.0" + +#Help function +usage() { + echo "-h Help documentation for $script_name"\\n + echo "-f --File path to Replicate 1 alignment." + echo "-s --File path to Replicate 2 alignment." + echo "-r --UCSC Reference genome (e.g. hg19, mm10)" + echo "-e --The experiment names." + echo "-o --The ouput directory." + echo "Example: $script_name -f 'foo1.bam' -s 'foo2.bam man2.bam' -r 'hg19' -e 'foo man' -o output" + exit 1 +} + +# Version function +version(){ + echo "$script_name $script_ver" + exit 1 +} + +main(){ + + # Load required modules + module load python/2.7.x-anaconda + module load star/2.4.2a + module load picard/1.127 + module load UCSC_userApps/v317 + + # Parsing options + OPTIND=1 # Reset OPTIND + while getopts :f:s:r:e:o:hv opt + do + case $opt in + f) aln1=$OPTARG;; + s) aln2=$OPTARG;; + r) ucsc_reference=$OPTARG;; + e) exp=$OPTARG;; + o) out=$OPTARG;; + h) usage;; + v) version;; + esac + done + + shift $(($OPTIND -1)) + + # Check for mandatory options + if [[ -z $aln1 ]] || [[ -z $aln2 ]] || [[ -z $ucsc_reference ]] || [[ -z $exp ]] || [[ -z $out ]]; then + usage + fi + + # Define the out directory + out_dir=$out\/$script_name-$script_ver + + # Make sure directories exist + if [ ! -e $out ]; then + mkdir $out + fi + + # Run make-normal-signal.R if directory doesn't exist + if [ ! -d $out_dir ]; then + mkdir $out_dir + # Fetch chrom sizes + echo "* Downloading chrom.sizes..." + fetchChromSizes $ucsc_reference > $out_dir/chrom.sizes + + java -Xmx2g -jar $PICARD/picard.jar MergeSamFiles OUTPUT=$out_dir/$exp.bam INPUT=$aln1 INPUT=$aln2 + cd $out_dir + STAR --runMode inputAlignmentsFromBAM --inputBAMfile $exp.bam --outWigType bedGraph --outWigStrand Stranded --outFileNamePrefix $exp --outWigReferencesPrefix chr + bedSort $out_dir/$exp\Signal.UniqueMultiple.str1.out.bg $out_dir/$exp\Signal.UniqueMultiple.str1.out.sorted.bg + bedSort $out_dir/$exp\Signal.UniqueMultiple.str2.out.bg $out_dir/$exp\Signal.UniqueMultiple.str2.out.sorted.bg + bedGraphToBigWig $out_dir/$exp\Signal.UniqueMultiple.str1.out.sorted.bg $out_dir/chrom.sizes $out_dir/$exp.negative.bw + bedGraphToBigWig $out_dir/$exp\Signal.UniqueMultiple.str2.out.sorted.bg $out_dir/chrom.sizes $out_dir/$exp.positive.bw + rm $out_dir/$exp*.bg + rm $out_dir/$exp.bam + + # Get input and output files and then print out metadata.json file + input_files=("${array_aln1[@]}" "${array_aln2[@]}") + printf -v input "\"%s\"," "${input_files[@]}" + input=${input%,} + output_file=($out_dir\/*) + printf -v output "\"%s\"," "${output_file[@]}" + output=${output%,} + printf '{"script name":"%s","script version":"%s", "input files": [%s], "output files": [%s]}' "$script_name" "$script_ver" "$input" "$output" | python -m json.tool > $out_dir/metadata.json + else + echo "* Normalized Signal has been made. " + fi +} + +main "$@"