Skip to content
Snippets Groups Projects
gene-counts.sh 2.63 KiB
Newer Older
#!/bin/bash
# gene-counts.sh

script_name="gene-counts.sh"
script_ver="1.0.0"

#Help function
usage() {
  echo "-h Help documentation for $script_name"
  echo "-a  --Path to bam file"
  echo "-b  --Path to bam file2"
  echo "-g  --Path to the annotation file"
  echo "-o  --Path to output directory"
  echo "-e  --The experiment names."
  echo "-v  --Version of script"
  echo "Example: $script_name -a 'foo_1.bam' -b 'foo_2.bam' -g '/path/to/gtf/gencode.v19.annotation.gtf' [-o '/path/to/output/dir/']"
  exit 1
}

# Version function
version(){
    echo "$script_name $script_ver"
    exit 1
}

main(){


    # Load required modules
    module load RSEM/1.2.31
    module load python/2.7.x-anaconda
    module load picard/1.127

    # Parsing options
    OPTIND=1 # Reset OPTIND
    while getopts :a:b:s:g:e:o:vh opt
        do
            case $opt in
                a) aln_file_2=$OPTARG;;
		b) aln_file_1=$OPTARG;;
                g) transcriptome=$OPTARG;;
		e) exp=$OPTARG;;
                o) out=$OPTARG;;
                v) version;;
                h) usage;;
            esac
        done

    shift $(($OPTIND -1))

    # Check for mandatory options
    if [[ -z $aln_file_1 ]] || [[ -z $aln_file_2 ]] || [[ -z $transcriptome ]] || [[ -z $exp ]]; then
        usage
    fi

    # Define the output directory, if none defined make the location relative to first file
    if [ -z $out ]; then
        one_parent=$(dirname "${aln_file_1}")
        out_dir=$(dirname "${one_parent}")\/$script_name-$script_ver/
    else
        out_dir=$out\/$script_name-$script_ver
    fi

    if [ ! -d $out_dir ]; then
        mkdir $out_dir
    fi


    # If filtered alignment file doesn't exist
    if [ ! -e $out_dir/$exp.counts ]; then


	java -Xmx2g -jar $PICARD/picard.jar MergeSamFiles OUTPUT=$out_dir/$exp.bam INPUT=$aln_file_1 INPUT=$aln_file_2

	rsem-calculate-expression -p 32 --bam --estimate-rspd --calc-ci --seed 12345 --no-bam-output --ci-memory 30000  $out_dir/$exp.bam $transcriptome $out_dir/$output_fp

	rm $out_dir/$exp.bam

        # Get input and output files and then print out metadata.json file
        input_files=("$aln_file_1", "$aln_file_2", "$transcriptome")
        printf -v input "\"%s\"," "${input_files[@]}"
        input=${input%,}
        output_file=($out_dir/$output_fp*)
        printf -v output "\"%s\"," "${output_file[@]}"
        output=${output%,}
        printf '{"script name":"%s","script version":"%s", "input files": [%s], "output files": [%s]}' "$script_name" "$script_ver" "$input" "$output"  | python -m json.tool > $out_dir/$output_fp.metadata.json

    else
        echo "* Counts have already been called for $raw_fn"
    fi
}

main "$@"