Skip to content
Snippets Groups Projects
Commit 3a071c54 authored by Brandi Cantarel's avatar Brandi Cantarel
Browse files

adding variantcalling

parent f649f687
Branches
Tags
No related merge requests found
......@@ -15,7 +15,7 @@ OPTIND=1 # Reset OPTIND
while getopts :r:b:n:p:h opt
do
case $opt in
r) refgeno=$OPTARG;;
r) index_path=$OPTARG;;
b) sbam=$OPTARG;;
c) bed=$OPTARG;;
y) nuctype=$OPTARG;;
......@@ -27,13 +27,7 @@ done
shift $(($OPTIND -1))
# Check for mandatory options
if [[ -z $pair_id ]] || [[ -z $fq1 ]]; then
usage
fi
if [ $refgeno == 'GRCh38' ] || [ $refgeno == 'GRCm38' ]; then
index_path=/project/shared/bicf_workflow_ref/${refgeno}
else
if [[ -z $pair_id ]] || [[ -z $sbam ]]; then
usage
fi
......
......@@ -15,7 +15,7 @@ OPTIND=1 # Reset OPTIND
while getopts :r:a:x:y:p:h opt
do
case $opt in
r) refgeno=$OPTARG;;
r) index_path=$OPTARG;;
x) fq1=$OPTARG;;
y) fq2=$OPTARG;;
a) algo=$OPTARG;;
......@@ -31,12 +31,6 @@ if [[ -z $pair_id ]] || [[ -z $fq1 ]]; then
usage
fi
if [ $refgeno == 'GRCh38' ] || [ $refgeno == 'GRCm38' ]; then
index_path=/project/shared/bicf_workflow_ref/${refgeno}
else
usage
fi
module load samtools/gcc/1.6 picard/2.10.3
if [[ -z $SLURM_CPUS_ON_NODE ]]
then
......@@ -47,9 +41,9 @@ then
if ($fq1 != $fq2)
then
module load star/2.4.2a
STAR --genomeDir ${index_path}/star_index/ --readFilesIn ${fq1} ${fq2} --readFilesCommand zcat --genomeLoad NoSharedMemory --outFilterMismatchNmax 999 --outFilterMismatchNoverReadLmax 0.04 --outFilterMultimapNmax 20 --alignSJoverhangMin 8 --alignSJDBoverhangMin 1 --alignIntronMin 20 --alignIntronMax 1000000 --alignMatesGapMax 1000000 --outSAMheaderCommentFile COfile.txt --outSAMheaderHD @HD VN:1.4 SO:coordinate --outSAMunmapped Within --outFilterType BySJout --outSAMattributes NH HI AS NM MD --outSAMstrandField intronMotif --outSAMtype BAM SortedByCoordinate --quantMode TranscriptomeSAM --sjdbScore 1 --limitBAMsortRAM 60000000000 --outFileNamePrefix out
STAR --genomeDir ${index_path}/star_index/ --readFilesIn $fq1 $fq2 --readFilesCommand zcat --genomeLoad NoSharedMemory --outFilterMismatchNmax 999 --outFilterMismatchNoverReadLmax 0.04 --outFilterMultimapNmax 20 --alignSJoverhangMin 8 --alignSJDBoverhangMin 1 --alignIntronMin 20 --alignIntronMax 1000000 --alignMatesGapMax 1000000 --outSAMheaderCommentFile COfile.txt --outSAMheaderHD @HD VN:1.4 SO:coordinate --outSAMunmapped Within --outFilterType BySJout --outSAMattributes NH HI AS NM MD --outSAMstrandField intronMotif --outSAMtype BAM SortedByCoordinate --quantMode TranscriptomeSAM --sjdbScore 1 --limitBAMsortRAM 60000000000 --outFileNamePrefix out
else
STAR --genomeDir ${index_path}/${star_index} --readFilesIn ${fq1} --readFilesCommand zcat --genomeLoad NoSharedMemory --outFilterMismatchNmax 999 --outFilterMismatchNoverReadLmax 0.04 --outFilterMultimapNmax 20 --alignSJoverhangMin 8 --alignSJDBoverhangMin 1 --alignIntronMin 20 --alignIntronMax 1000000 --alignMatesGapMax 1000000 --outSAMheaderCommentFile COfile.txt --outSAMheaderHD @HD VN:1.4 SO:coordinate --outSAMunmapped Within --outFilterType BySJout --outSAMattributes NH HI AS NM MD --outSAMstrandField intronMotif --outSAMtype BAM SortedByCoordinate --quantMode TranscriptomeSAM --sjdbScore 1 --limitBAMsortRAM 60000000000 --outFileNamePrefix out
STAR --genomeDir ${index_path}/star_index/ --readFilesIn $fq1 --readFilesCommand zcat --genomeLoad NoSharedMemory --outFilterMismatchNmax 999 --outFilterMismatchNoverReadLmax 0.04 --outFilterMultimapNmax 20 --alignSJoverhangMin 8 --alignSJDBoverhangMin 1 --alignIntronMin 20 --alignIntronMax 1000000 --alignMatesGapMax 1000000 --outSAMheaderCommentFile COfile.txt --outSAMheaderHD @HD VN:1.4 SO:coordinate --outSAMunmapped Within --outFilterType BySJout --outSAMattributes NH HI AS NM MD --outSAMstrandField intronMotif --outSAMtype BAM SortedByCoordinate --quantMode TranscriptomeSAM --sjdbScore 1 --limitBAMsortRAM 60000000000 --outFileNamePrefix out
fi
mv outLog.final.out ${pair_id}.alignerout.txt
mv outAligned.sortedByCoord.out.bam output.bam
......@@ -57,9 +51,9 @@ else
module load hisat2/2.1.0-intel
if [ $fq1 == $fq2 ]
then
hisat2 -p $SLURM_CPUS_ON_NODE --rg-id ${pair_id} --rg LB:tx --rg PL:illumina --rg PU:barcode --rg SM:${pair_id} --add-chrname --no-unal --dta -x ${index_path}/hisat_index/genome -U ${fq1} -S out.sam --summary-file ${pair_id}.alignerout.txt
hisat2 -p $SLURM_CPUS_ON_NODE --rg-id ${pair_id} --rg LB:tx --rg PL:illumina --rg PU:barcode --rg SM:${pair_id} --add-chrname --no-unal --dta -x ${index_path}/hisat_index/genome -U $fq1 -S out.sam --summary-file ${pair_id}.alignerout.txt
else
hisat2 -p $SLURM_CPUS_ON_NODE --rg-id ${pair_id} --rg LB:tx --rg PL:illumina --rg PU:barcode --rg SM:${pair_id} --add-chrname --no-unal --dta -x ${index_path}/hisat_index/genome -1 ${fq1} -2 ${fq2} -S out.sam --summary-file ${pair_id}.alignerout.txt
hisat2 -p $SLURM_CPUS_ON_NODE --rg-id ${pair_id} --rg LB:tx --rg PL:illumina --rg PU:barcode --rg SM:${pair_id} --add-chrname --no-unal --dta -x ${index_path}/hisat_index/genome -1 $fq1 -2 $fq2 -S out.sam --summary-file ${pair_id}.alignerout.txt
fi
samtools view -1 --threads $SLURM_CPUS_ON_NODE -o output.bam out.sam
fi
......
......@@ -29,11 +29,7 @@ if [[ -z $pair_id ]] || [[ -z $fq1 ]]; then
usage
fi
if [ $refgeno == 'GRCh38' ] || [ $refgeno == 'GRCm38' ]; then
index_path=/project/shared/bicf_workflow_ref/${refgeno}/CTAT_lib/
else
usage
fi
index_path=${refgeno}/CTAT_lib/
module add python/2.7.x-anaconda star/2.5.2b
STAR-Fusion --genome_lib_dir ${index_path} --left_fq ${fq1} --right_fq ${fq2} --output_dir star_fusion &> star_fusion.err
......
#!/bin/bash
#rnaseqalign.sh
usage() {
echo "-h Help documentation for hisat.sh"
echo "-b --BAM File"
echo "-g --GTF File"
echo "-s --stranded"
echo "-p --Prefix for output file name"
echo "Example: bash geneabudance.sh genect_rnaseq/geneabundance.sh -s stranded -g gtf_file -p pair_id -b bam"
exit 1
}
OPTIND=1 # Reset OPTIND
while getopts :b:g:p:s:h opt
do
case $opt in
b) sbam=$OPTARG;;
g) gtf=$OPTARG;;
p) pair_id=$OPTARG;;
s) stranded=$OPTARG;;
h) usage;;
esac
done
shift $(($OPTIND -1))
# Check for mandatory options
if [[ -z $pair_id ]] || [[ -z $sbam ]]
then
usage
fi
if [[ -z $SLURM_CPUS_ON_NODE ]]
then
SLURM_CPUS_ON_NODE=1
fi
module load subread/1.5.0-intel stringtie/1.1.2-intel
featureCounts -s $stranded -T $SLURM_CPUS_ON_NODE -p -g gene_name -a ${gtf} -o ${pair_id}.cts ${sbam}
mkdir ${pair_id}_stringtie
cd ${pair_id}_stringtie
stringtie ../${sbam} -p $SLURM_CPUS_ON_NODE -G ../${gtf} -B -e -o denovo.gtf -A ../${pair_id}.fpkm.txt
\ No newline at end of file
......@@ -2,7 +2,7 @@
#gatkrunner.sh
usage() {
echo "-h Help documentation for hisat.sh"
echo "-h Help documentation for gatkrunner.sh"
echo "-r --Reference Genome: GRCh38 or GRCm38"
echo "-b --BAM File"
echo "-p --Prefix for output file name"
......@@ -14,7 +14,7 @@ OPTIND=1 # Reset OPTIND
while getopts :r:a:b:p:h opt
do
case $opt in
r) refgeno=$OPTARG;;
r) index_path=$OPTARG;;
b) sbam=$OPTARG;;
p) pair_id=$OPTARG;;
a) algo=$OPTARG;;
......@@ -25,13 +25,8 @@ done
shift $(($OPTIND -1))
# Check for mandatory options
if [[ -z $pair_id ]] || [[ -z $bam ] || [[ -z $refgeno ]]]; then
usage
fi
if [ $refgeno == 'GRCh38' ] || [ $refgeno == 'GRCm38' ] || [ $refgeno == 'GRCh37' ]; then
index_path=/project/shared/bicf_workflow_ref/${refgeno}
else
if [[ -z $pair_id ]] || [[ -z $sbam ]] || [[ -z $index_path ]]
then
usage
fi
......@@ -39,23 +34,47 @@ if [[ -z $SLURM_CPUS_ON_NODE ]]
then
SLURM_CPUS_ON_NODE=1
fi
dbsnp="${index_path}/Snp.vcf.gz"
knownindel="${index_path}/Snp.vcf.gz"
reffa="${index_path}/genome.fa"
if [[ -a "${index_path}/dbSnp.vcf.gz" ]]
then
dbsnp="${index_path}/dbSnp.vcf.gz"
else
echo "Missing dbSNP File: ${index_path}/dbSnp.vcf.gz"
usage
fi
if [[ -a "${index_path}/GoldIndels.vcf.gz" ]]
then
knownindel="${index_path}/GoldIndels.vcf.gz"
else
echo "Missing InDel File: ${index_path}/GoldIndels.vcf.gz"
usage
fi
if [[ -a "${index_path}/genome.fa" ]]
then
reffa="${index_path}/genome.fa"
else
echo "Missing Fasta File: ${index_path}/genome.fa"
usage
fi
module load gatk/3.7 samtools/1.6
samtools index -@ $SLURM_CPUS_ON_NODE ${sbam}
if [[ $algo == 'gatkbam' ]]
if [[ $algo == 'gatkbam_rna' ]]
then
module load picard/2.10.3
java -Xmx4g -jar $PICARD/picard.jar CleanSam INPUT=${sbam} OUTPUT=${pair_id}.clean.bam
#java -Xmx4g -jar $PICARD/picard.jar AddOrReplaceReadGroups INPUT=${pair_id}.clean.bam O=${pair_id}.rg_added_sorted.bam SO=coordinate RGID=${pair_id} RGLB=tx RGPL=illumina RGPU=barcode RGSM=${pair_id}
samtools index -@ $SLURM_CPUS_ON_NODE ${pair_id}.clean.bam
java -Xmx4g -jar $GATK_JAR -T SplitNCigarReads -R ${reffa} -I ${pair_id}.clean.bam -o ${pair_id}.split.bam -rf ReassignOneMappingQuality -RMQF 255 -RMQT 60 -U ALLOW_N_CIGAR_READS
java -Xmx32g -jar $GATK_JAR -T RealignerTargetCreator -known ${knownindel} -R ${reffa} -o ${pair_id}.bam.list -I ${pair_id}.split.bam -nt $SLURM_CPUS_ON_NODE -nct 1
java -Xmx32g -jar $GATK_JAR -I ${pair_id}.split.bam -R ${reffa} --filter_mismatching_base_and_quals -T IndelRealigner -targetIntervals ${pair_id}.bam.list -o ${pair_id}.realigned.bam
java -Xmx32g -jar $GATK_JAR -l INFO -R ${reffa} --knownSites ${dbsnp} -I ${pair_id}.realigned.bam -T BaseRecalibrator -cov ReadGroupCovariate -cov QualityScoreCovariate -cov CycleCovariate -cov ContextCovariate -o ${pair_id}.recal_data.grp -nt 1 -nct $SLURM_CPUS_ON_NODE
java -Xmx32g -jar $GATK_JAR -T PrintReads -R ${reffa} -I ${pair_id}.realigned.bam -BQSR ${pair_id}.recal_data.grp -o ${pair_id}.final.bam -nt 1 -nct $SLURM_CPUS_ON_NODE
elif [[ $algo == 'gatkbam' ]]
then
samtools index ${sbam}
java -Xmx32g -jar $GATK_JAR -T RealignerTargetCreator -known ${knownindel} -R ${reffa} -o ${pair_id}.bam.list -I ${sbam} -nt $SLURM_CPUS_ON_NODE -nct 1
java -Xmx32g -jar $GATK_JAR -I ${sbam} -R ${reffa} --filter_mismatching_base_and_quals -T IndelRealigner -targetIntervals ${pair_id}.bam.list -o ${pair_id}.realigned.bam
java -Xmx32g -jar $GATK_JAR -l INFO -R ${reffa} --knownSites ${dbsnp} -I ${pair_id}.realigned.bam -T BaseRecalibrator -cov ReadGroupCovariate -cov QualityScoreCovariate -cov CycleCovariate -cov ContextCovariate -o ${pair_id}.recal_data.grp -nt 1 -nct $SLURM_CPUS_ON_NODE
java -Xmx32g -jar $GATK_JAR -T PrintReads -R ${reffa} -I ${pair_id}.realigned.bam -BQSR ${pair_id}.recal_data.grp -o ${pair_id}.final.bam -nt 1 -nct 8
elif
then
else
java -Xmx32g -jar $GATK_JAR -T PrintReads -R ${reffa} -I ${pair_id}.realigned.bam -BQSR ${pair_id}.recal_data.grp -o ${pair_id}.final.bam -nt 1 -nct $SLURM_CPUS_ON_NODE
fi
usage() {
echo "-h Help documentation for gatkrunner.sh"
echo "-r --Reference Genome: GRCh38 or GRCm38"
echo "-p --Prefix for output file name"
echo "-a --Algorithm/Command"
echo "Example: bash hisat.sh -p prefix -r GRCh38"
exit 1
}
OPTIND=1 # Reset OPTIND
while getopts :r:a:b:p:h opt
do
case $opt in
r) refgeno=$OPTARG;;
p) pair_id=$OPTARG;;
a) algo=$OPTARG;;
h) usage;;
esac
done
function join_by { local IFS="$1"; shift; echo "$*"; }
shift $(($OPTIND -1))
# Check for mandatory options
if [[ -z $pair_id ]] || [[ -z $bam ] || [[ -z $refgeno ]]]; then
usage
fi
if [[ -z $SLURM_CPUS_ON_NODE ]]
then
SLURM_CPUS_ON_NODE=1
fi
if [[ -a "${index_path}/dbSnp.vcf.gz" ]]
then
dbsnp="${index_path}/dbSnp.vcf.gz"
else
echo "Missing dbSNP File: ${index_path}/dbSnp.vcf.gz"
usage
fi
if [[ -a "${index_path}/GoldIndels.vcf.gz" ]]
then
knownindel="${index_path}/GoldIndels.vcf.gz"
else
echo "Missing InDel File: ${index_path}/GoldIndels.vcf.gz"
usage
fi
if [[ -a "${index_path}/genome.fa" ]]
then
reffa="${index_path}/genome.fa"
else
echo "Missing Fasta File: ${index_path}/genome.fa"
usage
fi
module load python/2.7.x-anaconda samtools/1.6 bedtools/2.26.0 snpeff/4.3q vcftools/0.1.14
for i in *.bam; do
samtools index -@ $SLURM_CPUS_ON_NODE $i
done
if [[ $algo == 'mpileup' ]]
then
samtools mpileup -t 'AD,DP,INFO/AD' -ug -Q20 -C50 -f ${reffa} *.bam | bcftools call -vmO z -o ${pair_id}.sam.ori.vcf.gz
vcf-sort ${pair_id}.sam.ori.vcf.gz | vcf-annotate -n --fill-type | bcftools norm -c s -f ${reffa} -w 10 -O z -o ${pair_id}.sam.vcf.gz -
elif [[ $algo == 'hotspot' ]]
then
samtools mpileup -d 99999 -t 'AD,DP,INFO/AD' -uf ${reffa} *.bam > ${pair_id}.mpi
bcftools filter -i "AD[1]/DP > 0.01" ${pair_id}.mpi | bcftools filter -i "DP > 50" | bcftools call -m -A |vcf-annotate -n --fill-type | bcftools norm -c s -f ${reffa} -w 10 -O z -o ${pair_id}.lowfreq.vcf.gz -
java -jar $SNPEFF_HOME/SnpSift.jar annotate ${index_path}/cosmic.vcf.gz ${pair_id}.lowfreq.vcf.gz | java -jar $SNPEFF_HOME/SnpSift.jar filter "(CNT[*] >0)" - |bgzip > ${pair_id}.hotspot.vcf.gz
elif
then [[ $algo == 'speedseq' ]]
module load speedseq/20160506
speedseq var -t $SLURM_CPUS_ON_NODE -o ssvar ${reffa} *.bam
vcf-annotate -n --fill-type ssvar.vcf.gz| bcftools norm -c s -f ${reffa} -w 10 -O z -o ${pair_id}.ssvar.vcf.gz -
elif [[ $algo == 'gatk' ]]
then
module load gatk/3.7
$gvcflist = ''
for i in *.bam; do
java -Djava.io.tmpdir=./ -Xmx32g -jar $GATK_JAR -R ${reffa} -D ${dbsnp} -T HaplotypeCaller -stand_call_conf 30 -stand_emit_conf 10.0 -A FisherStrand -A QualByDepth -A VariantType -A DepthPerAlleleBySample -A HaplotypeScore -A AlleleBalance -variant_index_type LINEAR -variant_index_parameter 128000 --emitRefConfidence GVCF -I $i -o ${i}.gatk.g.vcf -nct 2 &
$gvcflist += "--variant ${i}.gatk.g.vcf "
done
wait
java -Djava.io.tmpdir=./ -Xmx32g -jar $GATK_JAR -R ${reffa} -D ${dbsnp} -T GenotypeGVCFs -o gatk.vcf -nt $SLURM_CPUS_ON_NODE $gvcflist
vcf-annotate -n --fill-type gatk.vcf | bcftools norm -c s -f ${reffa} -w 10 -O z -o ${pair_id}.gatk.vcf.gz -
tabix ${pair_id}.gatk.vcf.gz
elif
then [[ $algo == 'platypus' ]]
module load platypus/gcc/0.8.1
$bamlist = join_by , *.bam
Platypus.py callVariants --minMapQual=10 --mergeClusteredVariants=1 --nCPU=$SLURM_CPUS_ON_NODE --bamFiles=${bamlist} --refFile=${reffa} --output=platypus.vcf
vcf-sort platypus.vcf |vcf-annotate -n --fill-type -n |bgzip > platypus.vcf.gz
tabix platypus.vcf.gz
bcftools norm -c s -f ${reffa} -w 10 -O z -o ${i}.pl.vcf.gz platypus.vcf.gz
fi
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment