Skip to content
Snippets Groups Projects
Commit 9e9fa0e7 authored by Brandi Cantarel's avatar Brandi Cantarel
Browse files

upload alignment processes

parent 3090fd6a
Branches
Tags
No related merge requests found
#!/bin/bash
#trimgalore.sh
pair_id=$1
bam=$2
usage() {
echo "-h Help documentation for hisat.sh"
echo "-r --Reference Genome: GRCh38 or GRCm38"
echo "-b --BAM File"
echo "-n --NucType"
echo "-p --Prefix for output file name"
echo "-c --Capture Bedfile"
echo "Example: bash hisat.sh -p prefix -r GRCh38 -a hisat -x SRR1551047_1.fastq.gz -y SRR1551047_2.fastq.gz"
exit 1
}
OPTIND=1 # Reset OPTIND
while getopts :r:b:n:p:h opt
do
case $opt in
r) refgeno=$OPTARG;;
b) sbam=$OPTARG;;
c) bed=$OPTARG;;
y) nuctype=$OPTARG;;
p) pair_id=$OPTARG;;
h) usage;;
esac
done
module load samtools/1.4.1 fastqc/0.11.5
samtools flagstat ${bam} > ${pair_id}.flagstat.txt
fastqc -f bam ${bam}
shift $(($OPTIND -1))
# Check for mandatory options
if [[ -z $pair_id ]] || [[ -z $fq1 ]]; then
usage
fi
if [ $refgeno == 'GRCh38' ] || [ $refgeno == 'GRCm38' ]; then
index_path=/project/shared/bicf_workflow_ref/${refgeno}
else
usage
fi
module load samtools/1.6 fastqc/0.11.5
samtools flagstat ${sbam} > ${pair_id}.flagstat.txt
fastqc -f bam ${sbam}
if [[ $nuctype == 'dna' ]]; then
module load bedtools/2.26.0 picard/2.10.3
samtools view -b --threads $SLURM_CPUS_ON_NODE -L ${bed} -o ${pair_id}.ontarget.bam ${sbam}
samtools index ${pair_id}.ontarget.bam
samtools flagstat ${pair_id}.ontarget.bam > ${pair_id}.ontarget.flagstat.txt
samtools view -b -q 1 ${pair_id}.ontarget.bam | bedtools coverage -sorted -hist -g ${index_path}/genomefile.txt -b stdin -a ${bed} > ${pair_id}.mapqualcov.txt
samtools view -b -F 1024 ${pair_id}.ontarget.bam | bedtools coverage -sorted -g ${index_path}/genomefile.txt -a ${bed} -b stdin -hist | grep ^all > ${pair_id}.dedupcov.txt
java -Xmx32g -jar \$PICARD/picard.jar CollectAlignmentSummaryMetrics R=${reffa} I=${pair_id}.ontarget.bam OUTPUT=${pair_id}.alignmentsummarymetrics.txt
java -Xmx64g -jar \$PICARD/picard.jar EstimateLibraryComplexity I=${pair_id}.ontarget.bam OUTPUT=${pair_id}.libcomplex.txt
samtools view -F 1024 ${pair_id}.ontarget.bam | awk '{sum+=\$5} END { print "Mean MAPQ =",sum/NR}' > ${pair_id}.meanmap.txt
java -Xmx4g -jar \$PICARD/picard.jar CollectInsertSizeMetrics INPUT=${pair_id}.bam HISTOGRAM_FILE=${pair_id}.hist.ps REFERENCE_SEQUENCE=${reffa} OUTPUT=${pair_id}.hist.txt
samtools view -b -q 1 ${pair_id}.ontarget.bam | bedtools coverage -sorted -hist -g ${index_path}/genomefile.txt -b stdin -a ${bed} > ${pair_id}.mapqualcov.txt
bedtools coverage -sorted -g ${index_path}/genomefile.txt -a ${bed} -b ${sbam} -hist > ${pair_id}.covhist.txt
perl $baseDir/scripts/calculate_depthcov.pl ${pair_id}.covhist.txt
grep ^all ${pair_id}.covhist.txt > ${pair_id}.genomecov.txt
fi
File moved
File moved
#!/bin/bash
#dnaseqalign.sh
usage() {
echo "-h Help documentation for hisat.sh"
echo "-r --Reference Genome: GRCh38 or GRCm38"
echo "-x --FastQ R1"
echo "-y --FastQ R2"
echo "-p --Prefix for output file name"
echo "Example: bash hisat.sh -p prefix -r GRCh38 -x SRR1551047_1.fastq.gz -y SRR1551047_2.fastq.gz"
exit 1
}
OPTIND=1 # Reset OPTIND
while getopts :r:a:x:y:p:h opt
do
case $opt in
r) refgeno=$OPTARG;;
x) fq1=$OPTARG;;
y) fq2=$OPTARG;;
a) algo=$OPTARG;;
p) pair_id=$OPTARG;;
h) usage;;
esac
done
shift $(($OPTIND -1))
# Check for mandatory options
if [[ -z $pair_id ]] || [[ -z $fq1 ]]; then
usage
fi
if [ $refgeno == 'GRCh38' ] || [ $refgeno == 'GRCm38' ] || [ $refgeno == 'GRCh37' ]; then
index_path=/project/shared/bicf_workflow_ref/${refgeno}
else
usage
fi
if [[ -z $SLURM_CPUS_ON_NODE ]]
then
SLURM_CPUS_ON_NODE=1
fi
module load bwakit/0.7.15 bwa/intel/0.7.15 samtools/1.6 bcftools/1.6 htslib/1.6 picard/2.10.3 speedseq/20160506
if ($fq1 != $fq2)
then
bwa mem -M -t $SLURM_CPUS_ON_NODE -R "@RG\tID:${pair_id}\tLB:tx\tPL:illumina\tPU:barcode\tSM:${pair_id}" ${index_path}/genome.fa ${fq1} > out.sam
else
bwa mem -M -t $SLURM_CPUS_ON_NODE -R "@RG\tID:${pair_id}\tLB:tx\tPL:illumina\tPU:barcode\tSM:${pair_id}" ${index_path}/genome.fa ${fq1} ${fq2} > out.sam
fi
if [ $refgeno == 'GRCh38' ]
then
cat out.sam | k8 /cm/shared/apps/bwakit/0.7.15/bwa-postalt.js -p ${pair_id}.hla ${index_path}/genome.fa.alt | samtools view -1 - > output.unsort.bam
run-HLA ${pair_id}.hla > ${pair_id}.hla.top 2> ${pair_id}.log.hla
touch ${pair_id}.hla.HLA-dummy.gt
cat ${pair_id}.hla.HLA*.gt | grep ^GT | cut -f2- > ${pair_id}.hla.all
else
samtools view -1 -o output.unsort.bam out.sam
fi
samtools sort --threads $SLURM_CPUS_ON_NODE -o output.dups.bam output.unsort.bam
samtools view -h output.unsort.bam | samblaster -M -a --excludeDups --addMateTags --maxSplitCount 2 --minNonOverlap 20 -d discordants.sam -s splitters.sam > temp.sam
gawk '{ if (\$0~"^@") { print; next } else { \$10="*"; \$11="*"; print } }' OFS="\\t" splitters.sam | samtools view -S -b - | samtools sort -o ${pair_id}.splitters.bam -
gawk '{ if (\$0~"^@") { print; next } else { \$10="*"; \$11="*"; print } }' OFS="\\t" discordants.sam | samtools view -S -b - | samtools sort -o ${pair_id}.discordants.bam -
java -Djava.io.tmpdir=./ -Xmx4g -jar \$PICARD/picard.jar FixMateInformation ASSUME_SORTED=TRUE SORT_ORDER=coordinate ADD_MATE_CIGAR=TRUE I=output.dups.bam O=${pair_id}.bam
samtools index -@ $SLURM_CPUS_ON_NODE ${pair_id}.bam
#!/bin/bash
#hisat.sh
usage() {
echo "-h Help documentation for hisat.sh"
echo "-r --Reference Genome: GRCh38 or GRCm38"
echo "-a --FastQ R1"
echo "-b --FastQ R2"
echo "-p --Prefix for output file name"
echo "Example: bash hisat.sh -p prefix -r GRCh38 -a SRR1551047_1.fastq.gz -b SRR1551047_2.fastq.gz"
exit 1
}
OPTIND=1 # Reset OPTIND
while getopts :r:a:b:p:h opt
do
case $opt in
r) refgeno=$OPTARG;;
a) fq1=$OPTARG;;
b) fq2=$OPTARG;;
p) pair_id=$OPTARG;;
h) usage;;
esac
done
shift $(($OPTIND -1))
# Check for mandatory options
if [[ -z $pair_id ]] || [[ -z $fq1 ]]; then
usage
fi
if [ $refgeno == 'GRCh38' ] || [ $refgeno == 'GRCm38' ]; then
index_path=/project/shared/bicf_workflow_ref/${refgeno}/hisat_index/
else
usage
fi
if [[ -z $SLURM_CPUS_ON_NODE ]]
then
SLURM_CPUS_ON_NODE=1
fi
module load hisat2/2.1.0-intel samtools/gcc/1.6 picard/2.10.3
if [ $fq1 == $fq2 ]
then
hisat2 -p $SLURM_CPUS_ON_NODE --rg-id ${pair_id} --rg LB:tx --rg PL:illumina --rg PU:barcode --rg SM:${pair_id} --add-chrname --no-unal --dta -x ${index_path}/genome -U ${fq1} -S out.sam --summary-file ${pair_id}.alignerout.txt
else
hisat2 -p $SLURM_CPUS_ON_NODE --rg-id ${pair_id} --rg LB:tx --rg PL:illumina --rg PU:barcode --rg SM:${pair_id} --add-chrname --no-unal --dta -x ${index_path}/genome -1 ${fq1} -2 ${fq2} -S out.sam --summary-file ${pair_id}.alignerout.txt
fi
samtools view -1 --threads $SLURM_CPUS_ON_NODE -o output.bam out.sam
samtools sort -@ $SLURM_CPUS_ON_NODE -O BAM -n -o output.nsort.bam output.bam
java -jar $PICARD/picard.jar FixMateInformation ASSUME_SORTED=TRUE SORT_ORDER=coordinate ADD_MATE_CIGAR=TRUE I=output.nsort.bam O=${pair_id}.bam
samtools index -@ $SLURM_CPUS_ON_NODE ${pair_id}.bam
#!/bin/bash
#rnaseqalign.sh
usage() {
echo "-h Help documentation for hisat.sh"
echo "-r --Reference Genome: GRCh38 or GRCm38"
echo "-x --FastQ R1"
echo "-y --FastQ R2"
echo "-a --Method: hisat or star"
echo "-p --Prefix for output file name"
echo "Example: bash hisat.sh -p prefix -r GRCh38 -a hisat -x SRR1551047_1.fastq.gz -y SRR1551047_2.fastq.gz"
exit 1
}
OPTIND=1 # Reset OPTIND
while getopts :r:a:x:y:p:h opt
do
case $opt in
r) refgeno=$OPTARG;;
x) fq1=$OPTARG;;
y) fq2=$OPTARG;;
a) algo=$OPTARG;;
p) pair_id=$OPTARG;;
h) usage;;
esac
done
shift $(($OPTIND -1))
# Check for mandatory options
if [[ -z $pair_id ]] || [[ -z $fq1 ]]; then
usage
fi
if [ $refgeno == 'GRCh38' ] || [ $refgeno == 'GRCm38' ]; then
index_path=/project/shared/bicf_workflow_ref/${refgeno}
else
usage
fi
module load samtools/gcc/1.6 picard/2.10.3
if [[ -z $SLURM_CPUS_ON_NODE ]]
then
SLURM_CPUS_ON_NODE=1
fi
if [ $algo == 'star' ]
then
if ($fq1 != $fq2)
then
module load star/2.4.2a
STAR --genomeDir ${index_path}/star_index/ --readFilesIn ${fq1} ${fq2} --readFilesCommand zcat --genomeLoad NoSharedMemory --outFilterMismatchNmax 999 --outFilterMismatchNoverReadLmax 0.04 --outFilterMultimapNmax 20 --alignSJoverhangMin 8 --alignSJDBoverhangMin 1 --alignIntronMin 20 --alignIntronMax 1000000 --alignMatesGapMax 1000000 --outSAMheaderCommentFile COfile.txt --outSAMheaderHD @HD VN:1.4 SO:coordinate --outSAMunmapped Within --outFilterType BySJout --outSAMattributes NH HI AS NM MD --outSAMstrandField intronMotif --outSAMtype BAM SortedByCoordinate --quantMode TranscriptomeSAM --sjdbScore 1 --limitBAMsortRAM 60000000000 --outFileNamePrefix out
else
STAR --genomeDir ${index_path}/${star_index} --readFilesIn ${fq1} --readFilesCommand zcat --genomeLoad NoSharedMemory --outFilterMismatchNmax 999 --outFilterMismatchNoverReadLmax 0.04 --outFilterMultimapNmax 20 --alignSJoverhangMin 8 --alignSJDBoverhangMin 1 --alignIntronMin 20 --alignIntronMax 1000000 --alignMatesGapMax 1000000 --outSAMheaderCommentFile COfile.txt --outSAMheaderHD @HD VN:1.4 SO:coordinate --outSAMunmapped Within --outFilterType BySJout --outSAMattributes NH HI AS NM MD --outSAMstrandField intronMotif --outSAMtype BAM SortedByCoordinate --quantMode TranscriptomeSAM --sjdbScore 1 --limitBAMsortRAM 60000000000 --outFileNamePrefix out
fi
mv outLog.final.out ${pair_id}.alignerout.txt
mv outAligned.sortedByCoord.out.bam output.bam
else
module load hisat2/2.1.0-intel
if [ $fq1 == $fq2 ]
then
hisat2 -p $SLURM_CPUS_ON_NODE --rg-id ${pair_id} --rg LB:tx --rg PL:illumina --rg PU:barcode --rg SM:${pair_id} --add-chrname --no-unal --dta -x ${index_path}/hisat_index/genome -U ${fq1} -S out.sam --summary-file ${pair_id}.alignerout.txt
else
hisat2 -p $SLURM_CPUS_ON_NODE --rg-id ${pair_id} --rg LB:tx --rg PL:illumina --rg PU:barcode --rg SM:${pair_id} --add-chrname --no-unal --dta -x ${index_path}/hisat_index/genome -1 ${fq1} -2 ${fq2} -S out.sam --summary-file ${pair_id}.alignerout.txt
fi
samtools view -1 --threads $SLURM_CPUS_ON_NODE -o output.bam out.sam
fi
samtools sort -@ $SLURM_CPUS_ON_NODE -O BAM -n -o output.nsort.bam output.bam
java -jar $PICARD/picard.jar FixMateInformation ASSUME_SORTED=TRUE SORT_ORDER=coordinate ADD_MATE_CIGAR=TRUE I=output.nsort.bam O=${pair_id}.bam
samtools index -@ $SLURM_CPUS_ON_NODE ${pair_id}.bam
#!/bin/bash
#hisat.sh
pair_id=$1
index_path=$2
fq1=$3
fq2=$4
module load star/2.4.2a samtools/1.4.1
if ($fq1 != $fq2)
then
STAR --genomeDir ${index_path}/star_index/ --readFilesIn ${fq1} ${fq2} --readFilesCommand zcat --genomeLoad NoSharedMemory --outFilterMismatchNmax 999 --outFilterMismatchNoverReadLmax 0.04 --outFilterMultimapNmax 20 --alignSJoverhangMin 8 --alignSJDBoverhangMin 1 --alignIntronMin 20 --alignIntronMax 1000000 --alignMatesGapMax 1000000 --outSAMheaderCommentFile COfile.txt --outSAMheaderHD @HD VN:1.4 SO:coordinate --outSAMunmapped Within --outFilterType BySJout --outSAMattributes NH HI AS NM MD --outSAMstrandField intronMotif --outSAMtype BAM SortedByCoordinate --quantMode TranscriptomeSAM --sjdbScore 1 --limitBAMsortRAM 60000000000 --outFileNamePrefix out
else
STAR --genomeDir ${index_path}/${star_index} --readFilesIn ${fq1} --readFilesCommand zcat --genomeLoad NoSharedMemory --outFilterMismatchNmax 999 --outFilterMismatchNoverReadLmax 0.04 --outFilterMultimapNmax 20 --alignSJoverhangMin 8 --alignSJDBoverhangMin 1 --alignIntronMin 20 --alignIntronMax 1000000 --alignMatesGapMax 1000000 --outSAMheaderCommentFile COfile.txt --outSAMheaderHD @HD VN:1.4 SO:coordinate --outSAMunmapped Within --outFilterType BySJout --outSAMattributes NH HI AS NM MD --outSAMstrandField intronMotif --outSAMtype BAM SortedByCoordinate --quantMode TranscriptomeSAM --sjdbScore 1 --limitBAMsortRAM 60000000000 --outFileNamePrefix out
fi
mv outLog.final.out ${pair_id}.alignerout.txt
samtools sort --threads \$SLURM_CPUS_ON_NODE -o ${pair_id}.bam outAligned.sortedByCoord.out.bam
File moved
#!/bin/bash
#gatkrunner.sh
usage() {
echo "-h Help documentation for hisat.sh"
echo "-r --Reference Genome: GRCh38 or GRCm38"
echo "-b --BAM File"
echo "-p --Prefix for output file name"
echo "-a --Algorithm/Command"
echo "Example: bash hisat.sh -p prefix -r GRCh38 -b File.bam"
exit 1
}
OPTIND=1 # Reset OPTIND
while getopts :r:a:b:p:h opt
do
case $opt in
r) refgeno=$OPTARG;;
b) sbam=$OPTARG;;
p) pair_id=$OPTARG;;
a) algo=$OPTARG;;
h) usage;;
esac
done
shift $(($OPTIND -1))
# Check for mandatory options
if [[ -z $pair_id ]] || [[ -z $bam ] || [[ -z $refgeno ]]]; then
usage
fi
if [ $refgeno == 'GRCh38' ] || [ $refgeno == 'GRCm38' ] || [ $refgeno == 'GRCh37' ]; then
index_path=/project/shared/bicf_workflow_ref/${refgeno}
else
usage
fi
if [[ -z $SLURM_CPUS_ON_NODE ]]
then
SLURM_CPUS_ON_NODE=1
fi
dbsnp="${index_path}/Snp.vcf.gz"
knownindel="${index_path}/Snp.vcf.gz"
reffa="${index_path}/genome.fa"
module load gatk/3.7 samtools/1.6
if [[ $algo == 'gatkbam' ]]
then
samtools index ${sbam}
java -Xmx32g -jar $GATK_JAR -T RealignerTargetCreator -known ${knownindel} -R ${reffa} -o ${pair_id}.bam.list -I ${sbam} -nt $SLURM_CPUS_ON_NODE -nct 1
java -Xmx32g -jar $GATK_JAR -I ${sbam} -R ${reffa} --filter_mismatching_base_and_quals -T IndelRealigner -targetIntervals ${pair_id}.bam.list -o ${pair_id}.realigned.bam
java -Xmx32g -jar $GATK_JAR -l INFO -R ${reffa} --knownSites ${dbsnp} -I ${pair_id}.realigned.bam -T BaseRecalibrator -cov ReadGroupCovariate -cov QualityScoreCovariate -cov CycleCovariate -cov ContextCovariate -o ${pair_id}.recal_data.grp -nt 1 -nct $SLURM_CPUS_ON_NODE
java -Xmx32g -jar $GATK_JAR -T PrintReads -R ${reffa} -I ${pair_id}.realigned.bam -BQSR ${pair_id}.recal_data.grp -o ${pair_id}.final.bam -nt 1 -nct 8
elif
then
else
fi
File moved
File moved
File moved
File moved
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment