Skip to content
Snippets Groups Projects
Commit c2f6bdd7 authored by Brandi Cantarel's avatar Brandi Cantarel
Browse files

simplifying qc stats

parent c2e0f1fa
Branches
Tags
No related merge requests found
...@@ -35,7 +35,7 @@ shift $(($OPTIND -1)) ...@@ -35,7 +35,7 @@ shift $(($OPTIND -1))
#fi #fi
source /etc/profile.d/modules.sh source /etc/profile.d/modules.sh
module load samtools/gcc/1.8 fastqc/0.11.5 module load samtools/gcc/1.10 fastqc/0.11.8
samtools flagstat ${sbam} > ${pair_id}.flagstat.txt samtools flagstat ${sbam} > ${pair_id}.flagstat.txt
fastqc -f bam ${sbam} fastqc -f bam ${sbam}
baseDir="`dirname \"$0\"`" baseDir="`dirname \"$0\"`"
...@@ -53,19 +53,19 @@ then ...@@ -53,19 +53,19 @@ then
fi fi
tmpdir=`pwd` tmpdir=`pwd`
if [[ $nuctype == 'dna' ]]; then if [[ $nuctype == 'dna' ]]; then
module load bedtools/2.26.0 picard/2.10.3 module load bedtools/2.29.2 picard/2.10.3
bedtools coverage -sorted -g ${index_path}/genomefile.txt -a ${bed} -b ${sbam} -hist > ${pair_id}.covhist.txt
grep ^all ${pair_id}.covhist.txt > ${pair_id}.genomecov.txt
perl $baseDir/calculate_depthcov.pl ${pair_id}.covhist.txt
if [[ -z $skiplc ]] if [[ -z $skiplc ]]
then then
samtools view -@ $NPROC -b -L ${bed} -o ${pair_id}.ontarget.bam ${sbam} samtools view -@ $NPROC -b -L ${bed} -o ${pair_id}.ontarget.bam ${sbam}
samtools index -@ $NPROC ${pair_id}.ontarget.bam samtools index -@ $NPROC ${pair_id}.ontarget.bam
samtools flagstat ${pair_id}.ontarget.bam > ${pair_id}.ontarget.flagstat.txt samtools flagstat ${pair_id}.ontarget.bam > ${pair_id}.ontarget.flagstat.txt
java -Xmx64g -Djava.io.tmpdir=${tmpdir} -jar $PICARD/picard.jar CollectAlignmentSummaryMetrics R=${index_path}/genome.fa I=${pair_id}.ontarget.bam OUTPUT=${pair_id}.alignmentsummarymetrics.txt TMP_DIR=${tmpdir}
java -Xmx64g -Djava.io.tmpdir=${tmpdir} -XX:ParallelGCThreads=$NPROC -jar $PICARD/picard.jar EstimateLibraryComplexity I=${sbam} OUTPUT=${pair_id}.libcomplex.txt TMP_DIR=${tmpdir}
samtools view -@ $NPROC -b -q 1 ${sbam} | bedtools coverage -sorted -hist -g ${index_path}/genomefile.txt -b stdin -a ${bed} > ${pair_id}.mapqualcov.txt samtools view -@ $NPROC -b -q 1 ${sbam} | bedtools coverage -sorted -hist -g ${index_path}/genomefile.txt -b stdin -a ${bed} > ${pair_id}.mapqualcov.txt
samtools view -@ $NPROC ${sbam} | awk '{sum+=$5} END { print "Mean MAPQ =",sum/NR}' > ${pair_id}.meanmap.txt #java -Xmx64g -Djava.io.tmpdir=${tmpdir} -jar $PICARD/picard.jar CollectAlignmentSummaryMetrics R=${index_path}/genome.fa I=${pair_id}.ontarget.bam OUTPUT=${pair_id}.alignmentsummarymetrics.txt TMP_DIR=${tmpdir}
#java -Xmx64g -Djava.io.tmpdir=${tmpdir} -XX:ParallelGCThreads=$NPROC -jar $PICARD/picard.jar EstimateLibraryComplexity I=${sbam} OUTPUT=${pair_id}.libcomplex.txt TMP_DIR=${tmpdir}
#samtools view -@ $NPROC ${sbam} | awk '{sum+=$5} END { print "Mean MAPQ =",sum/NR}' > ${pair_id}.meanmap.txt
fi fi
java -Xmx64g -Djava.io.tmpdir=${tmpdir} -jar $PICARD/picard.jar CollectInsertSizeMetrics INPUT=${sbam} HISTOGRAM_FILE=${pair_id}.hist.ps REFERENCE_SEQUENCE=${index_path}/genome.fa OUTPUT=${pair_id}.hist.txt TMP_DIR=${tmpdir} #java -Xmx64g -Djava.io.tmpdir=${tmpdir} -jar $PICARD/picard.jar CollectInsertSizeMetrics INPUT=${sbam} HISTOGRAM_FILE=${pair_id}.hist.ps REFERENCE_SEQUENCE=${index_path}/genome.fa OUTPUT=${pair_id}.hist.txt TMP_DIR=${tmpdir}
bedtools coverage -sorted -g ${index_path}/genomefile.txt -a ${bed} -b ${sbam} -hist > ${pair_id}.covhist.txt
grep ^all ${pair_id}.covhist.txt > ${pair_id}.genomecov.txt
perl $baseDir/calculate_depthcov.pl ${pair_id}.covhist.txt
fi fi
#!/bin/bash
#trimgalore.sh
usage() {
echo "-h Help documentation for hisat.sh"
echo "-a --FastQ R1"
echo "-b --FastQ R2"
echo "-p --Prefix for output file name"
echo "Example: bash trimgalore.sh -p prefix -a SRR1551047_1.fastq.gz -b SRR1551047_2.fastq.gz"
exit 1
}
OPTIND=1 # Reset OPTIND
while getopts :r:g:q:b:p:ufh opt
do
case $opt in
p) pair_id=$OPTARG;;
f) filter=1;;
r) index_path=$OPTARG;;
u) umi='umi';;
g) read_group=$OPTARG;;
p) pair_id=$OPTARG;;
h) usage;;
esac
done
shift $(($OPTIND -1))
baseDir="`dirname \"$0\"`"
fqs=("$@")
numfq=$#
# Check for mandatory options
if [[ -z $pair_id ]] || [[ -z $fq1 ]]; then
usage
fi
NPROC=$SLURM_CPUS_ON_NODE
if [[ -z $NPROC ]]
then
NPROC=`nproc`
fi
if [[ -z $read_group ]]
then
read_group=$pair_id
fi
if [[ $index_path == *project* ]]
then
testexe='/project/shared/bicf_workflow_ref/seqprg/bin'
else
testexe='/usr/local/bin'
fi
source /etc/profile.d/modules.sh
module load trimgalore/0.6.4 cutadapt/1.9.1 python/2.7.x-anaconda bwakit/0.7.15 samtools/gcc/1.8 picard/2.10.3
threads=`expr $NPROC / 2`
trim_galore --cores $threads --paired -q 25 -o trim --illumina --gzip --length 35 ${fqs}
if [[ $filter == 1 ]]
then
perl $baseDir/parse_trimreport.pl ${pair_id}.trimreport.txt *trimming_report.txt
fi
bwa mem -M -t $threads -R "@RG\tID:${read_group}\tLB:tx\tPL:illumina\tPU:barcode\tSM:${read_group}" ${index_path}/genome.fa trim/*.fq.gz > out.sam
if [[ $umi == 'umi' ]] && [[ -f "${index_path}/genome.fa.alt" ]]
then
k8 ${testexe}/bwa-postalt.js -p tmphla ${index_path}/genome.fa.alt out.sam | python ${baseDir}/add_umi_sam.py -s - -o output.unsort.bam
elif [[ -f "${index_path}/genome.fa.alt" ]]
then
k8 ${testexe}/bwa-postalt.js -p tmphla ${index_path}/genome.fa.alt out.sam| samtools view -1 - > output.unsort.bam
elif [[ $umi == 'umi' ]]
then
python ${baseDir}/add_umi_sam.py -s out.sam -o output.unsort.bam
else
samtools view -1 -o output.unsort.bam out.sam
fi
samtools sort -n --threads $threads -o output.dups.bam output.unsort.bam
java -Djava.io.tmpdir=./ -Xmx4g -jar $PICARD/picard.jar FixMateInformation ASSUME_SORTED=TRUE SORT_ORDER=coordinate ADD_MATE_CIGAR=TRUE I=output.dups.bam O=${pair_id}.bam
samtools index ${pair_id}.bam
...@@ -28,19 +28,32 @@ baseDir="`dirname \"$0\"`" ...@@ -28,19 +28,32 @@ baseDir="`dirname \"$0\"`"
if [[ -z $pair_id ]] || [[ -z $fq1 ]]; then if [[ -z $pair_id ]] || [[ -z $fq1 ]]; then
usage usage
fi fi
fqs=("$@")
numfq=$#
if [[ -f $fq1 ]]
then
fqs="$fq1"
r1base="${fq1%.fastq*}"
fi
if [[ -f $fq2 ]]
then
fqs+=" $fq2"
r2base="${fq2%.fastq*}"
fi
numfq=${#fqs[@]}
r1base="${fq1%.fastq*}"
r2base="${fq2%.fastq*}"
source /etc/profile.d/modules.sh source /etc/profile.d/modules.sh
module load trimgalore/0.4.1 cutadapt/1.9.1 module load trimgalore/0.6.4 cutadapt/2.5
if [ -s $fq2 ] if [ $numfq > 1 ]
then then
trim_galore --paired -q 25 --illumina --gzip --length 35 ${fq1} ${fq2} trim_galore --paired -q 25 --illumina --gzip --length 35 ${fqs}
mv ${r1base}_val_1.fq.gz ${pair_id}.trim.R1.fastq.gz mv ${r1base}_val_1.fq.gz ${pair_id}.trim.R1.fastq.gz
mv ${r2base}_val_2.fq.gz ${pair_id}.trim.R2.fastq.gz mv ${r2base}_val_2.fq.gz ${pair_id}.trim.R2.fastq.gz
else else
trim_galore -q 25 --illumina --gzip --length 35 ${fq1} trim_galore -q 25 --illumina --gzip --length 35 ${fqs}
mv ${r1base}_trimmed.fq.gz ${pair_id}.trim.R1.fastq.gz mv ${r1base}_trimmed.fq.gz ${pair_id}.trim.R1.fastq.gz
cp ${pair_id}.trim.R1.fastq.gz ${pair_id}.trim.R2.fastq.gz cp ${pair_id}.trim.R1.fastq.gz ${pair_id}.trim.R2.fastq.gz
fi fi
......
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment