diff --git a/alignment/bamqc.sh b/alignment/bamqc.sh index cee7b80555bb0b6aa724dda97bcc33b13f0151f7..52f904e385e2e536a3f8e8858d0e2dbb179536b1 100644 --- a/alignment/bamqc.sh +++ b/alignment/bamqc.sh @@ -35,7 +35,7 @@ shift $(($OPTIND -1)) #fi source /etc/profile.d/modules.sh -module load samtools/gcc/1.8 fastqc/0.11.5 +module load samtools/gcc/1.10 fastqc/0.11.8 samtools flagstat ${sbam} > ${pair_id}.flagstat.txt fastqc -f bam ${sbam} baseDir="`dirname \"$0\"`" @@ -53,19 +53,19 @@ then fi tmpdir=`pwd` if [[ $nuctype == 'dna' ]]; then - module load bedtools/2.26.0 picard/2.10.3 + module load bedtools/2.29.2 picard/2.10.3 + bedtools coverage -sorted -g ${index_path}/genomefile.txt -a ${bed} -b ${sbam} -hist > ${pair_id}.covhist.txt + grep ^all ${pair_id}.covhist.txt > ${pair_id}.genomecov.txt + perl $baseDir/calculate_depthcov.pl ${pair_id}.covhist.txt if [[ -z $skiplc ]] then samtools view -@ $NPROC -b -L ${bed} -o ${pair_id}.ontarget.bam ${sbam} samtools index -@ $NPROC ${pair_id}.ontarget.bam samtools flagstat ${pair_id}.ontarget.bam > ${pair_id}.ontarget.flagstat.txt - java -Xmx64g -Djava.io.tmpdir=${tmpdir} -jar $PICARD/picard.jar CollectAlignmentSummaryMetrics R=${index_path}/genome.fa I=${pair_id}.ontarget.bam OUTPUT=${pair_id}.alignmentsummarymetrics.txt TMP_DIR=${tmpdir} - java -Xmx64g -Djava.io.tmpdir=${tmpdir} -XX:ParallelGCThreads=$NPROC -jar $PICARD/picard.jar EstimateLibraryComplexity I=${sbam} OUTPUT=${pair_id}.libcomplex.txt TMP_DIR=${tmpdir} samtools view -@ $NPROC -b -q 1 ${sbam} | bedtools coverage -sorted -hist -g ${index_path}/genomefile.txt -b stdin -a ${bed} > ${pair_id}.mapqualcov.txt - samtools view -@ $NPROC ${sbam} | awk '{sum+=$5} END { print "Mean MAPQ =",sum/NR}' > ${pair_id}.meanmap.txt + #java -Xmx64g -Djava.io.tmpdir=${tmpdir} -jar $PICARD/picard.jar CollectAlignmentSummaryMetrics R=${index_path}/genome.fa I=${pair_id}.ontarget.bam OUTPUT=${pair_id}.alignmentsummarymetrics.txt TMP_DIR=${tmpdir} + #java -Xmx64g -Djava.io.tmpdir=${tmpdir} -XX:ParallelGCThreads=$NPROC -jar $PICARD/picard.jar EstimateLibraryComplexity I=${sbam} OUTPUT=${pair_id}.libcomplex.txt TMP_DIR=${tmpdir} + #samtools view -@ $NPROC ${sbam} | awk '{sum+=$5} END { print "Mean MAPQ =",sum/NR}' > ${pair_id}.meanmap.txt fi - java -Xmx64g -Djava.io.tmpdir=${tmpdir} -jar $PICARD/picard.jar CollectInsertSizeMetrics INPUT=${sbam} HISTOGRAM_FILE=${pair_id}.hist.ps REFERENCE_SEQUENCE=${index_path}/genome.fa OUTPUT=${pair_id}.hist.txt TMP_DIR=${tmpdir} - bedtools coverage -sorted -g ${index_path}/genomefile.txt -a ${bed} -b ${sbam} -hist > ${pair_id}.covhist.txt - grep ^all ${pair_id}.covhist.txt > ${pair_id}.genomecov.txt - perl $baseDir/calculate_depthcov.pl ${pair_id}.covhist.txt + #java -Xmx64g -Djava.io.tmpdir=${tmpdir} -jar $PICARD/picard.jar CollectInsertSizeMetrics INPUT=${sbam} HISTOGRAM_FILE=${pair_id}.hist.ps REFERENCE_SEQUENCE=${index_path}/genome.fa OUTPUT=${pair_id}.hist.txt TMP_DIR=${tmpdir} fi diff --git a/cvc/dna_trim_align.sh b/cvc/dna_trim_align.sh new file mode 100644 index 0000000000000000000000000000000000000000..a44d414e66de09573d69548e3c1ee3ccf6c4bebd --- /dev/null +++ b/cvc/dna_trim_align.sh @@ -0,0 +1,80 @@ +#!/bin/bash +#trimgalore.sh + +usage() { + echo "-h Help documentation for hisat.sh" + echo "-a --FastQ R1" + echo "-b --FastQ R2" + echo "-p --Prefix for output file name" + echo "Example: bash trimgalore.sh -p prefix -a SRR1551047_1.fastq.gz -b SRR1551047_2.fastq.gz" + exit 1 +} +OPTIND=1 # Reset OPTIND +while getopts :r:g:q:b:p:ufh opt +do + case $opt in + p) pair_id=$OPTARG;; + f) filter=1;; + r) index_path=$OPTARG;; + u) umi='umi';; + g) read_group=$OPTARG;; + p) pair_id=$OPTARG;; + h) usage;; + esac +done + +shift $(($OPTIND -1)) +baseDir="`dirname \"$0\"`" + +fqs=("$@") +numfq=$# + +# Check for mandatory options +if [[ -z $pair_id ]] || [[ -z $fq1 ]]; then + usage +fi +NPROC=$SLURM_CPUS_ON_NODE +if [[ -z $NPROC ]] +then + NPROC=`nproc` +fi +if [[ -z $read_group ]] +then + read_group=$pair_id +fi +if [[ $index_path == *project* ]] +then + testexe='/project/shared/bicf_workflow_ref/seqprg/bin' +else + testexe='/usr/local/bin' +fi + +source /etc/profile.d/modules.sh +module load trimgalore/0.6.4 cutadapt/1.9.1 python/2.7.x-anaconda bwakit/0.7.15 samtools/gcc/1.8 picard/2.10.3 + +threads=`expr $NPROC / 2` + +trim_galore --cores $threads --paired -q 25 -o trim --illumina --gzip --length 35 ${fqs} +if [[ $filter == 1 ]] +then + perl $baseDir/parse_trimreport.pl ${pair_id}.trimreport.txt *trimming_report.txt +fi + +bwa mem -M -t $threads -R "@RG\tID:${read_group}\tLB:tx\tPL:illumina\tPU:barcode\tSM:${read_group}" ${index_path}/genome.fa trim/*.fq.gz > out.sam + +if [[ $umi == 'umi' ]] && [[ -f "${index_path}/genome.fa.alt" ]] +then + k8 ${testexe}/bwa-postalt.js -p tmphla ${index_path}/genome.fa.alt out.sam | python ${baseDir}/add_umi_sam.py -s - -o output.unsort.bam +elif [[ -f "${index_path}/genome.fa.alt" ]] +then + k8 ${testexe}/bwa-postalt.js -p tmphla ${index_path}/genome.fa.alt out.sam| samtools view -1 - > output.unsort.bam +elif [[ $umi == 'umi' ]] +then + python ${baseDir}/add_umi_sam.py -s out.sam -o output.unsort.bam +else + samtools view -1 -o output.unsort.bam out.sam +fi + +samtools sort -n --threads $threads -o output.dups.bam output.unsort.bam +java -Djava.io.tmpdir=./ -Xmx4g -jar $PICARD/picard.jar FixMateInformation ASSUME_SORTED=TRUE SORT_ORDER=coordinate ADD_MATE_CIGAR=TRUE I=output.dups.bam O=${pair_id}.bam +samtools index ${pair_id}.bam diff --git a/preproc_fastq/trimgalore.sh b/preproc_fastq/trimgalore.sh index c9f00d14d3d7ad936056ec5abdc9eaba779d0d9f..8bd5613015db1df80a8410d16f4f37e01b3033a9 100644 --- a/preproc_fastq/trimgalore.sh +++ b/preproc_fastq/trimgalore.sh @@ -28,19 +28,32 @@ baseDir="`dirname \"$0\"`" if [[ -z $pair_id ]] || [[ -z $fq1 ]]; then usage fi +fqs=("$@") +numfq=$# + +if [[ -f $fq1 ]] +then + fqs="$fq1" + r1base="${fq1%.fastq*}" +fi +if [[ -f $fq2 ]] +then + fqs+=" $fq2" + r2base="${fq2%.fastq*}" +fi + +numfq=${#fqs[@]} -r1base="${fq1%.fastq*}" -r2base="${fq2%.fastq*}" source /etc/profile.d/modules.sh -module load trimgalore/0.4.1 cutadapt/1.9.1 +module load trimgalore/0.6.4 cutadapt/2.5 -if [ -s $fq2 ] +if [ $numfq > 1 ] then - trim_galore --paired -q 25 --illumina --gzip --length 35 ${fq1} ${fq2} + trim_galore --paired -q 25 --illumina --gzip --length 35 ${fqs} mv ${r1base}_val_1.fq.gz ${pair_id}.trim.R1.fastq.gz mv ${r2base}_val_2.fq.gz ${pair_id}.trim.R2.fastq.gz else - trim_galore -q 25 --illumina --gzip --length 35 ${fq1} + trim_galore -q 25 --illumina --gzip --length 35 ${fqs} mv ${r1base}_trimmed.fq.gz ${pair_id}.trim.R1.fastq.gz cp ${pair_id}.trim.R1.fastq.gz ${pair_id}.trim.R2.fastq.gz fi