diff --git a/alignment/bamqc.sh b/alignment/bamqc.sh index b5257a772d730efa4d686ecf4b4b4e787109cad2..2d4bc75ef55dbbccd337617a9e149a85ba88fd94 100644 --- a/alignment/bamqc.sh +++ b/alignment/bamqc.sh @@ -2,18 +2,18 @@ #trimgalore.sh usage() { - echo "-h Help documentation for hisat.sh" - echo "-r --Reference Genome: GRCh38 or GRCm38" - echo "-b --BAM File" - echo "-n --NucType" - echo "-p --Prefix for output file name" - echo "-c --Capture Bedfile" - echo "-d --RemoveDuplicates 1=yes, 0=no default=no" - echo "Example: bash bamqc.sh -p prefix -r /project/shared/bicf_workflow_ref/human/GRCh38 -b SRR1551047.bam -n dna -c target.bed" - exit 1 + echo "-h Help documentation for hisat.sh" + echo "-r --Reference Genome: GRCh38 or GRCm38" + echo "-b --BAM File" + echo "-n --NucType" + echo "-p --Prefix for output file name" + echo "-c --Capture Bedfile" + echo "-d --RemoveDuplicates 1=yes, 0=no default=no" + echo "Example: bash bamqc.sh -p prefix -r /project/shared/bicf_workflow_ref/human/GRCh38 -b SRR1551047.bam -n dna -c target.bed" + exit 1 } OPTIND=1 # Reset OPTIND -while getopts :r:b:c:n:p:e:s:d:h opt +while getopts :r:b:c:n:p:u:e:s:d:h opt do case $opt in r) index_path=$OPTARG;; @@ -24,6 +24,7 @@ do d) dedup=$OPTARG;; e) version=$OPTARG;; s) skiplc=1;; + u) user=$OPTARG;; h) usage;; esac done @@ -39,13 +40,6 @@ if [[ -z $version ]] then version='NA' fi - -source /etc/profile.d/modules.sh -module load samtools/gcc/1.10 fastqc/0.11.8 -samtools flagstat ${sbam} > ${pair_id}.flagstat.txt -fastqc -f bam ${sbam} -baseDir="`dirname \"$0\"`" - NPROC=$SLURM_CPUS_ON_NODE if [[ -z $NPROC ]] then @@ -53,14 +47,38 @@ then fi threads=`expr $NPROC - 10` +if [[ -n $user ]] +then + USER=$user +else + USER=$(whoami) +fi + +parseopt=" -u $USER" +if [[ -n $version ]] +then + parseopt="$parseopt -e $version" +fi +if [[ -f $index_path/reference_info.txt ]] +then + parseopt="$parseopt -r $index_path" +fi +tmpdir=`pwd` +source /etc/profile.d/modules.sh +module load samtools/gcc/1.10 fastqc/0.11.8 bedtools/2.29.0 picard/2.10.3 + +samtools flagstat ${sbam} > ${pair_id}.flagstat.txt +fastqc -f bam ${sbam} +baseDir="`dirname \"$0\"`" + if [[ $dedup == 1 ]] then mv $sbam ori.bam samtools view -@ $threads -F 1024 -b -o ${sbam} ori.bam fi -tmpdir=`pwd` -if [[ $nuctype == 'dna' ]]; then - module load bedtools/2.29.2 picard/2.10.3 + +if [[ $nuctype == 'dna' ]] +then bedtools coverage -a ${bed} -b ${sbam} -hist > ${pair_id}.covhist.txt grep ^all ${pair_id}.covhist.txt > ${pair_id}.genomecov.txt perl $baseDir/calculate_depthcov.pl ${pair_id}.covhist.txt @@ -75,12 +93,7 @@ if [[ $nuctype == 'dna' ]]; then #samtools view -@ $threads ${sbam} | awk '{sum+=$5} END { print "Mean MAPQ =",sum/NR}' > ${pair_id}.meanmap.txt fi #java -Xmx64g -Djava.io.tmpdir=${tmpdir} -jar $PICARD/picard.jar CollectInsertSizeMetrics INPUT=${sbam} HISTOGRAM_FILE=${pair_id}.hist.ps REFERENCE_SEQUENCE=${index_path}/genome.fa OUTPUT=${pair_id}.hist.txt TMP_DIR=${tmpdir} - if [[ $index_path/reference_info.txt ]] - then - perl $baseDir/sequenceqc_dna.pl -e ${version} -r $index_path ${pair_id}.genomecov.txt - else - touch ${pair_id}.genomecov.txt - fi + perl $baseDir/sequenceqc_dna.pl $parseopt ${pair_id}.genomecov.txt else - perl $baseDir/sequenceqc_rna.pl -e ${version} -r $index_path ${pair_id}.flagstat.txt + perl $baseDir/sequenceqc_rna.pl $parseopt ${pair_id}.flagstat.txt fi diff --git a/alignment/sequenceqc_dna.pl b/alignment/sequenceqc_dna.pl index 6634923731839925d2e9a57c1169c35ebcad0e70..06bcf396a53e17e90340f4ba829f450439bec723 100755 --- a/alignment/sequenceqc_dna.pl +++ b/alignment/sequenceqc_dna.pl @@ -1,5 +1,5 @@ #!/usr/bin/perl -w -#sequenceqc_alignment.p +#sequenceqc_alignment.pl use Getopt::Long qw(:config no_ignore_case no_auto_abbrev); my %opt = (); @@ -122,7 +122,7 @@ foreach $sfile (@statfiles) { "Alignment_Status\t".$hash{status},"Alignment_Date\t".$hash{date}, "File_Owner\t".$hash{fileowner},"Workflow_Version\t".$gittag),"\n"; close OUT; - if (-e "$opt{refdir}\/reference_info.txt") { + if ($opt{refdir} && -e "$opt{refdir}\/reference_info.txt") { system(qq{cat $opt{refdir}\/reference_info.txt >> $prefix\.sequence.stats.txt}); } ##### END separateFilesPerSample ###### diff --git a/alignment/sequenceqc_rna.pl b/alignment/sequenceqc_rna.pl index 906b802404a6ba82cd463ca09ebe92f1d4d6c931..b2fda5160b5c569f0c7b273e2413f45f4df3a5d3 100755 --- a/alignment/sequenceqc_rna.pl +++ b/alignment/sequenceqc_rna.pl @@ -74,5 +74,4 @@ foreach my $file (@files) { print OUT join("\n","Sample\t".$sample,"Total_Raw_Count\t".$total, "Read1_Map\t".$pairs,"Read2_Map\t".$read2ct, "Map_Rate\t".$maprate,"Concordant_Rate\t".$concorrate,"Alignment_Status\t".$status,"Alignment_Date\t".$date, "File_Owner\t".$fileowner,"Workflow_Version\t".$gittag),"\n"; - system(qq{cat $opt{refdir}\/reference_info.txt >> $prefix\.sequence.stats.txt}); }