From aaef66b3bfa52db2660656f03295a096981e42da Mon Sep 17 00:00:00 2001 From: Brandi Cantarel <brandi.cantarel@utsouthwestern.edu> Date: Mon, 30 Oct 2017 17:01:28 -0500 Subject: [PATCH] update dnaalignment --- alignment/bamqc.sh | 10 ++-------- alignment/dnaseqalign.sh | 18 ++++++------------ 2 files changed, 8 insertions(+), 20 deletions(-) diff --git a/alignment/bamqc.sh b/alignment/bamqc.sh index 7c2b403..c08dcbe 100644 --- a/alignment/bamqc.sh +++ b/alignment/bamqc.sh @@ -8,14 +8,14 @@ usage() { echo "-n --NucType" echo "-p --Prefix for output file name" echo "-c --Capture Bedfile" - echo "Example: bash hisat.sh -p prefix -r GRCh38 -a hisat -x SRR1551047_1.fastq.gz -y SRR1551047_2.fastq.gz" + echo "Example: bash bamqc.sh -p prefix -r /project/shared/bicf_workflow_ref/GRCh38 -b SRR1551047.bam -y dna -c target.bed" exit 1 } OPTIND=1 # Reset OPTIND while getopts :r:b:n:p:h opt do case $opt in - r) refgeno=$OPTARG;; + r) index_path=$OPTARG;; b) sbam=$OPTARG;; c) bed=$OPTARG;; y) nuctype=$OPTARG;; @@ -31,12 +31,6 @@ if [[ -z $pair_id ]] || [[ -z $fq1 ]]; then usage fi -if [ $refgeno == 'GRCh38' ] || [ $refgeno == 'GRCm38' ]; then - index_path=/project/shared/bicf_workflow_ref/${refgeno} -else - usage -fi - module load samtools/1.6 fastqc/0.11.5 samtools flagstat ${sbam} > ${pair_id}.flagstat.txt fastqc -f bam ${sbam} diff --git a/alignment/dnaseqalign.sh b/alignment/dnaseqalign.sh index 98972fe..0e19623 100644 --- a/alignment/dnaseqalign.sh +++ b/alignment/dnaseqalign.sh @@ -7,14 +7,14 @@ usage() { echo "-x --FastQ R1" echo "-y --FastQ R2" echo "-p --Prefix for output file name" - echo "Example: bash hisat.sh -p prefix -r GRCh38 -x SRR1551047_1.fastq.gz -y SRR1551047_2.fastq.gz" + echo "Example: bash dnaseqalign.sh -p prefix -r /project/shared/bicf_workflow_ref/GRCh38 -x SRR1551047_1.fastq.gz -y SRR1551047_2.fastq.gz" exit 1 } OPTIND=1 # Reset OPTIND while getopts :r:a:x:y:p:h opt do case $opt in - r) refgeno=$OPTARG;; + r) index_path=$OPTARG;; x) fq1=$OPTARG;; y) fq2=$OPTARG;; a) algo=$OPTARG;; @@ -30,12 +30,6 @@ if [[ -z $pair_id ]] || [[ -z $fq1 ]]; then usage fi -if [ $refgeno == 'GRCh38' ] || [ $refgeno == 'GRCm38' ] || [ $refgeno == 'GRCh37' ]; then - index_path=/project/shared/bicf_workflow_ref/${refgeno} -else - usage -fi - if [[ -z $SLURM_CPUS_ON_NODE ]] then SLURM_CPUS_ON_NODE=1 @@ -48,7 +42,7 @@ then else bwa mem -M -t $SLURM_CPUS_ON_NODE -R "@RG\tID:${pair_id}\tLB:tx\tPL:illumina\tPU:barcode\tSM:${pair_id}" ${index_path}/genome.fa ${fq1} ${fq2} > out.sam fi -if [ $refgeno == 'GRCh38' ] +if [[ $index_path == '/project/shared/bicf_workflow_ref/GRCh38' ]] then cat out.sam | k8 /cm/shared/apps/bwakit/0.7.15/bwa-postalt.js -p ${pair_id}.hla ${index_path}/genome.fa.alt | samtools view -1 - > output.unsort.bam run-HLA ${pair_id}.hla > ${pair_id}.hla.top 2> ${pair_id}.log.hla @@ -59,7 +53,7 @@ else fi samtools sort --threads $SLURM_CPUS_ON_NODE -o output.dups.bam output.unsort.bam samtools view -h output.unsort.bam | samblaster -M -a --excludeDups --addMateTags --maxSplitCount 2 --minNonOverlap 20 -d discordants.sam -s splitters.sam > temp.sam -gawk '{ if (\$0~"^@") { print; next } else { \$10="*"; \$11="*"; print } }' OFS="\\t" splitters.sam | samtools view -S -b - | samtools sort -o ${pair_id}.splitters.bam - -gawk '{ if (\$0~"^@") { print; next } else { \$10="*"; \$11="*"; print } }' OFS="\\t" discordants.sam | samtools view -S -b - | samtools sort -o ${pair_id}.discordants.bam - -java -Djava.io.tmpdir=./ -Xmx4g -jar \$PICARD/picard.jar FixMateInformation ASSUME_SORTED=TRUE SORT_ORDER=coordinate ADD_MATE_CIGAR=TRUE I=output.dups.bam O=${pair_id}.bam +gawk '{ if ($0~"^@") { print; next } else { $10="*"; $11="*"; print } }' OFS="\t" splitters.sam | samtools view -S -b - | samtools sort -o ${pair_id}.splitters.bam - +gawk '{ if ($0~"^@") { print; next } else { $10="*"; $11="*"; print } }' OFS="\t" discordants.sam | samtools view -S -b - | samtools sort -o ${pair_id}.discordants.bam - +java -Djava.io.tmpdir=./ -Xmx4g -jar $PICARD/picard.jar FixMateInformation ASSUME_SORTED=TRUE SORT_ORDER=coordinate ADD_MATE_CIGAR=TRUE I=output.dups.bam O=${pair_id}.bam samtools index -@ $SLURM_CPUS_ON_NODE ${pair_id}.bam -- GitLab