diff --git a/alignment/bamqc.sh b/alignment/bamqc.sh index eefb332c8236d57b13c96f3e7526ecc77ce71602..029a921d4f13e70325c598c61321f1dcbe270eab 100644 --- a/alignment/bamqc.sh +++ b/alignment/bamqc.sh @@ -8,7 +8,7 @@ usage() { echo "-n --NucType" echo "-p --Prefix for output file name" echo "-c --Capture Bedfile" - echo "Example: bash bamqc.sh -p prefix -r /project/shared/bicf_workflow_ref/GRCh38 -b SRR1551047.bam -n dna -c target.bed" + echo "Example: bash bamqc.sh -p prefix -r /project/shared/bicf_workflow_ref/human/GRCh38 -b SRR1551047.bam -n dna -c target.bed" exit 1 } OPTIND=1 # Reset OPTIND diff --git a/alignment/dnaseqalign.sh b/alignment/dnaseqalign.sh index 36a13645df4ed85af8de20cfce189b13d0dc8dd0..9ae193d5c623bbd06626a463e5ae33904c53eaa6 100644 --- a/alignment/dnaseqalign.sh +++ b/alignment/dnaseqalign.sh @@ -8,7 +8,7 @@ usage() { echo "-y --FastQ R2" echo "-p --Prefix for output file name" echo "-u --UMI" - echo "Example: bash dnaseqalign.sh -p prefix -u 1 -r /project/shared/bicf_workflow_ref/GRCh38 -x SRR1551047_1.fastq.gz -y SRR1551047_2.fastq.gz" + echo "Example: bash dnaseqalign.sh -p prefix -u 1 -r /project/shared/bicf_workflow_ref/human/GRCh38 -x SRR1551047_1.fastq.gz -y SRR1551047_2.fastq.gz" exit 1 } OPTIND=1 # Reset OPTIND diff --git a/alignment/filter_genefusions.pl b/alignment/filter_genefusions.pl index f44cf58f34f07ec90b277e1944f3e8ffd042f271..1c43cd37dc9f9d66b2317a7626756454d96754e3 100755 --- a/alignment/filter_genefusions.pl +++ b/alignment/filter_genefusions.pl @@ -14,13 +14,13 @@ while (my $line = <ENT>) { $entrez{$row[2]} = $row[1]; } } -open OM, "</project/shared/bicf_workflow_ref/GRCh38/clinseq_prj/utswv2_known_genefusions.txt" or die $!; +open OM, "</project/shared/bicf_workflow_ref/human/GRCh38/clinseq_prj/utswv2_known_genefusions.txt" or die $!; while (my $line = <OM>) { chomp($line); $known{$line} = 1; } close OM; -open OM, "</project/shared/bicf_workflow_ref/GRCh38/clinseq_prj/panel1410.genelist.txt" or die $!; +open OM, "</project/shared/bicf_workflow_ref/human/GRCh38/clinseq_prj/panel1410.genelist.txt" or die $!; while (my $line = <OM>) { chomp($line); $keep{$line} = 1; diff --git a/alignment/markdups.sh b/alignment/markdups.sh index 6338711781e0a833b01ce822de173562ffe49eeb..b794b25c4e26a1691252dce8ea5cae323f621b8e 100644 --- a/alignment/markdups.sh +++ b/alignment/markdups.sh @@ -63,7 +63,7 @@ then samtools fastq -1 ${pair_id}.consensus.R1.fastq -2 ${pair_id}.consensus.R2.fastq ${pair_id}.consensus.bam gzip ${pair_id}.consensus.R1.fastq gzip ${pair_id}.consensus.R2.fastq - bwa mem -M -C -t 2 -R "@RG\tID:${pair_id}\tLB:tx\tPL:illumina\tPU:barcode\tSM:${pair_id}" /project/shared/bicf_workflow_ref/GRCh38/genome.fa ${pair_id}.consensus.R1.fastq.gz ${pair_id}.consensus.R2.fastq.gz | samtools view -1 - > ${pair_id}.consensus.bam + bwa mem -M -C -t 2 -R "@RG\tID:${pair_id}\tLB:tx\tPL:illumina\tPU:barcode\tSM:${pair_id}" /project/shared/bicf_workflow_ref/human/GRCh38/genome.fa ${pair_id}.consensus.R1.fastq.gz ${pair_id}.consensus.R2.fastq.gz | samtools view -1 - > ${pair_id}.consensus.bam samtools sort --threads 10 -o ${pair_id}.dedup.bam ${pair_id}.consensus.bam else cp ${sbam} ${pair_id}.dedup.bam diff --git a/alignment/rnaseqalign.sh b/alignment/rnaseqalign.sh index 182a738385e8c8bf3159c151a498df76ec89c2d6..606af0aa2fdd656a69d8e837d96b757a7bfcd72f 100644 --- a/alignment/rnaseqalign.sh +++ b/alignment/rnaseqalign.sh @@ -9,7 +9,7 @@ usage() { echo "-a --Method: hisat or star" echo "-p --Prefix for output file name" echo "-u --UMI sequences are in FQ Read Name" - echo "Example: bash rnaseqalign.sh -a hisat -p prefix -u -r /project/shared/bicf_workflow_ref/GRCh38 -x SRR1551047_1.fastq.gz -y SRR1551047_2.fastq.gz" + echo "Example: bash rnaseqalign.sh -a hisat -p prefix -u -r /project/shared/bicf_workflow_ref/human/GRCh38 -x SRR1551047_1.fastq.gz -y SRR1551047_2.fastq.gz" exit 1 } OPTIND=1 # Reset OPTIND diff --git a/alignment/starfusion.sh b/alignment/starfusion.sh index d462f5ad4c7e252f7d136eac6ee7dfbb1e423469..3e1ec53835531d75db5169237a59e0b74b178f63 100644 --- a/alignment/starfusion.sh +++ b/alignment/starfusion.sh @@ -7,7 +7,7 @@ usage() { echo "-a --FastQ R1" echo "-b --FastQ R2" echo "-p --Prefix for output file name" - echo "Example: bash starfusion.sh -p prefix -r /project/shared/bicf_workflow_ref/GRCh38 -a SRR1551047_1.fastq.gz -b SRR1551047_2.fastq.gz" + echo "Example: bash starfusion.sh -p prefix -r /project/shared/bicf_workflow_ref/human/GRCh38 -a SRR1551047_1.fastq.gz -b SRR1551047_2.fastq.gz" exit 1 } OPTIND=1 # Reset OPTIND @@ -67,7 +67,7 @@ cut -f 5-8 ${pair_id}.starfusion.txt |perl -pe 's/\^|:/\t/g' | awk '{print "sing if [[ $filter == 1 ]] then cut -f 6,8 ${pair_id}.starfusion.txt |grep -v Breakpoint |perl -pe 's/\t/\n/g' |awk -F ':' '{print $1"\t"$2-1"\t"$2}' > temp.bed - bedtools intersect -wao -a temp.bed -b /project/shared/bicf_workflow_ref/GRCh38/cytoBand.txt |cut -f 1,2,7 > cytoband_pos.txt + bedtools intersect -wao -a temp.bed -b /project/shared/bicf_workflow_ref/human/GRCh38/cytoBand.txt |cut -f 1,2,7 > cytoband_pos.txt perl $baseDir/filter_genefusions.pl -p ${pair_id} -f ${pair_id}.starfusion.txt fi diff --git a/genect_rnaseq/cBioPortal_documents.pl b/genect_rnaseq/cBioPortal_documents.pl index 25a816e128df299832800f5b2cea921ce073ac62..5b672a32055bc7dc7e54bed7b67170e371d1f4ed 100644 --- a/genect_rnaseq/cBioPortal_documents.pl +++ b/genect_rnaseq/cBioPortal_documents.pl @@ -15,7 +15,7 @@ while (my $line = <ENT_ENS>){ $entrez{$row[2]}=$row[1]; } close ENT_ENS; -open ENT_ENS, "</project/shared/bicf_workflow_ref/GRCh38/genenames.txt" or die $!; +open ENT_ENS, "</project/shared/bicf_workflow_ref/human/GRCh38/genenames.txt" or die $!; my $gn_header = <ENT_ENS>; my %ensym; while (my $line = <ENT_ENS>){ diff --git a/variants/annotvcf.sh b/variants/annotvcf.sh index 1f4d9720de5b41baf5b9b4dd3adaa0e807015c74..d3225209671997321d6a36d9befaa29fd842ed21 100755 --- a/variants/annotvcf.sh +++ b/variants/annotvcf.sh @@ -33,7 +33,7 @@ fi if [[ $index_path == '/project/shared/bicf_workflow_ref/human/GRCh38' ]] then tabix ${unionvcf} - bcftools annotate -Oz -a ${index_path}/gnomad.txt.gz -h ${index_path}/gnomad.header -c CHROM,POS,REF,ALT,GNOMAD_HOM,GNOMAD_AF,AF_POPMAX -o ${pair_id}.gnomad.vcf.gz ${unionvcf} + bcftools annotate -Oz -a ${index_path}/gnomad.txt.gz -h ${index_path}/gnomad.header -c CHROM,POS,REF,ALT,HG19_VARIANT,GNOMAD_HOM,GNOMAD_AF,AF_POPMAX -o ${pair_id}.gnomad.vcf.gz ${unionvcf} tabix ${pair_id}.gnomad.vcf.gz bcftools annotate -Oz -a ${index_path}/repeat_regions.bed.gz -o ${pair_id}.repeat.vcf.gz --columns CHROM,FROM,TO,RepeatType -h /project/shared/bicf_workflow_ref/RepeatType.header ${pair_id}.gnomad.vcf.gz java -Xmx10g -jar $SNPEFF_HOME/snpEff.jar -no-downstream -no-upstream -no-intergenic -lof -c $SNPEFF_HOME/snpEff.config GRCh38.86 ${pair_id}.repeat.vcf.gz | java -jar $SNPEFF_HOME/SnpSift.jar annotate -id ${index_path}/dbSnp.vcf.gz - | java -jar $SNPEFF_HOME/SnpSift.jar annotate -info CLNSIG,CLNDSDB,CLNDSDBID,CLNDBN,CLNREVSTAT,CLNACC ${index_path}/clinvar.vcf.gz - | java -jar $SNPEFF_HOME/SnpSift.jar annotate -info CNT ${index_path}/cosmic.vcf.gz - | java -Xmx10g -jar $SNPEFF_HOME/SnpSift.jar dbnsfp -v -db ${index_path}/dbNSFP.txt.gz - | bgzip > ${pair_id}.annot.vcf.gz diff --git a/variants/cnvkit.sh b/variants/cnvkit.sh index ea813fbcc17a5273e8feee0e7f29063e19d462db..743167f9393019ef33b91a60e4c6d6d76c5c84bb 100755 --- a/variants/cnvkit.sh +++ b/variants/cnvkit.sh @@ -26,7 +26,7 @@ done shift $(($OPTIND -1)) -index_path='/project/shared/bicf_workflow_ref/GRCh38/clinseq_prj/' +index_path='/project/shared/bicf_workflow_ref/human/GRCh38/clinseq_prj/' # Check for mandatory options if [[ -z $pair_id ]] || [[ -z $sbam ]]; then @@ -38,7 +38,7 @@ then fi baseDir="`dirname \"$0\"`" -if [[ $capture == '/project/shared/bicf_workflow_ref/GRCh38/clinseq_prj/UTSWV2.bed' ]] +if [[ $capture == '/project/shared/bicf_workflow_ref/human/GRCh38/clinseq_prj/UTSWV2.bed' ]] then normals="${index_path}/UTSWV2.normals.cnn" targets="${index_path}/UTSWV2.cnvkit_" @@ -64,5 +64,5 @@ cnvkit.py fix ${pair_id}.targetcoverage.cnn ${pair_id}.antitargetcoverage.cnn ${ cnvkit.py segment ${pair_id}.cnr -o ${pair_id}.cns cnvkit.py call ${pair_id}.cns -o ${pair_id}.call.cns cnvkit.py scatter ${pair_id}.cnr -s ${pair_id}.call.cns -t --segment-color "blue" -o ${pair_id}.cnv.scatter.pdf -cut -f 1,2,3 ${pair_id}.call.cns | grep -v chrom | bedtools intersect -wao -b /project/shared/bicf_workflow_ref/GRCh38/cytoBand.txt -a stdin |cut -f 1,2,3,7 > ${pair_id}.cytoband.bed +cut -f 1,2,3 ${pair_id}.call.cns | grep -v chrom | bedtools intersect -wao -b /project/shared/bicf_workflow_ref/human/GRCh38/cytoBand.txt -a stdin |cut -f 1,2,3,7 > ${pair_id}.cytoband.bed perl $baseDir/filter_cnvkit.pl *.call.cns diff --git a/variants/filter_cnvkit.pl b/variants/filter_cnvkit.pl index b7504ad5823d209c647e3ae270f9e71df8f55f62..b4ddaccdd975d24fe37ca84d41dcac47b2e7e476 100755 --- a/variants/filter_cnvkit.pl +++ b/variants/filter_cnvkit.pl @@ -1,7 +1,7 @@ #!/usr/bin/perl -w #parse_cnvkit_table.pl -my $refdir = '/project/shared/bicf_workflow_ref/GRCh38/'; +my $refdir = '/project/shared/bicf_workflow_ref/human/GRCh38/'; open OM, "<$refdir\/clinseq_prj/panel1410.genelist.txt" or die $!; while (my $line = <OM>) { chomp($line); diff --git a/variants/somatic_callers.sh b/variants/somatic_callers.sh index 4517c52fd4f30737223dba4046c057a3eb877362..1adde7ba1e9c6483965fb2523fc19eef85d591a0 100755 --- a/variants/somatic_callers.sh +++ b/variants/somatic_callers.sh @@ -36,7 +36,7 @@ if [[ -z $SLURM_CPUS_ON_NODE ]] SLURM_CPUS_ON_NODE=1 fi -index_path=/project/shared/bicf_workflow_ref/GRCh38 +index_path=/project/shared/bicf_workflow_ref/human/GRCh38 genome_reference=${index_path}/genome.fa cosmic_reference=${index_path}/cosmic.vcf.gz @@ -76,7 +76,7 @@ fi if [ $algo == 'mutect' ] then module load parallel python/2.7.x-anaconda gatk/3.8 bcftools/intel/1.3 bedtools/2.25.0 snpeff/4.2 vcftools/0.1.14 - cut -f 1 /project/shared/bicf_workflow_ref/GRCh38/genomefile.5M.txt | parallel --delay 2 -j 10 "java -Xmx20g -jar $GATK_JAR -R ${genome_reference} -D ${dbSnp_reference} -T MuTect2 -stand_call_conf 30 -stand_emit_conf 10.0 -A FisherStrand -A QualByDepth -A VariantType -A DepthPerAlleleBySample -A HaplotypeScore -A AlleleBalance -I:tumor ${tumor}.final.bam -I:normal ${normal}.final.bam --cosmic ${cosmic} -o ${tumor}.{}.mutect.vcf -L {}" + cut -f 1 /project/shared/bicf_workflow_ref/human/GRCh38/genomefile.5M.txt | parallel --delay 2 -j 10 "java -Xmx20g -jar $GATK_JAR -R ${genome_reference} -D ${dbSnp_reference} -T MuTect2 -stand_call_conf 30 -stand_emit_conf 10.0 -A FisherStrand -A QualByDepth -A VariantType -A DepthPerAlleleBySample -A HaplotypeScore -A AlleleBalance -I:tumor ${tumor}.final.bam -I:normal ${normal}.final.bam --cosmic ${cosmic} -o ${tumor}.{}.mutect.vcf -L {}" vcf-concat ${tumor}*.vcf | vcf-sort | vcf-annotate -n --fill-type | java -jar $SNPEFF_HOME/SnpSift.jar filter -p '((FS <= 60) & GEN[*].DP >= 10)' | perl -pe 's/TUMOR/'${tumor}'/' | perl -pe 's/NORMAL/'${normal}'/g' |bgzip > ${tumor}_${normal}.mutect.vcf.gz fi