# GitLab CI pipeline definition.
#
# Stages, in order:
#   unit        - one job per pipeline step, each followed by a pytest marker
#   aggregation - collects tool versions / references into MultiQC YAML files
#   reference   - checks that reference data can be copied / is listed in hatrac
#   integration - full Nextflow runs (merge requests only)
#   consistency - compares assigned-read counts from the integration runs
#
# Unit, aggregation and reference jobs run on pushes and tags but not on merge
# requests; integration and consistency jobs run on merge requests whose
# target branch name does not match /master/.

# Runs before every job's script: loads the Python, Singularity and Nextflow
# modules, installs the pinned Python packages, links the shared test data
# into ./test_data/ and creates the deriva / bdbag config directories.
before_script:
  - module load python/3.6.4-anaconda
  - pip install --user attrs==19.1.0 pytest-pythonpath==0.7.1 pytest-cov==2.5.1 deriva==1.3.0
  - module load singularity/3.5.3
  - module load nextflow/20.01.0
  - ln -sfn /project/BICF/BICF_Core/shared/gudmap/test_data/* ./test_data/
  - mkdir -p ~/.deriva
  - mkdir -p ~/.bdbag

stages:
  - unit
  - aggregation
  - reference
  - integration
  - consistency

# Downloads the replicate bag for RID Q-Y5F6 with deriva-download-cli.
getBag:
  stage: unit
  only:
    - push
    - tags
  except:
    - merge_requests
  script:
    - ln -sfn `readlink -e ./test_data/auth/credential.json` ~/.deriva/credential.json
    - singularity run 'docker://bicf/gudmaprbkfilexfer:2.0.1_indev' deriva-download-cli --version > version_deriva.txt
    - singularity run 'docker://bicf/gudmaprbkfilexfer:2.0.1_indev' deriva-download-cli dev.gudmap.org --catalog 2 ./workflow/conf/replicate_export_config.json . rid=Q-Y5F6
    - pytest -m getBag
  artifacts:
    name: "$CI_JOB_NAME"
    when: always
    paths:
      - version_deriva.txt
    expire_in: 7 days

# Unzips the test bag and fetches its contents with bdbagFetch.sh.
getData:
  stage: unit
  only:
    - push
    - tags
  except:
    - merge_requests
  script:
    - singularity run 'docker://bicf/gudmaprbkfilexfer:2.0.1_indev' bdbag --version > version_bdbag.txt
    - ln -sfn `readlink -e ./test_data/auth/cookies.txt` ~/.bdbag/deriva-cookies.txt
    - unzip ./test_data/bag/Replicate_Q-Y5F6.zip
    - singularity run 'docker://bicf/gudmaprbkfilexfer:2.0.1_indev' bash ./workflow/scripts/bdbagFetch.sh Replicate_Q-Y5F6 Replicate_Q-Y5F6 TEST
    - pytest -m getData
  artifacts:
    name: "$CI_JOB_NAME"
    when: always
    paths:
      - version_bdbag.txt
    expire_in: 7 days

# Extracts each metadata field with parseMeta.py and writes them to design.csv.
parseMetadata:
  stage: unit
  only:
    - push
    - tags
  except:
    - merge_requests
  script:
    - singularity run 'docker://bicf/python3:2.0.1_indev' python3 --version > version_python.txt
    - rep=$(singularity run 'docker://bicf/python3:2.0.1_indev' python3 ./workflow/scripts/parseMeta.py -r Replicate_RID -m "./test_data/meta/metaTest.csv" -p repRID)
    - exp=$(singularity run 'docker://bicf/python3:2.0.1_indev' python3 ./workflow/scripts/parseMeta.py -r Replicate_RID -m "./test_data/meta/metaTest.csv" -p expRID)
    - study=$(singularity run 'docker://bicf/python3:2.0.1_indev' python3 ./workflow/scripts/parseMeta.py -r Replicate_RID -m "./test_data/meta/metaTest.csv" -p studyRID)
    - endsMeta=$(singularity run 'docker://bicf/python3:2.0.1_indev' python3 ./workflow/scripts/parseMeta.py -r Replicate_RID -m "./test_data/meta/metaTest.csv" -p endsMeta)
    - endsManual=$(singularity run 'docker://bicf/python3:2.0.1_indev' python3 ./workflow/scripts/parseMeta.py -r Replicate_RID -m "./test_data/meta/metaTest.csv" -p endsManual)
    - stranded=$(singularity run 'docker://bicf/python3:2.0.1_indev' python3 ./workflow/scripts/parseMeta.py -r Replicate_RID -m "./test_data/meta/metaTest.csv" -p stranded)
    - spike=$(singularity run 'docker://bicf/python3:2.0.1_indev' python3 ./workflow/scripts/parseMeta.py -r Replicate_RID -m "./test_data/meta/metaTest.csv" -p spike)
    - species=$(singularity run 'docker://bicf/python3:2.0.1_indev' python3 ./workflow/scripts/parseMeta.py -r Replicate_RID -m "./test_data/meta/metaTest.csv" -p species)
    # NOTE(review): readLength is read from metaTest.stageNew.csv while every
    # other field uses metaTest.csv - confirm this is intentional.
    - readLength=$(singularity run 'docker://bicf/python3:2.0.1_indev' python3 ./workflow/scripts/parseMeta.py -r Replicate_RID -m "./test_data/meta/metaTest.stageNew.csv" -p readLength)
    - echo -e "${endsMeta},${endsManual},${stranded},${spike},${species},${readLength},${exp},${study},${rep}" > design.csv
    - pytest -m parseMetadata
  artifacts:
    name: "$CI_JOB_NAME"
    when: always
    paths:
      - version_python.txt
    expire_in: 7 days

# Reads the alignment rate from the test summary, runs infer_experiment.py and
# derives the endness from its log; either value being empty fails the job.
inferMetadata:
  stage: unit
  only:
    - push
    - tags
  except:
    - merge_requests
  script:
    - singularity run 'docker://bicf/rseqc3.0:2.0.1_indev' infer_experiment.py --version > version_rseqc.txt
    - >
      align=$(echo $(grep "Overall alignment rate" ./test_data/meta/Q-Y5F6_1M.se.alignSummary.txt | cut -f2 -d ':' | cut -f2 -d ' ' | tr -d '%')) &&
      if [[ ${align} == "" ]]; then exit 1; fi
    # inferMeta.sh is run with bash, the same way bdbagFetch.sh is run in
    # getData (it was previously handed to python3).
    # NOTE(review): assumes inferMeta.sh is a shell script, as its extension
    # suggests - confirm.
    - >
      singularity run 'docker://bicf/rseqc3.0:2.0.1_indev' infer_experiment.py -r "/project/BICF/BICF_Core/shared/gudmap/references/GRCh38.p12.v31/bed/genome.bed" -i "./test_data/bam/small/Q-Y5F6_1M.se.sorted.deduped.bam" 1>> Q-Y5F6_1M.se.inferMetadata.log &&
      ended=`singularity run 'docker://bicf/python3:1.3' bash ./workflow/scripts/inferMeta.sh endness Q-Y5F6_1M.se.inferMetadata.log` &&
      if [[ ${ended} == "" ]]; then exit 1; fi
    - pytest -m inferMetadata
  artifacts:
    name: "$CI_JOB_NAME"
    when: always
    paths:
      - version_rseqc.txt
    expire_in: 7 days

# Trims the single-end and paired-end test fastqs with trim_galore and
# computes the median read length of each trimmed output.
trimData:
  stage: unit
  only:
    - push
    - tags
  except:
    - merge_requests
  script:
    - singularity run 'docker://bicf/trimgalore:1.1' trim_galore --version > version_trimgalore.txt
    - singularity run 'docker://bicf/trimgalore:1.1' trim_galore --gzip -q 25 --length 35 --basename Q-Y5F6_1M.se ./test_data/fastq/small/Q-Y5F6_1M.R1.fastq.gz
    - singularity run 'docker://bicf/trimgalore:1.1' trim_galore --gzip -q 25 --length 35 --paired --basename Q-Y5F6_1M.pe ./test_data/fastq/small/Q-Y5F6_1M.R1.fastq.gz ./test_data/fastq/small/Q-Y5F6_1M.R2.fastq.gz
    - readLengthSE=$(zcat *_trimmed.fq.gz | awk '{if(NR%4==2) print length($1)}' | sort -n | awk '{a[NR]=$0}END{print(NR%2==1)?a[int(NR/2)+1]:(a[NR/2]+a[NR/2+1])/2}')
    - readLengthPE=$(zcat *_1.fq.gz | awk '{if(NR%4==2) print length($1)}' | sort -n | awk '{a[NR]=$0}END{print(NR%2==1)?a[int(NR/2)+1]:(a[NR/2]+a[NR/2+1])/2}')
    - pytest -m trimData
  artifacts:
    name: "$CI_JOB_NAME"
    when: always
    paths:
      - version_trimgalore.txt
    expire_in: 7 days

# Samples 1000 reads (seed 100) from the trimmed single-end test fastq.
downsampleData:
  stage: unit
  only:
    - push
    - tags
  except:
    - merge_requests
  script:
    - singularity run 'docker://bicf/seqtk:2.0.1_indev' seqtk sample -s100 ./test_data/fastq/small/Q-Y5F6_1M.se_trimmed.fq.gz 1000 1> sampled.1.fq
    - pytest -m downsampleData

# Aligns the single-end and paired-end test reads with hisat2, then filters,
# sorts and indexes the result with samtools.
alignData:
  stage: unit
  only:
    - push
    - tags
  except:
    - merge_requests
  script:
    - singularity run 'docker://bicf/gudmaprbkaligner:2.0.1_indev' hisat2 --version > version_hisat2.txt
    - singularity run 'docker://bicf/gudmaprbkaligner:2.0.1_indev' samtools --version > version_samtools.txt
    - singularity run 'docker://bicf/gudmaprbkaligner:2.0.1_indev' hisat2 -p 20 --add-chrname --un-gz Q-Y5F6_1M.se.unal.gz -S Q-Y5F6_1M.se.sam -x /project/BICF/BICF_Core/shared/gudmap/references/GRCh38.p12.v31/hisat2/genome --rna-strandness F -U ./test_data/fastq/small/Q-Y5F6_1M.se_trimmed.fq.gz --summary-file Q-Y5F6_1M.se.alignSummary.txt --new-summary
    - singularity run 'docker://bicf/gudmaprbkaligner:2.0.1_indev' samtools view -1 -@ 20 -F 4 -F 8 -F 256 -o Q-Y5F6_1M.se.bam Q-Y5F6_1M.se.sam
    - singularity run 'docker://bicf/gudmaprbkaligner:2.0.1_indev' samtools sort -@ 20 -O BAM -o Q-Y5F6_1M.se.sorted.bam Q-Y5F6_1M.se.bam
    - singularity run 'docker://bicf/gudmaprbkaligner:2.0.1_indev' samtools index -@ 20 -b Q-Y5F6_1M.se.sorted.bam Q-Y5F6_1M.se.sorted.bam.bai
    - singularity run 'docker://bicf/gudmaprbkaligner:2.0.1_indev' hisat2 -p 20 --add-chrname --un-gz Q-Y5F6_1M.pe.unal.gz -S Q-Y5F6_1M.pe.sam -x /project/BICF/BICF_Core/shared/gudmap/references/GRCh38.p12.v31/hisat2/genome --rna-strandness FR --no-mixed --no-discordant -1 ./test_data/fastq/small/Q-Y5F6_1M.pe_R1_val_1.fq.gz -2 ./test_data/fastq/small/Q-Y5F6_1M.pe_R2_val_2.fq.gz --summary-file Q-Y5F6_1M.pe.alignSummary.txt --new-summary
    - singularity run 'docker://bicf/gudmaprbkaligner:2.0.1_indev' samtools view -1 -@ 20 -F 4 -F 8 -F 256 -o Q-Y5F6_1M.pe.bam Q-Y5F6_1M.pe.sam
    - singularity run 'docker://bicf/gudmaprbkaligner:2.0.1_indev' samtools sort -@ 20 -O BAM -o Q-Y5F6_1M.pe.sorted.bam Q-Y5F6_1M.pe.bam
    - singularity run 'docker://bicf/gudmaprbkaligner:2.0.1_indev' samtools index -@ 20 -b Q-Y5F6_1M.pe.sorted.bam Q-Y5F6_1M.pe.sorted.bam.bai
    - pytest -m alignData
  artifacts:
    name: "$CI_JOB_NAME"
    when: always
    paths:
      - version_hisat2.txt
      - version_samtools.txt
    expire_in: 7 days

# Removes duplicates with Picard MarkDuplicates, sorts and indexes the
# deduplicated BAM, then splits it into chr8 / chr4 / chrY BAMs in parallel.
dedupData:
  stage: unit
  only:
    - push
    - tags
  except:
    - merge_requests
  script:
    - singularity run 'docker://bicf/gudmaprbkdedup:2.0.0' java -jar /picard/build/libs/picard.jar MarkDuplicates I=./test_data/bam/small/Q-Y5F6_1M.se.sorted.bam O=Q-Y5F6_1M.se.deduped.bam M=Q-Y5F6_1M.se.deduped.Metrics.txt REMOVE_DUPLICATES=true
    - singularity run 'docker://bicf/gudmaprbkdedup:2.0.0' samtools sort -@ 20 -O BAM -o Q-Y5F6_1M.se.sorted.deduped.bam ./test_data/bam/small/Q-Y5F6_1M.se.deduped.bam
    - singularity run 'docker://bicf/gudmaprbkdedup:2.0.0' samtools index -@ 20 -b ./test_data/bam/small/Q-Y5F6_1M.se.sorted.deduped.bam Q-Y5F6_1M.se.sorted.deduped.bam.bai
    # The loop only prints one command line per chromosome; GNU parallel,
    # running inside the container, executes them.
    - >
      for i in {"chr8","chr4","chrY"}; do
      echo "samtools view -b Q-Y5F6_1M.se.sorted.deduped.bam ${i} > Q-Y5F6_1M.se.sorted.deduped.${i}.bam; samtools index -@ 20 -b Q-Y5F6_1M.se.sorted.deduped.${i}.bam Q-Y5F6_1M.se.sorted.deduped.${i}.bam.bai;";
      done | singularity run 'docker://bicf/gudmaprbkdedup:2.0.0' parallel -j 20 -k
    - pytest -m dedupData
    - singularity run 'docker://bicf/gudmaprbkdedup:2.0.0' java -jar /picard/build/libs/picard.jar MarkDuplicates &> version_markdups.txt
    - singularity run 'docker://bicf/gudmaprbkdedup:2.0.0' samtools --version > version_samtools.txt
  artifacts:
    name: "$CI_JOB_NAME"
    when: always
    paths:
      - version_markdups.txt
      - version_samtools.txt
    expire_in: 7 days

# Counts reads with featureCounts, then runs the TPM and gene-symbol scripts.
# NOTE(review): the pytest marker is makeFeatureCounts rather than the job
# name - confirm it matches the marker used in the test suite.
countData:
  stage: unit
  only:
    - push
    - tags
  except:
    - merge_requests
  script:
    - ln -s /project/BICF/BICF_Core/shared/gudmap/references/GRCh38.p12.v31/geneID.tsv
    - ln -s /project/BICF/BICF_Core/shared/gudmap/references/GRCh38.p12.v31/Entrez.tsv
    - singularity run 'docker://bicf/subread2:2.0.0' featureCounts -v > version_featurecounts.txt
    - singularity run 'docker://bicf/subread2:2.0.0' R --version > version_r.txt
    - singularity run 'docker://bicf/subread2:2.0.0' featureCounts -T 20 -a /project/BICF/BICF_Core/shared/gudmap/references/GRCh38.p12.v31/genome.gtf -G /project/BICF/BICF_Core/shared/gudmap/references/GRCh38.p12.v31/genome.fna -g 'gene_name' --extraAttributes 'gene_id' -o Q-Y5F6_1M.se.countData -s 1 -R SAM --primary --ignoreDup ./test_data/bam/small/Q-Y5F6_1M.se.sorted.deduped.bam
    - singularity run 'docker://bicf/subread2:2.0.0' Rscript ./workflow/scripts/calculateTPM.R --count ./test_data/counts/small/Q-Y5F6_1M.se.countData
    - singularity run 'docker://bicf/subread2:2.0.0' Rscript ./workflow/scripts/convertGeneSymbols.R --repRID Q-Y5F6_1M.se
    - assignedReads=$(grep -m 1 'Assigned' *.summary | grep -oe '\([0-9.]*\)')
    - pytest -m makeFeatureCounts
  artifacts:
    name: "$CI_JOB_NAME"
    when: always
    paths:
      - version_featurecounts.txt
      - version_r.txt
    expire_in: 7 days

# Builds a bigWig coverage track from the deduplicated test BAM.
makeBigWig:
  stage: unit
  only:
    - push
    - tags
  except:
    - merge_requests
  script:
    - singularity run 'docker://bicf/deeptools3.3:2.0.1_indev' deeptools --version > version_deeptools.txt
    - singularity run 'docker://bicf/deeptools3.3:2.0.1_indev' bamCoverage -p 20 -b ./test_data/bam/small/Q-Y5F6_1M.se.sorted.deduped.bam -o Q-Y5F6_1M.se.bw
    - pytest -m makeBigWig
  artifacts:
    name: "$CI_JOB_NAME"
    when: always
    paths:
      - version_deeptools.txt
    expire_in: 7 days

# Runs FastQC on the R1 test fastq.
fastqc:
  stage: unit
  only:
    - push
    - tags
  except:
    - merge_requests
  script:
    - singularity run 'docker://bicf/fastqc:2.0.1_indev' fastqc --version > version_fastqc.txt
    - singularity run 'docker://bicf/fastqc:2.0.1_indev' fastqc ./test_data/fastq/small/Q-Y5F6_1M.R1.fastq.gz -o .
    - pytest -m fastqc
  artifacts:
    name: "$CI_JOB_NAME"
    when: always
    paths:
      - version_fastqc.txt
    expire_in: 7 days

# Writes a TIN table header, then appends per-chromosome tin.py results
# (chr8 / chr4 / chrY) produced through GNU parallel.
dataQC:
  stage: unit
  only:
    - push
    - tags
  except:
    - merge_requests
  script:
    - echo -e "geneID\tchrom\ttx_start\ttx_end\tTIN" > Q-Y5F6_1M.se.sorted.deduped.tin.xls
    - for i in {"chr8","chr4","chrY"}; do echo "tin.py -i ./test_data/bam/small/Q-Y5F6_1M.se.sorted.deduped.${i}.bam -r /project/BICF/BICF_Core/shared/gudmap/references/GRCh38.p12.v31/bed/genome.bed; cat Q-Y5F6_1M.se.sorted.deduped.${i}.tin.xls | tr -s \"\\w\" \"\\t\" | grep -P \"\\t${i}\\t\";"; done | singularity run 'docker://bicf/rseqc3.0:2.0.1_indev' parallel -j 20 -k >> Q-Y5F6_1M.se.sorted.deduped.tin.xls
    - pytest -m dataQC

# Creates an empty directory and archives it as a zipped bag with bdbag.
outputBag:
  stage: unit
  only:
    - push
    - tags
  except:
    - merge_requests
  script:
    - mkdir test
    - singularity run 'docker://bicf/gudmaprbkfilexfer:2.0.1_indev' bdbag test --archiver zip
    - pytest -m outputBag

# Generates the software version and reference YAML files for MultiQC.
generateVersions:
  stage: aggregation
  only:
    - push
    - tags
  except:
    - merge_requests
  script:
    - singularity run 'docker://bicf/multiqc1.8:2.0.1_indev' multiqc --version > version_multiqc.txt
    - python ./workflow/scripts/generate_versions.py -o software_versions
    - python ./workflow/scripts/generate_references.py -r ./docs/references.md -o software_references
  artifacts:
    name: "$CI_JOB_NAME"
    when: always
    paths:
      - software_references_mqc.yaml
      - software_versions_mqc.yaml
    expire_in: 7 days

# Copies the human hisat2 reference from the shared BioHPC location.
humanBioHPC:
  stage: reference
  only:
    - push
    - tags
  except:
    - merge_requests
  script:
    - mkdir -p hu
    - cp -R /project/BICF/BICF_Core/shared/gudmap/references/GRCh38.p12.v31/hisat2 ./hu/

# Copies the mouse hisat2 reference from the shared BioHPC location.
# This job previously copied the human GRCh38.p12.v31 directory; it now uses
# the mouse build named in the mousenDataHub job (GRCm + 38.p6.vM22).
# NOTE(review): confirm this directory exists on the runner's filesystem.
mouseBioHPC:
  stage: reference
  only:
    - push
    - tags
  except:
    - merge_requests
  script:
    - mkdir -p mo
    - cp -R /project/BICF/BICF_Core/shared/gudmap/references/GRCm38.p6.vM22/hisat2 ./mo/

# Queries the data hub for the human reference record and checks that the
# referenced file is listed in hatrac.
humanDataHub:
  stage: reference
  only:
    - push
    - tags
  except:
    - merge_requests
  script:
    - ln -sfn `readlink -e ./test_data/auth/cookies.txt` ~/.bdbag/deriva-cookies.txt
    - referenceBase=dev.gudmap.org
    - refName=GRCh
    - refHuVersion=38.p12.v31
    - references=$(echo ${referenceBase}/${refName}${refHuVersion})
    - GRCv=$(echo ${references} | grep -o ${refName}.* | cut -d '.' -f1)
    - GRCp=$(echo ${references} | grep -o ${refName}.* | cut -d '.' -f2)
    - GENCODE=$(echo ${references} | grep -o ${refName}.* | cut -d '.' -f3)
    - query=$(echo 'https://'${referenceBase}'/ermrest/catalog/2/entity/RNASeq:Reference_Genome/Reference_Version='${GRCv}'.'${GRCp}'/Annotation_Version=GENCODE%20'${GENCODE})
    - curl --request GET ${query} > refQuery.json
    - refURL=$(python ./workflow/scripts/extractRefData.py --returnParam URL)
    - loc=$(dirname ${refURL})
    # NOTE(review): inside [ ] with a quoted operand no glob matching happens,
    # so this only fires when loc is literally "/hatrac/*" - confirm intent.
    - if [ "${loc}" = "/hatrac/*" ]; then echo "reference not present in hatrac"; exit 1; fi
    - filename=$(echo $(basename ${refURL}) | grep -oP '.*(?=:)')
    - test=$(singularity run 'docker://gudmaprbk/deriva1.3:1.0.0' deriva-hatrac-cli --host ${referenceBase} ls ${loc}/)
    - test=$(echo ${test} | grep -o ${filename})
    - if [ "${test}" == "" ]; then echo "reference file not present"; exit 1; fi

# Queries the data hub for the mouse reference record and checks that the
# referenced file is listed in hatrac.
# NOTE(review): the job name looks like a typo of "mouseDataHub"; it is left
# unchanged so the job name stays stable.
mousenDataHub:
  stage: reference
  only:
    - push
    - tags
  except:
    - merge_requests
  script:
    - ln -sfn `readlink -e ./test_data/auth/cookies.txt` ~/.bdbag/deriva-cookies.txt
    - referenceBase=dev.gudmap.org
    - refName=GRCm
    - refMoVersion=38.p6.vM22
    - references=$(echo ${referenceBase}/${refName}${refMoVersion})
    - GRCv=$(echo ${references} | grep -o ${refName}.* | cut -d '.' -f1)
    - GRCp=$(echo ${references} | grep -o ${refName}.* | cut -d '.' -f2)
    - GENCODE=$(echo ${references} | grep -o ${refName}.* | cut -d '.' -f3)
    - query=$(echo 'https://'${referenceBase}'/ermrest/catalog/2/entity/RNASeq:Reference_Genome/Reference_Version='${GRCv}'.'${GRCp}'/Annotation_Version=GENCODE%20'${GENCODE})
    - curl --request GET ${query} > refQuery.json
    - refURL=$(python ./workflow/scripts/extractRefData.py --returnParam URL)
    - loc=$(dirname ${refURL})
    # NOTE(review): inside [ ] with a quoted operand no glob matching happens,
    # so this only fires when loc is literally "/hatrac/*" - confirm intent.
    - if [ "${loc}" = "/hatrac/*" ]; then echo "reference not present in hatrac"; exit 1; fi
    - filename=$(echo $(basename ${refURL}) | grep -oP '.*(?=:)')
    - test=$(singularity run 'docker://gudmaprbk/deriva1.3:1.0.0' deriva-hatrac-cli --host ${referenceBase} ls ${loc}/)
    - test=$(echo ${test} | grep -o ${filename})
    - if [ "${test}" == "" ]; then echo "reference file not present"; exit 1; fi

# Full pipeline run for replicate 16-1ZX4; keeps the MultiQC JSON as
# SE_multiqc_data.json.
# NOTE(review): dag.png is generated here but, unlike integration_pe, is not
# listed under artifacts - confirm whether it should be archived.
integration_se:
  stage: integration
  only: [merge_requests]
  except:
    variables:
      - $CI_MERGE_REQUEST_TARGET_BRANCH_NAME =~ /master/
  script:
    - hostname
    - ulimit -a
    - nextflow -q run ./workflow/rna-seq.nf --deriva ./test_data/auth/credential.json --bdbag ./test_data/auth/cookies.txt --repRID 16-1ZX4 -with-dag dag.png --ci true
    - find . -type f -name "multiqc_data.json" -exec cp {} ./SE_multiqc_data.json \;
  artifacts:
    name: "$CI_JOB_NAME"
    when: always
    paths:
      - output/qc/
      - output/report/
      - SE_multiqc_data.json
    expire_in: 7 days
  retry:
    max: 1
    when:
      - always

# Full pipeline run for replicate Q-Y5JA; keeps the MultiQC JSON as
# PE_multiqc_data.json.
integration_pe:
  stage: integration
  only: [merge_requests]
  except:
    variables:
      - $CI_MERGE_REQUEST_TARGET_BRANCH_NAME =~ /master/
  script:
    - hostname
    - ulimit -a
    - nextflow -q run ./workflow/rna-seq.nf --deriva ./test_data/auth/credential.json --bdbag ./test_data/auth/cookies.txt --repRID Q-Y5JA -with-dag dag.png --ci true
    - find . -type f -name "multiqc_data.json" -exec cp {} ./PE_multiqc_data.json \;
  artifacts:
    name: "$CI_JOB_NAME"
    when: always
    paths:
      - dag.png
      - output/qc/
      - output/report/
      - PE_multiqc_data.json
    expire_in: 7 days
  retry:
    max: 1
    when:
      - always

# Pipeline run with the input bag supplied through --inputBagForce.
override_inputBag:
  stage: integration
  only: [merge_requests]
  except:
    variables:
      - $CI_MERGE_REQUEST_TARGET_BRANCH_NAME =~ /master/
  script:
    - hostname
    - ulimit -a
    - nextflow -q run ./workflow/rna-seq.nf --deriva ./test_data/auth/credential.json --bdbag ./test_data/auth/cookies.txt --repRID Q-Y5F6 --inputBagForce ./test_data/bag/Replicate_Q-Y5F6.zip --ci true
    - find . -type f -name "multiqc_data.json" -exec cp {} ./inputBagOverride_PE_multiqc_data.json \;
  artifacts:
    name: "$CI_JOB_NAME"
    when: always
    paths:
      - inputBagOverride_PE_multiqc_data.json
    expire_in: 7 days
  retry:
    max: 1
    when:
      - always

# Pipeline run with the fastq files supplied through --fastqsForce.
override_fastq:
  stage: integration
  only: [merge_requests]
  except:
    variables:
      - $CI_MERGE_REQUEST_TARGET_BRANCH_NAME =~ /master/
  script:
    - hostname
    - ulimit -a
    - nextflow -q run ./workflow/rna-seq.nf --deriva ./test_data/auth/credential.json --bdbag ./test_data/auth/cookies.txt --repRID Q-Y5F6 --fastqsForce './test_data/fastq/small/Q-Y5F6_1M.R{1,2}.fastq.gz' --ci true
    - find . -type f -name "multiqc_data.json" -exec cp {} ./fastqOverride_PE_multiqc_data.json \;
  artifacts:
    name: "$CI_JOB_NAME"
    when: always
    paths:
      - fastqOverride_PE_multiqc_data.json
    expire_in: 7 days
  retry:
    max: 1
    when:
      - always

# Pipeline run with the species supplied through --speciesForce.
override_species:
  stage: integration
  only: [merge_requests]
  except:
    variables:
      - $CI_MERGE_REQUEST_TARGET_BRANCH_NAME =~ /master/
  script:
    - hostname
    - ulimit -a
    - nextflow -q run ./workflow/rna-seq.nf --deriva ./test_data/auth/credential.json --bdbag ./test_data/auth/cookies.txt --repRID Q-Y5ER --speciesForce 'Homo sapiens' --ci true
    - find . -type f -name "multiqc_data.json" -exec cp {} ./speciesOverride_PE_multiqc_data.json \;
  artifacts:
    name: "$CI_JOB_NAME"
    when: always
    paths:
      - speciesOverride_PE_multiqc_data.json
    expire_in: 7 days
  retry:
    max: 1
    when:
      - always

# Extracts the "Assigned" read counts from the SE / PE MultiQC JSON files and
# writes the expected counts next to them for the pytest comparison.
consistency:
  stage: consistency
  only: [merge_requests]
  except:
    variables:
      - $CI_MERGE_REQUEST_TARGET_BRANCH_NAME =~ /master/
  script:
    - grep -m 1 \"Assigned\":.[0-9] SE_multiqc_data.json | grep -oe '\([0-9.]*\)' > assignedSE.txt
    - grep -m 1 \"Assigned\":.[0-9] PE_multiqc_data.json | grep -oe '\([0-9.]*\)' > assignedPE.txt
    - echo 7742416 > assignedExpectSE.txt
    - echo 2599140 > assignedExpectPE.txt
    - pytest -m consistencySE
    - pytest -m consistencyPE
  artifacts:
    name: "$CI_JOB_NAME"
    when: always
    paths:
      - SE_multiqc_data.json
      - PE_multiqc_data.json
      - assignedSE.txt
      - assignedPE.txt
      - assignedExpectSE.txt
      - assignedExpectPE.txt
    expire_in: 7 days