Newer
Older
- pip install --user attrs==19.1.0 pytest-pythonpath==0.7.1 pytest-cov==2.5.1 deriva==1.3.0
- module load singularity/3.5.3
- module load nextflow/20.01.0
- ln -sfn /project/BICF/BICF_Core/shared/gudmap/test_data/* ./test_data/
- consistency
only:
- push
- tags
except:
- merge_requests
- ln -sfn `readlink -e ./test_data/auth/credential.json` ~/.deriva/credential.json
- singularity run 'docker://bicf/gudmaprbkfilexfer:2.0.1_indev' deriva-download-cli --version > version_deriva.txt
- singularity run 'docker://bicf/gudmaprbkfilexfer:2.0.1_indev' deriva-download-cli dev.gudmap.org --catalog 2 ./workflow/conf/replicate_export_config.json . rid=Q-Y5F6
artifacts:
name: "$CI_JOB_NAME"
when: always
paths:
- version_deriva.txt
expire_in: 7 days
- singularity run 'docker://bicf/gudmaprbkfilexfer:2.0.1_indev' bdbag --version > version_bdbag.txt
- ln -sfn `readlink -e ./test_data/auth/cookies.txt` ~/.bdbag/deriva-cookies.txt

Gervaise Henry
committed
- unzip ./test_data/bag/Replicate_Q-Y5F6.zip
- singularity run 'docker://bicf/gudmaprbkfilexfer:2.0.1_indev' bash ./workflow/scripts/bdbagFetch.sh Replicate_Q-Y5F6 Replicate_Q-Y5F6 TEST
artifacts:
name: "$CI_JOB_NAME"
when: always
paths:
- version_bdbag.txt
expire_in: 7 days
only:
- push
- tags
except:
- merge_requests
- singularity run 'docker://bicf/python3:2.0.1_indev' python3 --version > version_python.txt
- rep=$(singularity run 'docker://bicf/python3:2.0.1_indev' python3 ./workflow/scripts/parseMeta.py -r Replicate_RID -m "./test_data/meta/metaTest.csv" -p repRID)
- exp=$(singularity run 'docker://bicf/python3:2.0.1_indev' python3 ./workflow/scripts/parseMeta.py -r Replicate_RID -m "./test_data/meta/metaTest.csv" -p expRID)
- study=$(singularity run 'docker://bicf/python3:2.0.1_indev' python3 ./workflow/scripts/parseMeta.py -r Replicate_RID -m "./test_data/meta/metaTest.csv" -p studyRID)
- endsMeta=$(singularity run 'docker://bicf/python3:2.0.1_indev' python3 ./workflow/scripts/parseMeta.py -r Replicate_RID -m "./test_data/meta/metaTest.csv" -p endsMeta)
- endsManual=$(singularity run 'docker://bicf/python3:2.0.1_indev' python3 ./workflow/scripts/parseMeta.py -r Replicate_RID -m "./test_data/meta/metaTest.csv" -p endsManual)
- stranded=$(singularity run 'docker://bicf/python3:2.0.1_indev' python3 ./workflow/scripts/parseMeta.py -r Replicate_RID -m "./test_data/meta/metaTest.csv" -p stranded)
- spike=$(singularity run 'docker://bicf/python3:2.0.1_indev' python3 ./workflow/scripts/parseMeta.py -r Replicate_RID -m "./test_data/meta/metaTest.csv" -p spike)
- species=$(singularity run 'docker://bicf/python3:2.0.1_indev' python3 ./workflow/scripts/parseMeta.py -r Replicate_RID -m "./test_data/meta/metaTest.csv" -p species)
- readLength=$(singularity run 'docker://bicf/python3:2.0.1_indev' python3 ./workflow/scripts/parseMeta.py -r Replicate_RID -m "./test_data/meta/metaTest.stageNew.csv" -p readLength)
- echo -e "${endsMeta},${endsManual},${stranded},${spike},${species},${readLength},${exp},${study},${rep}" > design.csv
- pytest -m parseMetadata
artifacts:
name: "$CI_JOB_NAME"
when: always
paths:
- version_python.txt
expire_in: 7 days
only:
- push
- tags
except:
- merge_requests
- singularity run 'docker://bicf/rseqc3.0:2.0.1_indev' infer_experiment.py --version > version_rseqc.txt
align=$(echo $(grep "Overall alignment rate" ./test_data/meta/Q-Y5F6_1M.se.alignSummary.txt | cut -f2 -d ':' | cut -f2 -d ' ' | tr -d '%')) &&
if [[ ${align} == "" ]]; then exit 1; fi
singularity run 'docker://bicf/rseqc3.0:2.0.1_indev' infer_experiment.py -r "/project/BICF/BICF_Core/shared/gudmap/references/GRCh38.p12.v31/bed/genome.bed" -i "./test_data/bam/small/Q-Y5F6_1M.se.sorted.deduped.bam" 1>> Q-Y5F6_1M.se.inferMetadata.log &&
ended=`singularity run 'docker://bicf/python3:1.3' python3 ./workflow/scripts/inferMeta.sh endness Q-Y5F6_1M.se.inferMetadata.log` &&
if [[ ${ended} == "" ]]; then exit 1; fi
artifacts:
name: "$CI_JOB_NAME"
when: always
paths:
- version_rseqc.txt
expire_in: 7 days
only:
- push
- tags
except:
- merge_requests
- singularity run 'docker://bicf/trimgalore:1.1' trim_galore --version > version_trimgalore.txt
- singularity run 'docker://bicf/trimgalore:1.1' trim_galore --gzip -q 25 --length 35 --basename Q-Y5F6_1M.se ./test_data/fastq/small/Q-Y5F6_1M.R1.fastq.gz
- singularity run 'docker://bicf/trimgalore:1.1' trim_galore --gzip -q 25 --length 35 --paired --basename Q-Y5F6_1M.pe ./test_data/fastq/small/Q-Y5F6_1M.R1.fastq.gz ./test_data/fastq/small/Q-Y5F6_1M.R2.fastq.gz
- readLengthSE=$(zcat *_trimmed.fq.gz | awk '{if(NR%4==2) print length($1)}' | sort -n | awk '{a[NR]=$0}END{print(NR%2==1)?a[int(NR/2)+1]:(a[NR/2]+a[NR/2+1])/2}')
- readLengthPE=$(zcat *_1.fq.gz | awk '{if(NR%4==2) print length($1)}' | sort -n | awk '{a[NR]=$0}END{print(NR%2==1)?a[int(NR/2)+1]:(a[NR/2]+a[NR/2+1])/2}')
artifacts:
name: "$CI_JOB_NAME"
when: always
paths:
- version_trimgalore.txt
expire_in: 7 days
only:
- push
- tags
except:
- merge_requests
- singularity run 'docker://bicf/seqtk:2.0.1_indev' seqtk sample -s100 ./test_data/fastq/small/Q-Y5F6_1M.se_trimmed.fq.gz 1000 1> sampled.1.fq
only:
- push
- tags
except:
- merge_requests
- singularity run 'docker://bicf/gudmaprbkaligner:2.0.1_indev' hisat2 --version > version_hisat2.txt
- singularity run 'docker://bicf/gudmaprbkaligner:2.0.1_indev' samtools --version > version_samtools.txt
- singularity run 'docker://bicf/gudmaprbkaligner:2.0.1_indev' hisat2 -p 20 --add-chrname --un-gz Q-Y5F6_1M.se.unal.gz -S Q-Y5F6_1M.se.sam -x /project/BICF/BICF_Core/shared/gudmap/references/GRCh38.p12.v31/hisat2/genome --rna-strandness F -U ./test_data/fastq/small/Q-Y5F6_1M.se_trimmed.fq.gz --summary-file Q-Y5F6_1M.se.alignSummary.txt --new-summary
- singularity run 'docker://bicf/gudmaprbkaligner:2.0.1_indev' samtools view -1 -@ 20 -F 4 -F 8 -F 256 -o Q-Y5F6_1M.se.bam Q-Y5F6_1M.se.sam
- singularity run 'docker://bicf/gudmaprbkaligner:2.0.1_indev' samtools sort -@ 20 -O BAM -o Q-Y5F6_1M.se.sorted.bam Q-Y5F6_1M.se.bam
- singularity run 'docker://bicf/gudmaprbkaligner:2.0.1_indev' samtools index -@ 20 -b Q-Y5F6_1M.se.sorted.bam Q-Y5F6_1M.se.sorted.bam.bai
- singularity run 'docker://bicf/gudmaprbkaligner:2.0.1_indev' hisat2 -p 20 --add-chrname --un-gz Q-Y5F6_1M.pe.unal.gz -S Q-Y5F6_1M.pe.sam -x /project/BICF/BICF_Core/shared/gudmap/references/GRCh38.p12.v31/hisat2/genome --rna-strandness FR --no-mixed --no-discordant -1 ./test_data/fastq/small/Q-Y5F6_1M.pe_R1_val_1.fq.gz -2 ./test_data/fastq/small/Q-Y5F6_1M.pe_R2_val_2.fq.gz --summary-file Q-Y5F6_1M.pe.alignSummary.txt --new-summary
- singularity run 'docker://bicf/gudmaprbkaligner:2.0.1_indev' samtools view -1 -@ 20 -F 4 -F 8 -F 256 -o Q-Y5F6_1M.pe.bam Q-Y5F6_1M.pe.sam
- singularity run 'docker://bicf/gudmaprbkaligner:2.0.1_indev' samtools sort -@ 20 -O BAM -o Q-Y5F6_1M.pe.sorted.bam Q-Y5F6_1M.pe.bam
- singularity run 'docker://bicf/gudmaprbkaligner:2.0.1_indev' samtools index -@ 20 -b Q-Y5F6_1M.pe.sorted.bam Q-Y5F6_1M.pe.sorted.bam.bai
artifacts:
name: "$CI_JOB_NAME"
when: always
paths:
- version_hisat2.txt
- version_samtools.txt
expire_in: 7 days
only:
- push
- tags
except:
- merge_requests
- singularity run 'docker://bicf/gudmaprbkdedup:2.0.0' java -jar /picard/build/libs/picard.jar MarkDuplicates --version &> version_markdups.txt
- singularity run 'docker://bicf/gudmaprbkdedup:2.0.0' samtools --version > version_samtools.txt
- singularity run 'docker://bicf/gudmaprbkdedup:2.0.0' java -jar /picard/build/libs/picard.jar MarkDuplicates I=./test_data/bam/small/Q-Y5F6_1M.se.sorted.bam O=Q-Y5F6_1M.se.deduped.bam M=Q-Y5F6_1M.se.deduped.Metrics.txt REMOVE_DUPLICATES=true
- singularity run 'docker://bicf/gudmaprbkdedup:2.0.0' samtools sort -@ 20 -O BAM -o Q-Y5F6_1M.se.sorted.deduped.bam ./test_data/bam/small/Q-Y5F6_1M.se.deduped.bam
- singularity run 'docker://bicf/gudmaprbkdedup:2.0.0' samtools index -@ 20 -b ./test_data/bam/small/Q-Y5F6_1M.se.sorted.deduped.bam Q-Y5F6_1M.se.sorted.deduped.bam.bai
for i in {"chr8","chr4","chrY"}; do
echo "samtools view -b Q-Y5F6_1M.se.sorted.deduped.bam ${i} > Q-Y5F6_1M.se.sorted.deduped.${i}.bam; samtools index -@ 20 -b Q-Y5F6_1M.se.sorted.deduped.${i}.bam Q-Y5F6_1M.se.sorted.deduped.${i}.bam.bai;";

Gervaise Henry
committed
done | singularity run 'docker://bicf/gudmaprbkdedup:2.0.0' parallel -j 20 -k
artifacts:
name: "$CI_JOB_NAME"
when: always
paths:
- version_markdups.txt
- version_samtools.txt
expire_in: 7 days
only:
- push
- tags
except:
- merge_requests
- ln -s /project/BICF/BICF_Core/shared/gudmap/references/GRCh38.p12.v31/geneID.tsv
- ln -s /project/BICF/BICF_Core/shared/gudmap/references/GRCh38.p12.v31/Entrez.tsv
- singularity run 'docker://bicf/subread2:2.0.0' featureCounts -v > version_featurecounts.txt
- singularity run 'docker://bicf/subread2:2.0.0' R --version > version_r.txt
- singularity run 'docker://bicf/subread2:2.0.0' featureCounts -T 20 -a /project/BICF/BICF_Core/shared/gudmap/references/GRCh38.p12.v31/genome.gtf -G /project/BICF/BICF_Core/shared/gudmap/references/GRCh38.p12.v31/genome.fna -g 'gene_name' --extraAttributes 'gene_id' -o Q-Y5F6_1M.se.countData -s 1 -R SAM --primary --ignoreDup ./test_data/bam/small/Q-Y5F6_1M.se.sorted.deduped.bam
- singularity run 'docker://bicf/subread2:2.0.0' Rscript ./workflow/scripts/calculateTPM.R --count ./test_data/counts/small/Q-Y5F6_1M.se.countData
- singularity run 'docker://bicf/subread2:2.0.0' Rscript ./workflow/scripts/convertGeneSymbols.R --repRID Q-Y5F6_1M.se
- assignedReads=$(grep -m 1 'Assigned' *.summary | grep -oe '\([0-9.]*\)')
artifacts:
name: "$CI_JOB_NAME"
when: always
paths:
- version_featurecounts.txt
- version_r.txt
expire_in: 7 days
only:
- push
- tags
except:
- merge_requests
- singularity run 'docker://bicf/deeptools3.3:2.0.1_indev' deeptools --version > version_deeptools.txt
- singularity run 'docker://bicf/deeptools3.3:2.0.1_indev' bamCoverage -p 20 -b ./test_data/bam/small/Q-Y5F6_1M.se.sorted.deduped.bam -o Q-Y5F6_1M.se.bw
artifacts:
name: "$CI_JOB_NAME"
when: always
paths:
- version_deeptools.txt
expire_in: 7 days
only:
- push
- tags
except:
- merge_requests
- singularity run 'docker://bicf/fastqc:2.0.1_indev' fastqc --version > version_fastqc.txt
- singularity run 'docker://bicf/fastqc:2.0.1_indev' fastqc ./test_data/fastq/small/Q-Y5F6_1M.R1.fastq.gz -o .
artifacts:
name: "$CI_JOB_NAME"
when: always
paths:
- version_fastqc.txt
expire_in: 7 days
only:
- push
- tags
except:
- merge_requests
- echo -e "geneID\tchrom\ttx_start\ttx_end\tTIN" > Q-Y5F6_1M.se.sorted.deduped.tin.xls
- for i in {"chr8","chr4","chrY"}; do
echo "tin.py -i ./test_data/bam/small/Q-Y5F6_1M.se.sorted.deduped.${i}.bam -r /project/BICF/BICF_Core/shared/gudmap/references/GRCh38.p12.v31/bed/genome.bed; cat Q-Y5F6_1M.se.sorted.deduped.${i}.tin.xls | tr -s \"\\w\" \"\\t\" | grep -P \"\\t${i}\\t\";"; done | singularity run 'docker://bicf/rseqc3.0:2.0.1_indev' parallel -j 20 -k >> Q-Y5F6_1M.se.sorted.deduped.tin.xls
only:
- push
- tags
except:
- merge_requests
script:
- mkdir test
- singularity run 'docker://bicf/gudmaprbkfilexfer:2.0.1_indev' bdbag test --archiver zip
- pytest -m outputBag
only:
- push
- tags
except:
- merge_requests
script:
- singularity run 'docker://bicf/multiqc1.8:2.0.1_indev' multiqc --version > version_multiqc.txt
- python ./workflow/scripts/generate_versions.py -o software_versions
- python ./workflow/scripts/generate_references.py -r ./docs/references.md -o software_references
artifacts:
name: "$CI_JOB_NAME"
when: always
paths:
- software_references_mqc.yaml
- software_versions_mqc.yaml
expire_in: 7 days
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
humanBioHPC:
  # Reference-stage job: copy the hisat2 directory of the GRCh38.p12.v31
  # reference from the shared /project/BICF tree into a local ./hu directory.
  stage: reference
  # Run for pushes and tags, but not for merge-request pipelines.
  only:
    - push
    - tags
  except:
    - merge_requests
  script:
    # mkdir -p does not fail when ./hu already exists.
    - mkdir -p hu
    - cp -R /project/BICF/BICF_Core/shared/gudmap/references/GRCh38.p12.v31/hisat2 ./hu/
mouseBioHPC:
  # Reference-stage job: copy the hisat2 directory of the mouse reference from
  # the shared /project/BICF tree into a local ./mo directory.
  stage: reference
  # Run for pushes and tags, but not for merge-request pipelines.
  only:
    - push
    - tags
  except:
    - merge_requests
  script:
    # mkdir -p does not fail when ./mo already exists.
    - mkdir -p mo
    # Use the mouse reference (GRCm38.p6.vM22, the same name/version the mouse
    # DataHub check in this file queries) rather than the human GRCh38.p12.v31
    # directory that was previously copied here.
    # NOTE(review): confirm this directory exists on the shared filesystem.
    - cp -R /project/BICF/BICF_Core/shared/gudmap/references/GRCm38.p6.vM22/hisat2 ./mo/
humanDataHub:
  # Reference-stage job: look up the GRCh38.p12.v31 reference in the
  # dev.gudmap.org catalog and fail unless its file is listed in hatrac.
  stage: reference
  # Run for pushes and tags, but not for merge-request pipelines.
  only:
    - push
    - tags
  except:
    - merge_requests
  script:
    # Link the test cookie file to the location bdbag reads it from.
    - ln -sfn `readlink -e ./test_data/auth/cookies.txt` ~/.bdbag/deriva-cookies.txt
    - referenceBase=dev.gudmap.org
    - refName=GRCh
    - refHuVersion=38.p12.v31
    - references=$(echo ${referenceBase}/${refName}${refHuVersion})
    # Split "<refName><v>.<p>.<gencode>" on '.' into its three components.
    # The grep pattern is quoted so the shell cannot glob-expand it.
    - GRCv=$(echo ${references} | grep -o "${refName}.*" | cut -d '.' -f1)
    - GRCp=$(echo ${references} | grep -o "${refName}.*" | cut -d '.' -f2)
    - GENCODE=$(echo ${references} | grep -o "${refName}.*" | cut -d '.' -f3)
    - query=$(echo 'https://'${referenceBase}'/ermrest/catalog/2/entity/RNASeq:Reference_Genome/Reference_Version='${GRCv}'.'${GRCp}'/Annotation_Version=GENCODE%20'${GENCODE})
    # The query result is written to refQuery.json.
    # NOTE(review): extractRefData.py presumably reads that file — confirm.
    - curl --request GET "${query}" > refQuery.json
    - refURL=$(python ./workflow/scripts/extractRefData.py --returnParam URL)
    - loc=$(dirname "${refURL}")
    # Fail when the returned location is not under /hatrac/. The previous test,
    # [ "${loc}" = "/hatrac/*" ], compared against the literal string "/hatrac/*"
    # and so could never fire; [[ ... != pattern ]] performs a real glob match.
    - if [[ "${loc}" != /hatrac/* ]]; then echo "reference not present in hatrac"; exit 1; fi
    # Keep the part of the basename before the last ':'.
    - filename=$(echo $(basename "${refURL}") | grep -oP '.*(?=:)')
    - test=$(singularity run 'docker://gudmaprbk/deriva1.3:1.0.0' deriva-hatrac-cli --host ${referenceBase} ls ${loc}/)
    - test=$(echo ${test} | grep -o "${filename}")
    # An empty match means the listing did not contain the expected file name.
    - if [[ -z "${test}" ]]; then echo "reference file not present"; exit 1; fi
mousenDataHub:
  # Reference-stage job: look up the GRCm38.p6.vM22 reference in the
  # dev.gudmap.org catalog and fail unless its file is listed in hatrac.
  # NOTE(review): the job key looks like a typo for "mouseDataHub"; it is left
  # unchanged here because other configuration may refer to it by name.
  stage: reference
  # Run for pushes and tags, but not for merge-request pipelines.
  only:
    - push
    - tags
  except:
    - merge_requests
  script:
    # Link the test cookie file to the location bdbag reads it from.
    - ln -sfn `readlink -e ./test_data/auth/cookies.txt` ~/.bdbag/deriva-cookies.txt
    - referenceBase=dev.gudmap.org
    - refName=GRCm
    # Renamed from refHuVersion: this variable holds the mouse version.
    - refMoVersion=38.p6.vM22
    - references=$(echo ${referenceBase}/${refName}${refMoVersion})
    # Split "<refName><v>.<p>.<gencode>" on '.' into its three components.
    # The grep pattern is quoted so the shell cannot glob-expand it.
    - GRCv=$(echo ${references} | grep -o "${refName}.*" | cut -d '.' -f1)
    - GRCp=$(echo ${references} | grep -o "${refName}.*" | cut -d '.' -f2)
    - GENCODE=$(echo ${references} | grep -o "${refName}.*" | cut -d '.' -f3)
    - query=$(echo 'https://'${referenceBase}'/ermrest/catalog/2/entity/RNASeq:Reference_Genome/Reference_Version='${GRCv}'.'${GRCp}'/Annotation_Version=GENCODE%20'${GENCODE})
    # The query result is written to refQuery.json.
    # NOTE(review): extractRefData.py presumably reads that file — confirm.
    - curl --request GET "${query}" > refQuery.json
    - refURL=$(python ./workflow/scripts/extractRefData.py --returnParam URL)
    - loc=$(dirname "${refURL}")
    # Fail when the returned location is not under /hatrac/. The previous test,
    # [ "${loc}" = "/hatrac/*" ], compared against the literal string "/hatrac/*"
    # and so could never fire; [[ ... != pattern ]] performs a real glob match.
    - if [[ "${loc}" != /hatrac/* ]]; then echo "reference not present in hatrac"; exit 1; fi
    # Keep the part of the basename before the last ':'.
    - filename=$(echo $(basename "${refURL}") | grep -oP '.*(?=:)')
    - test=$(singularity run 'docker://gudmaprbk/deriva1.3:1.0.0' deriva-hatrac-cli --host ${referenceBase} ls ${loc}/)
    - test=$(echo ${test} | grep -o "${filename}")
    # An empty match means the listing did not contain the expected file name.
    - if [[ -z "${test}" ]]; then echo "reference file not present"; exit 1; fi
variables:
- $CI_MERGE_REQUEST_TARGET_BRANCH_NAME =~ /master/

Gervaise Henry
committed
- hostname
- nextflow -q run ./workflow/rna-seq.nf --deriva ./test_data/auth/credential.json --bdbag ./test_data/auth/cookies.txt --repRID 16-1ZX4 -with-dag dag.png --ci true
- find . -type f -name "multiqc_data.json" -exec cp {} ./SE_multiqc_data.json \;
artifacts:
name: "$CI_JOB_NAME"
when: always
paths:
- output/qc/
expire_in: 7 days
integration_pe:
stage: integration
variables:
- $CI_MERGE_REQUEST_TARGET_BRANCH_NAME =~ /master/

Gervaise Henry
committed
- hostname
- nextflow -q run ./workflow/rna-seq.nf --deriva ./test_data/auth/credential.json --bdbag ./test_data/auth/cookies.txt --repRID Q-Y5JA -with-dag dag.png --ci true
- find . -type f -name "multiqc_data.json" -exec cp {} ./PE_multiqc_data.json \;
artifacts:
name: "$CI_JOB_NAME"
when: always
paths:
- dag.png
- output/qc/

Gervaise Henry
committed
# Integration-stage job: runs the rna-seq.nf workflow for replicate Q-Y5F6 with
# --inputBagForce pointing at a local bag zip, then saves the resulting
# multiqc_data.json under a fixed name as a job artifact.
override_inputBag:
stage: integration
# NOTE(review): this expression list presumably belongs under an only:/except:
# key that is not visible in this view — confirm against the full file.
variables:
- $CI_MERGE_REQUEST_TARGET_BRANCH_NAME =~ /master/
script:
# Print the runner host name and shell resource limits to the job log.
- hostname
- ulimit -a
- nextflow -q run ./workflow/rna-seq.nf --deriva ./test_data/auth/credential.json --bdbag ./test_data/auth/cookies.txt --repRID Q-Y5F6 --inputBagForce ./test_data/bag/Replicate_Q-Y5F6.zip --ci true
# Copy every multiqc_data.json found below the working directory to the
# artifact name listed under paths (a later match overwrites an earlier one).
- find . -type f -name "multiqc_data.json" -exec cp {} ./inputBagOverride_PE_multiqc_data.json \;
artifacts:
name: "$CI_JOB_NAME"
# Upload artifacts regardless of whether the job succeeds or fails.
when: always
paths:
- inputBagOverride_PE_multiqc_data.json

Gervaise Henry
committed
override_fastq:
stage: integration
variables:
- $CI_MERGE_REQUEST_TARGET_BRANCH_NAME =~ /master/
- nextflow -q run ./workflow/rna-seq.nf --deriva ./test_data/auth/credential.json --bdbag ./test_data/auth/cookies.txt --repRID Q-Y5F6 --fastqsForce './test_data/fastq/small/Q-Y5F6_1M.R{1,2}.fastq.gz' --ci true
- find . -type f -name "multiqc_data.json" -exec cp {} ./fastqOverride_PE_multiqc_data.json \;
artifacts:
name: "$CI_JOB_NAME"
when: always
paths:
- fastqOverride_PE_multiqc_data.json

Gervaise Henry
committed
expire_in: 7 days
variables:
- $CI_MERGE_REQUEST_TARGET_BRANCH_NAME =~ /master/
script:
- hostname
- ulimit -a
- nextflow -q run ./workflow/rna-seq.nf --deriva ./test_data/auth/credential.json --bdbag ./test_data/auth/cookies.txt --repRID Q-Y5ER --speciesForce 'Homo sapiens' --ci true
- find . -type f -name "multiqc_data.json" -exec cp {} ./speciesOverride_PE_multiqc_data.json \;
artifacts:
name: "$CI_JOB_NAME"
when: always
paths:
- speciesOverride_PE_multiqc_data.json
expire_in: 7 days

Gervaise Henry
committed
consistency:
stage: consistency
variables:
- $CI_MERGE_REQUEST_TARGET_BRANCH_NAME =~ /master/

Gervaise Henry
committed
script:
- grep -m 1 \"Assigned\":.[0-9] SE_multiqc_data.json | grep -oe '\([0-9.]*\)' > assignedSE.txt
- grep -m 1 \"Assigned\":.[0-9] PE_multiqc_data.json | grep -oe '\([0-9.]*\)' > assignedPE.txt
- echo 7742416 > assignedExpectSE.txt
- echo 2599140 > assignedExpectPE.txt
- pytest -m consistencySE
- pytest -m consistencyPE
artifacts:
name: "$CI_JOB_NAME"
when: always
paths:
- SE_multiqc_data.json
- PE_multiqc_data.json
- assignedSE.txt
- assignedPE.txt
- assignedExpectSE.txt
- assignedExpectPE.txt