before_script:
  - module add  python/3.6.4-anaconda
  - pip install --user pytest-pythonpath==0.7.1 pytest-cov==2.5.1
  - module load singularity/3.0.2
  - module load nextflow/19.09.0
  - ln -sfn /project/BICF/BICF_Core/shared/gudmap/test_data/* ./test_data/
  - mkdir -p ~/.deriva
  - mkdir -p ~/.bdbag

stages:
  - unit
  - integration

getBag:
  stage: unit
  script:
  - ln -sfn `readlink -e ./test_data/auth/credential.json` ~/.deriva/credential.json
  - singularity run 'docker://bicf/gudmaprbkfilexfer:1.3' deriva-download-cli dev.gudmap.org --catalog 2 ./workflow/conf/replicate_export_config.json . rid=16-1ZX4
  - pytest -m getBag

getData:
  stage: unit
  script:
  - ln -sfn `readlink -e ./test_data/auth/cookies.txt` ~/.bdbag/deriva-cookies.txt
  - unzip ./test_data/bagit/Study_Q-Y4H0.zip
  - singularity run 'docker://bicf/gudmaprbkfilexfer:1.3' bash ./workflow/scripts/bdbagFetch.sh Study_Q-Y4H0 Q-Y4H0 TEST
  - pytest -m getData

parseMetadata:
  stage: unit
  script:
  - singularity run 'docker://bicf/python3:1.3' python3 ./workflow/scripts/parseMeta.py -r Replicate_RID -m "./test_data/meta/metaTest.csv" -p repRID
  - singularity run 'docker://bicf/python3:1.3' python3 ./workflow/scripts/parseMeta.py -r Replicate_RID -m "./test_data/meta/metaTest.csv" -p ends
  - singularity run 'docker://bicf/python3:1.3' python3 ./workflow/scripts/parseMeta.py -r Replicate_RID -m "./test_data/meta/metaTest.csv" -p endsManual
  - singularity run 'docker://bicf/python3:1.3' python3 ./workflow/scripts/parseMeta.py -r Replicate_RID -m "./test_data/meta/metaTest.csv" -p stranded
  - singularity run 'docker://bicf/python3:1.3' python3 ./workflow/scripts/parseMeta.py -r Replicate_RID -m "./test_data/meta/metaTest.csv" -p species

inferMetadata:
  stage: unit
  script:
  - >
    align=$(echo $(grep "Overall alignment rate" ./test_data/meta/Q-Y5JA_1M.se.alignSummary.txt | cut -f2 -d ':' | cut -f2 -d ' ' | tr -d '%'))
    if [[ ${align} == "" ]]
    then
      exit 1
    fi
  - >
    singularity run 'docker://bicf/rseqc3.0:2.0.0' infer_experiment.py -r "/project/BICF/BICF_Core/shared/gudmap/references/GRCh38.p12.v31/bed/genome.bed" -i "./test_data/bam/small/Q-Y5JA_1M.se.sorted.deduped.bam" 1>> Q-Y5JA_1M.se.inferMetadata.log
    ended=`bash inferMeta.sh endness Q-Y5JA_1M.se.inferMetadata.log
    if [[ ${ended} == "" ]]
    then
      exit 1
    fi
  - pytest -m inferMetadata

getRef:
  stage: unit
  script:
  - mkdir -p hu
  - mkdir -p mo
  - cp -R /project/BICF/BICF_Core/shared/gudmap/references/GRCh38.p12.v31/hisat2 ./hu/
  - cp -R /project/BICF/BICF_Core/shared/gudmap/references/GRCh38.p12.v31/hisat2 ./mo/

trimData:
  stage: unit
  script:
  - singularity run 'docker://bicf/trimgalore:1.1' trim_galore --gzip -q 25 --illumina --length 35 --basename Q-Y5JA_1M.se -j 20 ./test_data/fastq/small/Q-Y5JA_1M.R1.fastq.gz
  - singularity run 'docker://bicf/trimgalore:1.1' trim_galore --gzip -q 25 --illumina --length 35 --paired --basename Q-Y5JA_1M.pe -j 20 ./test_data/fastq/small/Q-Y5JA_1M.R1.fastq.gz ./test_data/fastq/small/Q-Y5JA_1M.R2.fastq.gz
  - pytest -m trimData

alignData:
  stage: unit
  script:
  - singularity run 'docker://bicf/gudmaprbkaligner:2.0.0' hisat2 -p 20 --add-chrname --un-gz Q-Y5JA_1M.se.unal.gz -S Q-Y5JA_1M.se.sam -x /project/BICF/BICF_Core/shared/gudmap/references/GRCh38.p12.v31/hisat2/genome --rna-strandness F -U ./test_data/fastq/small/Q-Y5JA_1M_trimmed.fq.gz --summary-file Q-Y5JA_1M.se.alignSummary.txt --new-summary
  - singularity run 'docker://bicf/gudmaprbkaligner:2.0.0' samtools view -1 -@ 20 -F 4 -F 8 -F 256 -o Q-Y5JA_1M.se.bam Q-Y5JA_1M.se.sam
  - singularity run 'docker://bicf/gudmaprbkaligner:2.0.0' samtools sort -@ 20 -O BAM -o Q-Y5JA_1M.se.sorted.bam Q-Y5JA_1M.se.bam
  - singularity run 'docker://bicf/gudmaprbkaligner:2.0.0' samtools index -@ 20 -b Q-Y5JA_1M.se.sorted.bam Q-Y5JA_1M.se.sorted.bam.bai
  - singularity run 'docker://bicf/gudmaprbkaligner:2.0.0' hisat2 -p 20 --add-chrname --un-gz Q-Y5JA_1M.pe.unal.gz -S Q-Y5JA_1M.pe.sam -x /project/BICF/BICF_Core/shared/gudmap/references/GRCh38.p12.v31/hisat2/genome --rna-strandness FR --no-mixed --no-discordant -1 ./test_data/fastq/small/Q-Y5JA_1M_R1_val_1.fq.gz -2 ./test_data/fastq/small/Q-Y5JA_1M_R2_val_2.fq.gz --summary-file Q-Y5JA_1M.pe.alignSummary.txt --new-summary
  - singularity run 'docker://bicf/gudmaprbkaligner:2.0.0' samtools view -1 -@ 20 -F 4 -F 8 -F 256 -o Q-Y5JA_1M.pe.bam Q-Y5JA_1M.pe.sam
  - singularity run 'docker://bicf/gudmaprbkaligner:2.0.0' samtools sort -@ 20 -O BAM -o Q-Y5JA_1M.pe.sorted.bam Q-Y5JA_1M.pe.bam
  - singularity run 'docker://bicf/gudmaprbkaligner:2.0.0' samtools index -@ 20 -b Q-Y5JA_1M.pe.sorted.bam Q-Y5JA_1M.pe.sorted.bam.bai
  - pytest -m alignData

dedupData:
  stage: unit
  script:
  - singularity run 'docker://bicf/gudmaprbkdedup:2.0.0' java -jar /picard/build/libs/picard.jar MarkDuplicates I=./test_data/bam/small/Q-Y5JA_1M.se.sorted.bam O=Q-Y5JA_1M.se.deduped.bam M=Q-Y5JA_1M.se.deduped.Metrics.txt REMOVE_DUPLICATES=true
  - singularity run 'docker://bicf/gudmaprbkdedup:2.0.0' samtools sort -@ 20 -O BAM -o Q-Y5JA_1M.se.sorted.deduped.bam ./test_data/bam/small/Q-Y5JA_1M.se.deduped.bam
  - singularity run 'docker://bicf/gudmaprbkdedup:2.0.0' samtools index -@ 20 -b ./test_data/bam/small/Q-Y5JA_1M.se.sorted.deduped.bam Q-Y5JA_1M.se.sorted.deduped.bam.bai
  - for i in {"chr8","chr4","chrY"}; do 
      echo "samtools view -b Q-Y5JA_1M.se.sorted.deduped.bam ${i} > Q-Y5JA_1M.se.sorted.deduped.${i}.bam; samtools index -@ 20 -b Q-Y5JA_1M.se.sorted.deduped.${i}.bam Q-Y5JA_1M.se.sorted.deduped.${i}.bam.bai;";
      done | singularity run 'docker://bicf/gudmaprbkdedup:2.0.0' parallel -j 20 -k
  - pytest -m dedupData

  countData:
  stage: unit
  script:
  - singularity run 'docker://bicf/subread2:2.0.0' featureCounts -R SAM -p -G /project/BICF/BICF_Core/shared/gudmap/references/GRCh38.p12.v31/genome.fna -T 20 -s 1 -a /project/BICF/BICF_Core/shared/gudmap/references/GRCh38.p12.v31/genome.gtf -o Q-Y5JA_1M.se.featureCounts -g 'gene_name' --primary --ignoreDup -B ./test_data/bam/small/Q-Y5JA_1M.se.sorted.deduped.bam 
  - singularity run 'docker://bicf/subread2:2.0.0' Rscript ./workflow/scripts/calculateTPM.R --count Q-Y5JA_1M.se.featureCounts
  - pytest -m makeFeatureCounts

makeBigWig:
  stage: unit
  script:
  - singularity run 'docker://bicf/deeptools3.3:2.0.0' bamCoverage -p 20 -b ./test_data/bam/small/Q-Y5JA_1M.se.sorted.deduped.bam -o Q-Y5JA_1M.se.bw
  - pytest -m makeBigWig

fastqc:
  stage: unit
  script:
  - singularity run 'docker://bicf/fastqc:2.0.0' ./test_data/fastq/small/Q-Y5JA_1M.R1.fastq.gz -o .
  - pytest -m fastqc

dataQC:
  stage: unit
  script:
  - for i in {"chr8","chr4","chrY"}; do
    echo "tin.py -i /project/BICF/BICF_Core/shared/gudmap/test_data/bam/small/Q-Y5JA_1M.se.sorted.deduped.${i}.bam -r /project/BICF/BICF_Core/shared/gudmap/references/GRCh38.p12.v31/bed/genome.bed; cat Q-Y5JA_1M.se.sorted.deduped.${i}.tin.xls | tr -s \"\\w\" \"\\t\" | grep -P \"\\t${i}\\t\";"; done | singularity run 'docker://bicf/rseqc3.0:2.0.0' parallel -j 20 -k > Q-Y5JA_1M.se.sorted.deduped.tin.xls
  - pytest -m inferMetadata

downsampleData:
  stage: unit
  script:
  - singularity exec 'docker://bicf/seqtk:2.0.0' seqtk sample -s100 ./test_data/fastq/small/Q-Y5JA_1M_trimmed.fq.gz 1000 1> sampled.1.fq
  - pytest -m downsampleData


integration_se:
  stage: integration
  script:
  - hostname
  - ulimit -a
  - nextflow -bg run ./workflow/rna-seq.nf --deriva ./test_data/auth/credential.json --bdbag ./test_data/auth/cookies.txt --repRID 16-1ZX4

integration_pe:
  stage: integration
  script:
  - hostname
  - ulimit -a
  - nextflow -bg run ./workflow/rna-seq.nf --deriva ./test_data/auth/credential.json --bdbag ./test_data/auth/cookies.txt --repRID Q-Y5JA -with-dag dag.png
  artifacts:
    name: "$CI_JOB_NAME"
    when: always
    paths:
      - dag.png
    expire_in: 7 days