before_script:
  - module load python/3.6.4-anaconda
  - pip install --user attrs==20.3.0 pytest==6.2.2 pytest-pythonpath==0.7.3 pytest-cov==2.11.1
  - module load singularity/3.5.3
  - export SINGULARITY_CACHEDIR=${dir}cache/
  - module load nextflow/20.01.0
  - ln -sfn /project/BICF/BICF_Core/shared/gudmap/test_data/* ./test_data/
  - mkdir -p ~/.deriva
  - mkdir -p ~/.bdbag

after_script:
  - unset SINGULARITY_CACHEDIR

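# Global defaults: the reference annotation versions consumed by the reference-check
# jobs below, and the shared Singularity cache directory (${dir}) on BioHPC where
# img_cache stores the pulled .sif images.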
variables:
  refMoVersion: "38.p6.vM25"
  refHuVersion: "38.p13.v36"
  refERCCVersion: "92"
  dir: "/project/BICF/BICF_Core/shared/gudmap/singularity_cache/"

stages:
  - singularity
  - versions
  - aggregation
  - badges
  - deploy
  - unit
  - reference
  - integration
  - consistency


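# img_cache pre-pulls every container referenced in nextflow.config into the shared
# cache so later jobs can run the .sif images directly. The grep/cut pipelines here
# and below assume container declarations of the form (illustrative value, not
# copied from nextflow.config):
#   container = 'organization/image:tag'
# which singularity pull saves as ${dir}image_tag.sif.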
img_cache:
  stage: singularity
  script:
    - cat nextflow.config | grep -oP "container = \K.*" | tr -d "'" | sort | uniq | xargs -P 10 -I {} singularity pull --dir ${dir} 'docker://'{} || true

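# collect derives the <image>_<tag>.sif name for each tool container and records the
# tool version strings as version_*.txt artifacts, which generateVersions folds into
# the MultiQC software-versions report. Folded ('>') blocks would join these lines
# into a single shell command, so literal ('|') blocks are used to keep one command
# per line.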
collect:
  stage: versions
  script:
    - |
      derivaImg=$(cat nextflow.config | grep -oP "container = \K.*" | tr -d "'" | sort | uniq | grep deriva | cut -d"/" -f2 | cut -d":" -f1)
      derivaVar=$(cat nextflow.config | grep -oP "container = \K.*" | tr -d "'" | sort | uniq | grep deriva | cut -d"/" -f2 | cut -d":" -f2)
      echo ${dir}${derivaImg}_${derivaVar}.sif
      singularity run ${dir}${derivaImg}_${derivaVar}.sif deriva-download-cli --version > version_deriva.txt
      singularity run ${dir}${derivaImg}_${derivaVar}.sif bdbag --version > version_bdbag.txt
    - |
      pythonImg=$(cat nextflow.config | grep -oP "container = \K.*" | tr -d "'" | sort | uniq | grep python | cut -d"/" -f2 | cut -d":" -f1)
      pythonVar=$(cat nextflow.config | grep -oP "container = \K.*" | tr -d "'" | sort | uniq | grep python | cut -d"/" -f2 | cut -d":" -f2)
      echo ${dir}${pythonImg}_${pythonVar}.sif
      singularity run ${dir}${pythonImg}_${pythonVar}.sif python3 --version > version_python.txt
    - |
      fastqcImg=$(cat nextflow.config | grep -oP "container = \K.*" | tr -d "'" | sort | uniq | grep fastqc | cut -d"/" -f2 | cut -d":" -f1)
      fastqcVar=$(cat nextflow.config | grep -oP "container = \K.*" | tr -d "'" | sort | uniq | grep fastqc | cut -d"/" -f2 | cut -d":" -f2)
      echo ${dir}${fastqcImg}_${fastqcVar}.sif
      singularity run ${dir}${fastqcImg}_${fastqcVar}.sif fastqc --version > version_fastqc.txt
    - |
      seqwhoImg=$(cat nextflow.config | grep -oP "container = \K.*" | tr -d "'" | sort | uniq | grep seqwho | cut -d"/" -f2 | cut -d":" -f1)
      seqwhoVar=$(cat nextflow.config | grep -oP "container = \K.*" | tr -d "'" | sort | uniq | grep seqwho | cut -d"/" -f2 | cut -d":" -f2)
      echo ${dir}${seqwhoImg}_${seqwhoVar}.sif
      singularity run ${dir}${seqwhoImg}_${seqwhoVar}.sif seqwho.py -h | grep -o Version.* > version_seqwho.txt &
    - |
      trimgaloreImg=$(cat nextflow.config | grep -oP "container = \K.*" | tr -d "'" | sort | uniq | grep trimgalore | cut -d"/" -f2 | cut -d":" -f1)
      trimgaloreVar=$(cat nextflow.config | grep -oP "container = \K.*" | tr -d "'" | sort | uniq | grep trimgalore | cut -d"/" -f2 | cut -d":" -f2)
      echo ${dir}${trimgaloreImg}_${trimgaloreVar}.sif
      singularity run ${dir}${trimgaloreImg}_${trimgaloreVar}.sif trim_galore --version > version_trimgalore.txt
    - |
      seqtkImg=$(cat nextflow.config | grep -oP "container = \K.*" | tr -d "'" | sort | uniq | grep seqtk | cut -d"/" -f2 | cut -d":" -f1)
      seqtkVar=$(cat nextflow.config | grep -oP "container = \K.*" | tr -d "'" | sort | uniq | grep seqtk | cut -d"/" -f2 | cut -d":" -f2)
      echo ${dir}${seqtkImg}_${seqtkVar}.sif
      singularity run ${dir}${seqtkImg}_${seqtkVar}.sif seqtk 2>&1 | grep -o Version.* > version_seqtk.txt &
    - |
      rseqcImg=$(cat nextflow.config | grep -oP "container = \K.*" | tr -d "'" | sort | uniq | grep rseqc | cut -d"/" -f2 | cut -d":" -f1)
      rseqcVar=$(cat nextflow.config | grep -oP "container = \K.*" | tr -d "'" | sort | uniq | grep rseqc | cut -d"/" -f2 | cut -d":" -f2)
      echo ${dir}${rseqcImg}_${rseqcVar}.sif
      singularity run ${dir}${rseqcImg}_${rseqcVar}.sif infer_experiment.py --version > version_rseqc.txt
    - |
      hisatImg=$(cat nextflow.config | grep -oP "container = \K.*" | tr -d "'" | sort | uniq | grep hisat | cut -d"/" -f2 | cut -d":" -f1)
      hisatVar=$(cat nextflow.config | grep -oP "container = \K.*" | tr -d "'" | sort | uniq | grep hisat | cut -d"/" -f2 | cut -d":" -f2)
      echo ${dir}${hisatImg}_${hisatVar}.sif
      singularity run ${dir}${hisatImg}_${hisatVar}.sif hisat2 --version > version_hisat2.txt
      singularity run ${dir}${hisatImg}_${hisatVar}.sif samtools --version > version_samtools.txt
    - |
      picardImg=$(cat nextflow.config | grep -oP "container = \K.*" | tr -d "'" | sort | uniq | grep picard | cut -d"/" -f2 | cut -d":" -f1)
      picardVar=$(cat nextflow.config | grep -oP "container = \K.*" | tr -d "'" | sort | uniq | grep picard | cut -d"/" -f2 | cut -d":" -f2)
      echo ${dir}${picardImg}_${picardVar}.sif
      singularity run ${dir}${picardImg}_${picardVar}.sif java -jar /picard/build/libs/picard.jar MarkDuplicates --version 2> version_markdups.txt &
    - |
      subreadImg=$(cat nextflow.config | grep -oP "container = \K.*" | tr -d "'" | sort | uniq | grep subread | cut -d"/" -f2 | cut -d":" -f1)
      subreadVar=$(cat nextflow.config | grep -oP "container = \K.*" | tr -d "'" | sort | uniq | grep subread | cut -d"/" -f2 | cut -d":" -f2)
      echo ${dir}${subreadImg}_${subreadVar}.sif
      singularity run ${dir}${subreadImg}_${subreadVar}.sif featureCounts -v &> version_featurecounts.txt
      singularity run ${dir}${subreadImg}_${subreadVar}.sif R --version > version_r.txt
    - |
      deeptoolsImg=$(cat nextflow.config | grep -oP "container = \K.*" | tr -d "'" | sort | uniq | grep deeptools | cut -d"/" -f2 | cut -d":" -f1)
      deeptoolsVar=$(cat nextflow.config | grep -oP "container = \K.*" | tr -d "'" | sort | uniq | grep deeptools | cut -d"/" -f2 | cut -d":" -f2)
      echo ${dir}${deeptoolsImg}_${deeptoolsVar}.sif
      singularity run ${dir}${deeptoolsImg}_${deeptoolsVar}.sif deeptools --version > version_deeptools.txt
    - |
      multiqcImg=$(cat nextflow.config | grep -oP "container = \K.*" | tr -d "'" | sort | uniq | grep multiqc | cut -d"/" -f2 | cut -d":" -f1)
      multiqcVar=$(cat nextflow.config | grep -oP "container = \K.*" | tr -d "'" | sort | uniq | grep multiqc | cut -d"/" -f2 | cut -d":" -f2)
      echo ${dir}${multiqcImg}_${multiqcVar}.sif
      singularity run ${dir}${multiqcImg}_${multiqcVar}.sif multiqc --version > version_multiqc.txt
  artifacts:
    name: "$CI_JOB_NAME"
    when: always
    paths:
      - version_deriva.txt
      - version_bdbag.txt
      - version_python.txt
      - version_fastqc.txt
      - version_seqwho.txt
      - version_trimgalore.txt
      - version_seqtk.txt
      - version_rseqc.txt
      - version_hisat2.txt
      - version_samtools.txt
      - version_markdups.txt
      - version_featurecounts.txt
      - version_r.txt
      - version_deeptools.txt
      - version_multiqc.txt
    expire_in: 7 days

generateVersions:
  stage: aggregation
  only:
    - push
    - tags
  except:
    - merge_requests
    - schedules
  script:
    - python ./workflow/scripts/generate_versions.py -o software_versions
    - python ./workflow/scripts/generate_references.py -r ./docs/references.md -o software_references
  artifacts:
    name: "$CI_JOB_NAME"
    when: always
    paths:
      - software_references_mqc.yaml
      - software_versions_mqc.yaml
    expire_in: 7 days


build_badges:
  stage: badges
  only:
    - master
    - develop
    - tags
  before_script:
    - module load singularity/3.5.3
    - chmod +x ./workflow/scripts/get_updated_badge_info.sh
  script:
    - echo "Building badges"
    - baseImg=$(cat nextflow.config | grep -oP "container = \K.*" | tr -d "'" | sort | uniq | grep gudmap-rbk_base | cut -d"/" -f2 | cut -d":" -f1)
    - baseVar=$(cat nextflow.config | grep -oP "container = \K.*" | tr -d "'" | sort | uniq | grep gudmap-rbk_base | cut -d"/" -f2 | cut -d":" -f2)
    - echo ${dir}${baseImg}_${baseVar}.sif
    - singularity run ${dir}${baseImg}_${baseVar}.sif bash ./workflow/scripts/get_updated_badge_info.sh
    - singularity run ${dir}${baseImg}_${baseVar}.sif bash ./workflow/scripts/get_updated_rep_count.sh
  artifacts:
    paths:
      - badges/

pages:
  stage: deploy
  only:
    - master
    - develop
    - tags
  dependencies:
    - build_badges
  script:
    - mkdir -p public
    - mv badges/ public/badges/
  artifacts:
    paths:
    - public

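# Unit-stage jobs exercise individual pipeline steps outside of Nextflow, running the
# same container commands as the corresponding processes and then validating the
# output with the matching pytest marker.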
getBag:
  stage: unit
  only:
    - push
    - tags
  except:
    - merge_requests
    - schedules
  script:
    - derivaImg=$(cat nextflow.config | grep -oP "container = \K.*" | tr -d "'" | sort | uniq | grep deriva | cut -d"/" -f2 | cut -d":" -f1)
    - derivaVar=$(cat nextflow.config | grep -oP "container = \K.*" | tr -d "'" | sort | uniq | grep deriva | cut -d"/" -f2 | cut -d":" -f2)
    - echo ${dir}${derivaImg}_${derivaVar}.sif
    - ln -sfn `readlink -e ./test_data/auth/credential.json` ~/.deriva/credential.json
    - singularity run ${dir}${derivaImg}_${derivaVar}.sif deriva-download-cli staging.gudmap.org --catalog 2 ./workflow/conf/Replicate_For_Input_Bag.json . rid=Q-Y5F6
    - pytest -m getBag

getData:
  stage: unit
  only:
    - push
    - tags
  except:
    - merge_requests
    - schedules
  script:
    - derivaImg=$(cat nextflow.config | grep -oP "container = \K.*" | tr -d "'" | sort | uniq | grep deriva | cut -d"/" -f2 | cut -d":" -f1)
    - derivaVar=$(cat nextflow.config | grep -oP "container = \K.*" | tr -d "'" | sort | uniq | grep deriva | cut -d"/" -f2 | cut -d":" -f2)
    - echo ${dir}${derivaImg}_${derivaVar}.sif
    - ln -sfn `readlink -e ./test_data/auth/cookies.txt` ~/.bdbag/deriva-cookies.txt
    - unzip ./test_data/bag/Q-Y5F6_inputBag_xxxxtest.zip
    - singularity run ${dir}${derivaImg}_${derivaVar}.sif bash ./workflow/scripts/bdbag_fetch.sh Q-Y5F6_inputBag Q-Y5F6
    - pytest -m getData

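# parseMetadata extracts each metadata field from the test CSV with parse_meta.py and
# assembles a design.csv row in the order:
#   endsMeta,endsRaw,endsManual,stranded,spike,species,readLength,expRID,studyRID,repRID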
parseMetadata:
  stage: unit
  only:
    - push
    - tags
  except:
    - merge_requests
    - schedules
  script:
    - pythonImg=$(cat nextflow.config | grep -oP "container = \K.*" | tr -d "'" | sort | uniq | grep python | cut -d"/" -f2 | cut -d":" -f1)
    - pythonVar=$(cat nextflow.config | grep -oP "container = \K.*" | tr -d "'" | sort | uniq | grep python | cut -d"/" -f2 | cut -d":" -f2)
    - echo ${dir}${pythonImg}_${pythonVar}.sif
    - rep=$(singularity run ${dir}${pythonImg}_${pythonVar}.sif python3 ./workflow/scripts/parse_meta.py -r Replicate_RID -m "./test_data/meta/metaTest.csv" -p repRID)
    - exp=$(singularity run ${dir}${pythonImg}_${pythonVar}.sif python3 ./workflow/scripts/parse_meta.py -r Replicate_RID -m "./test_data/meta/metaTest.csv" -p expRID)
    - study=$(singularity run ${dir}${pythonImg}_${pythonVar}.sif python3 ./workflow/scripts/parse_meta.py -r Replicate_RID -m "./test_data/meta/metaTest.csv" -p studyRID)
    - endsRaw=$(singularity run ${dir}${pythonImg}_${pythonVar}.sif python3 ./workflow/scripts/parse_meta.py -r Replicate_RID -m "./test_data/meta/metaTest.csv" -p endsMeta)
    - stranded=$(singularity run ${dir}${pythonImg}_${pythonVar}.sif python3 ./workflow/scripts/parse_meta.py -r Replicate_RID -m "./test_data/meta/metaTest.csv" -p stranded)
    - spike=$(singularity run ${dir}${pythonImg}_${pythonVar}.sif python3 ./workflow/scripts/parse_meta.py -r Replicate_RID -m "./test_data/meta/metaTest.csv" -p spike)
    - species=$(singularity run ${dir}${pythonImg}_${pythonVar}.sif python3 ./workflow/scripts/parse_meta.py -r Replicate_RID -m "./test_data/meta/metaTest.csv" -p species)
    - readLength=$(singularity run ${dir}${pythonImg}_${pythonVar}.sif python3 ./workflow/scripts/parse_meta.py -r Replicate_RID -m "./test_data/meta/metaTest.csv" -p readLength)
    - endsMeta="uk"
    - endsManual="se" 
    - echo -e "${endsMeta},${endsRaw},${endsManual},${stranded},${spike},${species},${readLength},${exp},${study},${rep}" > design.csv
    - pytest -m parseMetadata

fastqc:
  stage: unit
  only:
    - push
    - tags
  except:
    - merge_requests
    - schedules
  script:
    - fastqcImg=$(cat nextflow.config | grep -oP "container = \K.*" | tr -d "'" | sort | uniq | grep fastqc | cut -d"/" -f2 | cut -d":" -f1)
    - fastqcVar=$(cat nextflow.config | grep -oP "container = \K.*" | tr -d "'" | sort | uniq | grep fastqc | cut -d"/" -f2 | cut -d":" -f2)
    - echo ${dir}${fastqcImg}_${fastqcVar}.sif
    - singularity run ${dir}${fastqcImg}_${fastqcVar}.sif fastqc ./test_data/fastq/small/Q-Y5F6_1M.R1.fastq.gz -o .
    - pytest -m fastqc

seqwho:
  stage: unit
  only:
    - push
    - tags
  except:
    - merge_requests
    - schedules
  script:
    - seqwhoImg=$(cat nextflow.config | grep -oP "container = \K.*" | tr -d "'" | sort | uniq | grep seqwho | cut -d"/" -f2 | cut -d":" -f1)
    - seqwhoVar=$(cat nextflow.config | grep -oP "container = \K.*" | tr -d "'" | sort | uniq | grep seqwho | cut -d"/" -f2 | cut -d":" -f2)
    - echo ${dir}${seqwhoImg}_${seqwhoVar}.sif
    - wget -O SeqWho.ix https://cloud.biohpc.swmed.edu/index.php/s/eeNWqZz8jqN5zWY/download
    - mkdir -p SeqWho_call_plots/test_data/fastq/small/
    - singularity run ${dir}${seqwhoImg}_${seqwhoVar}.sif seqwho.py -f test_data/fastq/small/Q-Y5F6_1M.R1.fastq.gz -x SeqWho.ix
    - pytest -m seqwho

trimData:
  stage: unit
  only:
    - push
    - tags
  except:
    - merge_requests
    - schedules
  script:
    - trimgaloreImg=$(cat nextflow.config | grep -oP "container = \K.*" | tr -d "'" | sort | uniq | grep trimgalore | cut -d"/" -f2 | cut -d":" -f1)
    - trimgaloreVar=$(cat nextflow.config | grep -oP "container = \K.*" | tr -d "'" | sort | uniq | grep trimgalore | cut -d"/" -f2 | cut -d":" -f2)
    - echo ${dir}${trimgaloreImg}_${trimgaloreVar}.sif
    - singularity run ${dir}${trimgaloreImg}_${trimgaloreVar}.sif trim_galore --gzip -q 25 --length 35 --basename Q-Y5F6_1M.se ./test_data/fastq/small/Q-Y5F6_1M.R1.fastq.gz
    - singularity run ${dir}${trimgaloreImg}_${trimgaloreVar}.sif trim_galore --gzip -q 25 --length 35 --paired --basename Q-Y5F6_1M.pe ./test_data/fastq/small/Q-Y5F6_1M.R1.fastq.gz ./test_data/fastq/small/Q-Y5F6_1M.R2.fastq.gz
    - readLengthSE=$(zcat *_trimmed.fq.gz | awk '{if(NR%4==2) print length($1)}' | sort -n | awk '{a[NR]=$0}END{print(NR%2==1)?a[int(NR/2)+1]:(a[NR/2]+a[NR/2+1])/2}')
    - readLengthPE=$(zcat *_1.fq.gz | awk '{if(NR%4==2) print length($1)}' | sort -n | awk '{a[NR]=$0}END{print(NR%2==1)?a[int(NR/2)+1]:(a[NR/2]+a[NR/2+1])/2}')
    - pytest -m trimData

downsampleData:
  stage: unit
  only:
    - push
    - tags
  except:
    - merge_requests
    - schedules
  script:
    - seqtkImg=$(cat nextflow.config | grep -oP "container = \K.*" | tr -d "'" | sort | uniq | grep seqtk | cut -d"/" -f2 | cut -d":" -f1)
    - seqtkVar=$(cat nextflow.config | grep -oP "container = \K.*" | tr -d "'" | sort | uniq | grep seqtk | cut -d"/" -f2 | cut -d":" -f2)
    - echo ${dir}${seqtkImg}_${seqtkVar}.sif
    - singularity run ${dir}${seqtkImg}_${seqtkVar}.sif seqtk sample -s100 ./test_data/fastq/small/Q-Y5F6_1M.se_trimmed.fq.gz 1000 1> sampled.1.fq
    - pytest -m downsampleData

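# inferMetadata checks that the HISAT2 alignment summary contains an overall alignment
# rate and that infer_experiment.py plus infer_meta.sh can call endness from the
# resulting log; either value coming back empty fails the job.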
inferMetadata:
  stage: unit
  only:
    - push
    - tags
  except:
    - merge_requests
    - schedules
  script:
    - rseqcImg=$(cat nextflow.config | grep -oP "container = \K.*" | tr -d "'" | sort | uniq | grep rseqc | cut -d"/" -f2 | cut -d":" -f1)
    - rseqcVar=$(cat nextflow.config | grep -oP "container = \K.*" | tr -d "'" | sort | uniq | grep rseqc | cut -d"/" -f2 | cut -d":" -f2)
    - echo ${dir}${rseqcImg}_${rseqcVar}.sif
    - >
      align=$(echo $(grep "Overall alignment rate" ./test_data/meta/Q-Y5F6_1M.se.alignSummary.txt | cut -f2 -d ':' | cut -f2 -d ' ' | tr -d '%')) &&
      if [[ ${align} == "" ]]; then exit 1; fi
    - singularity run ${dir}${rseqcImg}_${rseqcVar}.sif infer_experiment.py -r "/project/BICF/BICF_Core/shared/gudmap/references/new/GRCh38.p13.v36/data/annotation/genome.bed" -i "./test_data/bam/small/Q-Y5F6_1M.se.sorted.deduped.bam" 1>> Q-Y5F6_1M.se.inferMetadata.log
    - >
      ended=$(singularity run ${dir}${rseqcImg}_${rseqcVar}.sif bash ./workflow/scripts/infer_meta.sh endness Q-Y5F6_1M.se.inferMetadata.log) &&
      if [[ ${ended} == "" ]]; then exit 1; fi
    - pytest -m inferMetadata
  
alignData:
  stage: unit
  only:
    - push
    - tags
  except:
    - merge_requests
    - schedules
  script:
    - hisatImg=$(cat nextflow.config | grep -oP "container = \K.*" | tr -d "'" | sort | uniq | grep hisat | cut -d"/" -f2 | cut -d":" -f1)
    - hisatVar=$(cat nextflow.config | grep -oP "container = \K.*" | tr -d "'" | sort | uniq | grep hisat | cut -d"/" -f2 | cut -d":" -f2)
    - echo ${dir}${hisatImg}_${hisatVar}.sif
    - singularity run ${dir}${hisatImg}_${hisatVar}.sif hisat2 -p 20 --add-chrname --un-gz Q-Y5F6_1M.se.unal.gz -S Q-Y5F6_1M.se.sam -x /project/BICF/BICF_Core/shared/gudmap/references/new/GRCh38.p13.v36/data/hisat2/genome --rna-strandness F -U ./test_data/fastq/small/Q-Y5F6_1M.se_trimmed.fq.gz --summary-file Q-Y5F6_1M.se.alignSummary.txt --new-summary
    - singularity run ${dir}${hisatImg}_${hisatVar}.sif samtools view -1 -@ 20 -F 4 -F 8 -F 256 -o Q-Y5F6_1M.se.bam Q-Y5F6_1M.se.sam
    - singularity run ${dir}${hisatImg}_${hisatVar}.sif samtools sort -@ 20 -O BAM -o Q-Y5F6_1M.se.sorted.bam Q-Y5F6_1M.se.bam
    - singularity run ${dir}${hisatImg}_${hisatVar}.sif samtools index -@ 20 -b Q-Y5F6_1M.se.sorted.bam Q-Y5F6_1M.se.sorted.bam.bai
    - singularity run ${dir}${hisatImg}_${hisatVar}.sif hisat2 -p 20 --add-chrname --un-gz Q-Y5F6_1M.pe.unal.gz -S Q-Y5F6_1M.pe.sam -x /project/BICF/BICF_Core/shared/gudmap/references/new/GRCh38.p13.v36/data/hisat2/genome --rna-strandness FR --no-mixed --no-discordant -1 ./test_data/fastq/small/Q-Y5F6_1M.pe_val_1.fq.gz -2 ./test_data/fastq/small/Q-Y5F6_1M.pe_val_2.fq.gz --summary-file Q-Y5F6_1M.pe.alignSummary.txt --new-summary
    - singularity run ${dir}${hisatImg}_${hisatVar}.sif samtools view -1 -@ 20 -F 4 -F 8 -F 256 -o Q-Y5F6_1M.pe.bam Q-Y5F6_1M.pe.sam
    - singularity run ${dir}${hisatImg}_${hisatVar}.sif samtools sort -@ 20 -O BAM -o Q-Y5F6_1M.pe.sorted.bam Q-Y5F6_1M.pe.bam
    - singularity run ${dir}${hisatImg}_${hisatVar}.sif samtools index -@ 20 -b Q-Y5F6_1M.pe.sorted.bam Q-Y5F6_1M.pe.sorted.bam.bai
    - pytest -m alignData

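# dedupData marks and removes duplicates with Picard, re-sorts and indexes the BAM,
# then splits it per chromosome (chr8, chr4, chrY) by piping generated samtools
# commands into GNU parallel inside the picard container.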
dedupData:
  stage: unit
  only:
    - push
    - tags
  except:
    - merge_requests
    - schedules
  script:
    - picardImg=$(cat nextflow.config | grep -oP "container = \K.*" | tr -d "'" | sort | uniq | grep picard | cut -d"/" -f2 | cut -d":" -f1)
    - picardVar=$(cat nextflow.config | grep -oP "container = \K.*" | tr -d "'" | sort | uniq | grep picard | cut -d"/" -f2 | cut -d":" -f2)
    - echo ${dir}${picardImg}_${picardVar}.sif
    - singularity run ${dir}${picardImg}_${picardVar}.sif java -jar /picard/build/libs/picard.jar MarkDuplicates I=./test_data/bam/small/Q-Y5F6_1M.se.sorted.bam O=Q-Y5F6_1M.se.deduped.bam M=Q-Y5F6_1M.se.deduped.Metrics.txt REMOVE_DUPLICATES=true
    - singularity run ${dir}${picardImg}_${picardVar}.sif samtools sort -@ 20 -O BAM -o Q-Y5F6_1M.se.sorted.deduped.bam ./test_data/bam/small/Q-Y5F6_1M.se.deduped.bam
    - singularity run ${dir}${picardImg}_${picardVar}.sif samtools index -@ 20 -b ./test_data/bam/small/Q-Y5F6_1M.se.sorted.deduped.bam Q-Y5F6_1M.se.sorted.deduped.bam.bai
    - for i in {"chr8","chr4","chrY"}; do
        echo "samtools view -b Q-Y5F6_1M.se.sorted.deduped.bam ${i} > Q-Y5F6_1M.se.sorted.deduped.${i}.bam; samtools index -@ 20 -b Q-Y5F6_1M.se.sorted.deduped.${i}.bam Q-Y5F6_1M.se.sorted.deduped.${i}.bam.bai;";
      done | singularity run ${dir}${picardImg}_${picardVar}.sif parallel -j 20 -k
    - pytest -m dedupData

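# countData links the reference gene metadata, runs featureCounts on the deduped BAM,
# and converts counts to TPM and gene symbols with the R scripts shipped alongside
# the subread container.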
countData:
  stage: unit
  only:
    - push
    - tags
  except:
    - merge_requests
    - schedules
  script:
    - ln -s /project/BICF/BICF_Core/shared/gudmap/references/new/GRCh38.p13.v36/data/metadata/geneID.tsv
    - ln -s /project/BICF/BICF_Core/shared/gudmap/references/new/GRCh38.p13.v36/data/metadata/Entrez.tsv
    - subreadImg=$(cat nextflow.config | grep -oP "container = \K.*" | tr -d "'" | sort | uniq | grep subread | cut -d"/" -f2 | cut -d":" -f1)
    - subreadVar=$(cat nextflow.config | grep -oP "container = \K.*" | tr -d "'" | sort | uniq | grep subread | cut -d"/" -f2 | cut -d":" -f2)
    - echo ${dir}${subreadImg}_${subreadVar}.sif
    - singularity run ${dir}${subreadImg}_${subreadVar}.sif featureCounts -T 20 -a /project/BICF/BICF_Core/shared/gudmap/references/new/GRCh38.p13.v36/data/annotation/genome.gtf -G /project/BICF/BICF_Core/shared/gudmap/references/new/GRCh38.p13.v36/data/sequence/genome.fna -g 'gene_name' --extraAttributes 'gene_id' -o Q-Y5F6_1M.se_countData -s 1 -R SAM --primary --ignoreDup ./test_data/bam/small/Q-Y5F6_1M.se.sorted.deduped.bam
    - singularity run ${dir}${subreadImg}_${subreadVar}.sif Rscript ./workflow/scripts/calculateTPM.R --count ./test_data/counts/small/Q-Y5F6_1M.se_countData
    - singularity run ${dir}${subreadImg}_${subreadVar}.sif Rscript ./workflow/scripts/convertGeneSymbols.R --repRID Q-Y5F6_1M.se
    - assignedReads=$(grep -m 1 'Assigned' *.summary | grep -oe '\([0-9.]*\)')
    - pytest -m makeFeatureCounts

makeBigWig:
  stage: unit
  only:
    - push
    - tags
  except:
    - merge_requests
    - schedules
  script:
    - deeptoolsImg=$(cat nextflow.config | grep -oP "container = \K.*" | tr -d "'" | sort | uniq | grep deeptools | cut -d"/" -f2 | cut -d":" -f1)
    - deeptoolsVar=$(cat nextflow.config | grep -oP "container = \K.*" | tr -d "'" | sort | uniq | grep deeptools | cut -d"/" -f2 | cut -d":" -f2)
    - echo ${dir}${deeptoolsImg}_${deeptoolsVar}.sif
    - singularity run ${dir}${deeptoolsImg}_${deeptoolsVar}.sif bamCoverage -p 20 -b ./test_data/bam/small/Q-Y5F6_1M.se.sorted.deduped.bam -o Q-Y5F6_1M.se.bw
    - pytest -m makeBigWig

dataQC:
  stage: unit
  only:
    - push
    - tags
  except:
    - merge_requests
    - schedules
  script:
    - rseqcImg=$(cat nextflow.config | grep -oP "container = \K.*" | tr -d "'" | sort | uniq | grep rseqc | cut -d"/" -f2 | cut -d":" -f1)
    - rseqcVar=$(cat nextflow.config | grep -oP "container = \K.*" | tr -d "'" | sort | uniq | grep rseqc | cut -d"/" -f2 | cut -d":" -f2)
    - echo ${dir}${rseqcImg}_${rseqcVar}.sif
    - echo -e "geneID\tchrom\ttx_start\ttx_end\tTIN" > Q-Y5F6_1M.se.sorted.deduped.tin.xls
    - for i in {"chr8","chr4","chrY"}; do
        echo "tin.py -i ./test_data/bam/small/Q-Y5F6_1M.se.sorted.deduped.${i}.bam -r /project/BICF/BICF_Core/shared/gudmap/references/new/GRCh38.p13.v36/data/annotation/genome.bed; cat Q-Y5F6_1M.se.sorted.deduped.${i}.tin.xls | tr -s \"\\w\" \"\\t\" | grep -P \"\\t${i}\\t\";"
      done | singularity run ${dir}${rseqcImg}_${rseqcVar}.sif parallel -j 20 -k >> Q-Y5F6_1M.se.sorted.deduped.tin.xls
    - pytest -m dataQC

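# The upload* jobs test writes against the staging DERIVA catalog: each reads the
# webauthn cookie out of test_data/auth/credential.json, queries ermrest to see
# whether a test record already exists, and then creates, reuses, or deletes it via
# the workflow upload scripts and deriva-hatrac-cli.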
uploadInputBag:
  stage: unit
  only:
    - push
    - tags
  except:
    - merge_requests
    - schedules
  script:
    - derivaImg=$(cat nextflow.config | grep -oP "container = \K.*" | tr -d "'" | sort | uniq | grep deriva | cut -d"/" -f2 | cut -d":" -f1)
    - derivaVar=$(cat nextflow.config | grep -oP "container = \K.*" | tr -d "'" | sort | uniq | grep deriva | cut -d"/" -f2 | cut -d":" -f2)
    - echo ${dir}${derivaImg}_${derivaVar}.sif
    - ln -sfn `readlink -e ./test_data/auth/credential.json` ./credential.json
    - echo THIS IS A TEST FILE > test.txt
    - >
      md5=$(md5sum ./test.txt | awk '{ print $1 }') &&
      size=$(wc -c < ./test.txt) &&
      exist=$(singularity run ${dir}${derivaImg}_${derivaVar}.sif curl -s https://staging.gudmap.org/ermrest/catalog/2/entity/RNASeq:Input_Bag/File_MD5=${md5}) &&
      if [ "${exist}" == "[]" ]; then
        cookie=$(cat credential.json | grep -A 1 '\"staging.gudmap.org\": {' | grep -o '\"cookie\": \".*\"') &&
        cookie=${cookie:11:-1} &&
        loc=$(singularity run ${dir}${derivaImg}_${derivaVar}.sif deriva-hatrac-cli --host staging.gudmap.org put ./test.txt /hatrac/resources/rnaseq/pipeline/input_bag/TEST/test.txt --parents) &&
        rid=$(singularity run ${dir}${derivaImg}_${derivaVar}.sif python3 ./workflow/scripts/upload_input_bag.py -f test.txt -l ${loc} -s ${md5} -b ${size} -n 'This is a test input bag' -o staging.gudmap.org -c ${cookie}) &&
        echo ${rid} test input bag created
      else
        rid=$(echo ${exist} | grep -o '\"RID\":\".*\",\"RCT') &&
        rid=${rid:8:-6} &&
        echo ${rid} test input bag already exists
      fi

uploadExecutionRun:
  stage: unit
  only:
    - push
    - tags
  except:
    - merge_requests
    - schedules
  script:
    - derivaImg=$(cat nextflow.config | grep -oP "container = \K.*" | tr -d "'" | sort | uniq | grep deriva | cut -d"/" -f2 | cut -d":" -f1)
    - derivaVar=$(cat nextflow.config | grep -oP "container = \K.*" | tr -d "'" | sort | uniq | grep deriva | cut -d"/" -f2 | cut -d":" -f2)
    - echo ${dir}${derivaImg}_${derivaVar}.sif
    - ln -sfn `readlink -e ./test_data/auth/credential.json` ./credential.json
    - >
      exist=$(singularity run ${dir}${derivaImg}_${derivaVar}.sif curl -s https://staging.gudmap.org/ermrest/catalog/2/entity/RNASeq:Execution_Run/Replicate=17-BTFJ) &&
      cookie=$(cat credential.json | grep -A 1 '\"staging.gudmap.org\": {' | grep -o '\"cookie\": \".*\"') &&
      cookie=${cookie:11:-1} &&
      if [ "${exist}" == "[]" ]; then
        rid=$(singularity run ${dir}${derivaImg}_${derivaVar}.sif python3 ./workflow/scripts/upload_execution_run.py -r 17-BTFJ -w 17-BV2Y -g 17-BV90 -i 17-BTFT -s Success -d 'This is a test execution run' -o staging.gudmap.org -c ${cookie} -u F) &&
        echo ${rid} test execution run created
      else
        rid=$(echo ${exist} | grep -o '\"RID\":\".*\",\"RCT') &&
        rid=${rid:7:-6} &&
        rid=$(singularity run ${dir}${derivaImg}_${derivaVar}.sif python3 ./workflow/scripts/upload_execution_run.py -r 17-BTFJ -w 17-BV2Y -g 17-BV90 -i 17-BTFT -s Success -d 'This is a test execution run' -o staging.gudmap.org -c ${cookie} -u ${rid}) &&
        echo ${rid} test execution run already exists
      fi

uploadQC:
  stage: unit
  only:
    - push
    - tags
  except:
    - merge_requests
    - schedules
  script:
    - derivaImg=$(cat nextflow.config | grep -oP "container = \K.*" | tr -d "'" | sort | uniq | grep deriva | cut -d"/" -f2 | cut -d":" -f1)
    - derivaVar=$(cat nextflow.config | grep -oP "container = \K.*" | tr -d "'" | sort | uniq | grep deriva | cut -d"/" -f2 | cut -d":" -f2)
    - echo ${dir}${derivaImg}_${derivaVar}.sif
    - ln -sfn `readlink -e ./test_data/auth/credential.json` ./credential.json
    - >
      exist=$(singularity run ${dir}${derivaImg}_${derivaVar}.sif curl -s https://staging.gudmap.org/ermrest/catalog/2/entity/RNASeq:mRNA_QC/Replicate=17-BTFJ) &&
      cookie=$(cat credential.json | grep -A 1 '\"staging.gudmap.org\": {' | grep -o '\"cookie\": \".*\"') &&
      cookie=${cookie:11:-1} &&
      if [ "${exist}" != "[]" ]; then
        rids=$(echo ${exist} | grep -o '\"RID\":\".\{7\}' | sed 's/^.\{7\}//') &&
        for rid in ${rids}; do
          singularity run ${dir}${derivaImg}_${derivaVar}.sif python3 ./workflow/scripts/delete_entry.py -r ${rid} -t mRNA_QC -o staging.gudmap.org -c ${cookie}
        done
        echo all old mRNA QC RIDs deleted
      fi
        rid=$(singularity run ${dir}${derivaImg}_${derivaVar}.sif python3 ./workflow/scripts/upload_qc.py -r 17-BTFJ -e 17-BVDJ -p "Single End" -s forward -l 35 -w 5 -f 1 -t 1 -n "This is a test mRNA QC" -o staging.gudmap.org -c ${cookie} -u F)
        echo ${rid} test mRNA QC created

uploadProcessedFile:
  stage: unit
  only:
    - push
    - tags
  except:
    - merge_requests
    - schedules
  script:
    - derivaImg=$(cat nextflow.config | grep -oP "container = \K.*" | tr -d "'" | sort | uniq | grep deriva | cut -d"/" -f2 | cut -d":" -f1)
    - derivaVar=$(cat nextflow.config | grep -oP "container = \K.*" | tr -d "'" | sort | uniq | grep deriva | cut -d"/" -f2 | cut -d":" -f2)
    - echo ${dir}${derivaImg}_${derivaVar}.sif
    - ln -sfn `readlink -e ./test_data/auth/credential.json` ./credential.json
    - echo THIS IS A TEST FILE > 17-BTFJ_test.csv
    - mkdir -p ./deriva/Seq/pipeline/17-BTFE/17-BVDJ/
    - mv 17-BTFJ_test.csv ./deriva/Seq/pipeline/17-BTFE/17-BVDJ/17-BTFJ_test.csv
    - >
      exist=$(singularity run ${dir}${derivaImg}_${derivaVar}.sif curl -s https://staging.gudmap.org/ermrest/catalog/2/entity/RNASeq:Processed_File/Replicate=17-BTFJ) &&
      cookie=$(cat credential.json | grep -A 1 '\"staging.gudmap.org\": {' | grep -o '\"cookie\": \".*\"') &&
      cookie=${cookie:11:-1} &&
      if [ "${exist}" != "[]" ]; then
        rids=$(echo ${exist} | grep -o '\"RID\":\".\{7\}' | sed 's/^.\{7\}//') &&
        for rid in ${rids}; do
          singularity run ${dir}${derivaImg}_${derivaVar}.sif python3 ./workflow/scripts/delete_entry.py -r ${rid} -t Processed_File -o staging.gudmap.org -c ${cookie}
        done
        echo all old processed file RIDs deleted
      fi
    - singularity run ${dir}${derivaImg}_${derivaVar}.sif deriva-upload-cli --catalog 2 --token ${cookie:9} staging.gudmap.org ./deriva
    - echo test processed file uploaded
    - mkdir test
    - singularity run ${dir}${derivaImg}_${derivaVar}.sif bdbag test --archiver zip
    - echo test output bag created
    - pytest -m outputBag

uploadOutputBag:
  stage: unit
  only:
    - push
    - tags
  except:
    - merge_requests
    - schedules
  script:
    - derivaImg=$(cat nextflow.config | grep -oP "container = \K.*" | tr -d "'" | sort | uniq | grep deriva | cut -d"/" -f2 | cut -d":" -f1)
    - derivaVar=$(cat nextflow.config | grep -oP "container = \K.*" | tr -d "'" | sort | uniq | grep deriva | cut -d"/" -f2 | cut -d":" -f2)
    - echo ${dir}${derivaImg}_${derivaVar}.sif
    - ln -sfn `readlink -e ./test_data/auth/credential.json` ./credential.json
    - echo THIS IS A TEST FILE > test.txt
    - >
      md5=$(md5sum ./test.txt | awk '{ print $1 }') &&
      size=$(wc -c < ./test.txt) &&
      exist=$(singularity run ${dir}${derivaImg}_${derivaVar}.sif curl -s https://staging.gudmap.org/ermrest/catalog/2/entity/RNASeq:Output_Bag/File_MD5=${md5}) &&
      if [ "${exist}" == "[]" ]; then
        cookie=$(cat credential.json | grep -A 1 '\"staging.gudmap.org\": {' | grep -o '\"cookie\": \".*\"') &&
        cookie=${cookie:11:-1} &&
        loc=$(singularity run ${dir}${derivaImg}_${derivaVar}.sif deriva-hatrac-cli --host staging.gudmap.org put ./test.txt /hatrac/resources/rnaseq/pipeline/output_bag/TEST/test.txt --parents) &&
        rid=$(singularity run ${dir}${derivaImg}_${derivaVar}.sif python3 ./workflow/scripts/upload_output_bag.py -e 17-BVDJ -f test.txt -l ${loc} -s ${md5} -b ${size} -n 'This is a test output bag' -o staging.gudmap.org -c ${cookie}) &&
        echo ${rid} test output bag created
      else
        rid=$(echo ${exist} | grep -o '\"RID\":\".*\",\"RCT') &&
        rid=${rid:8:-6} &&
        echo ${rid} test output bag already exists
      fi


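# Reference-stage jobs verify that the pinned references are reachable: the *_BioHPC
# jobs copy the HISAT2 index from the shared filesystem, while the *_dev/_staging/_prod
# jobs query RNASeq:Reference_Genome on each DERIVA host and confirm the referenced
# file is present in hatrac.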
human_BioHPC:
  stage: reference
  only:
    - push
    - tags
  except:
    - merge_requests
    - schedules
  script:
    - mkdir -p hu
    - cp -R /project/BICF/BICF_Core/shared/gudmap/references/new/GRCh38.p13.v36/data/hisat2 ./hu/

mouse_BioHPC:
  stage: reference
  only:
    - push
    - tags
  except:
    - merge_requests
    - schedules
  script:
    - mkdir -p mo
    # mouse HISAT2 index; path assumed to mirror the human reference layout, using refMoVersion 38.p6.vM25
    - cp -R /project/BICF/BICF_Core/shared/gudmap/references/new/GRCm38.p6.vM25/data/hisat2 ./mo/

human_dev:
  stage: reference
  only:
    - push
    - tags
  except:
    - merge_requests
    - schedules
  script:
    - derivaImg=$(cat nextflow.config | grep -oP "container = \K.*" | tr -d "'" | sort | uniq | grep deriva | cut -d"/" -f2 | cut -d":" -f1)
    - derivaVar=$(cat nextflow.config | grep -oP "container = \K.*" | tr -d "'" | sort | uniq | grep deriva | cut -d"/" -f2 | cut -d":" -f2)
    - echo ${dir}${derivaImg}_${derivaVar}.sif
    - ln -sfn `readlink -e ./test_data/auth/cookies.txt` ~/.bdbag/deriva-cookies.txt
    - referenceBase=dev.gudmap.org
    - refName=GRCh
    - references=$(echo ${referenceBase}/${refName}${refHuVersion})
    - GRCv=$(echo ${references} | grep -o ${refName}.* | cut -d '.' -f1)
    - GRCp=$(echo ${references} | grep -o ${refName}.* | cut -d '.' -f2)
    - GENCODE=$(echo ${references} | grep -o ${refName}.* | cut -d '.' -f3)
    - query=$(echo 'https://'${referenceBase}'/ermrest/catalog/2/entity/RNASeq:Reference_Genome/Reference_Version='${GRCv}'.'${GRCp}'/Annotation_Version=GENCODE%20'${GENCODE}/Used_Spike_Ins=false)
    - curl --request GET ${query} > refQuery.json
    - refURL=$(python ./workflow/scripts/extract_ref_data.py --returnParam URL)
    - loc=$(dirname ${refURL})
    - if [[ "${loc}" != "/hatrac/"* ]]; then echo "reference not present in hatrac"; exit 1; fi
    - filename=$(echo $(basename ${refURL}) | grep -oP '.*(?=:)')
    - test=$(singularity run ${dir}${derivaImg}_${derivaVar}.sif deriva-hatrac-cli --host ${referenceBase} ls ${loc}/)
    - test=$(echo ${test} | grep -o ${filename})
    - if [ "${test}" == "" ]; then echo "reference file not present"; exit 1; fi

mouse_dev:
  stage: reference
  only:
    - push
    - tags
  except:
    - merge_requests
    - schedules
  script:
    - derivaImg=$(cat nextflow.config | grep -oP "container = \K.*" | tr -d "'" | sort | uniq | grep deriva | cut -d"/" -f2 | cut -d":" -f1)
    - derivaVar=$(cat nextflow.config | grep -oP "container = \K.*" | tr -d "'" | sort | uniq | grep deriva | cut -d"/" -f2 | cut -d":" -f2)
    - echo ${dir}${derivaImg}_${derivaVar}.sif
    - ln -sfn `readlink -e ./test_data/auth/cookies.txt` ~/.bdbag/deriva-cookies.txt
    - referenceBase=dev.gudmap.org
    - refName=GRCm
    - references=$(echo ${referenceBase}/${refName}${refMoVersion})
    - GRCv=$(echo ${references} | grep -o ${refName}.* | cut -d '.' -f1)
    - GRCp=$(echo ${references} | grep -o ${refName}.* | cut -d '.' -f2)
    - GENCODE=$(echo ${references} | grep -o ${refName}.* | cut -d '.' -f3)
    - query=$(echo 'https://'${referenceBase}'/ermrest/catalog/2/entity/RNASeq:Reference_Genome/Reference_Version='${GRCv}'.'${GRCp}'/Annotation_Version=GENCODE%20'${GENCODE}/Used_Spike_Ins=false)
    - curl --request GET ${query} > refQuery.json
    - refURL=$(python ./workflow/scripts/extract_ref_data.py --returnParam URL)
    - loc=$(dirname ${refURL})
    - if [[ "${loc}" != "/hatrac/"* ]]; then echo "reference not present in hatrac"; exit 1; fi
    - filename=$(echo $(basename ${refURL}) | grep -oP '.*(?=:)')
    - test=$(singularity run ${dir}${derivaImg}_${derivaVar}.sif deriva-hatrac-cli --host ${referenceBase} ls ${loc}/)
    - test=$(echo ${test} | grep -o ${filename})
    - if [ "${test}" == "" ]; then echo "reference file not present"; exit 1; fi

human_staging:
  stage: reference
  only:
    - push
    - tags
  except:
    - merge_requests
    - schedules
  script:
    - derivaImg=$(cat nextflow.config | grep -oP "container = \K.*" | tr -d "'" | sort | uniq | grep deriva | cut -d"/" -f2 | cut -d":" -f1)
    - derivaVar=$(cat nextflow.config | grep -oP "container = \K.*" | tr -d "'" | sort | uniq | grep deriva | cut -d"/" -f2 | cut -d":" -f2)
    - echo ${dir}${derivaImg}_${derivaVar}.sif
    - ln -sfn `readlink -e ./test_data/auth/cookies.txt` ~/.bdbag/deriva-cookies.txt
    - referenceBase=staging.gudmap.org
    - refName=GRCh
    - references=$(echo ${referenceBase}/${refName}${refHuVersion})
    - GRCv=$(echo ${references} | grep -o ${refName}.* | cut -d '.' -f1)
    - GRCp=$(echo ${references} | grep -o ${refName}.* | cut -d '.' -f2)
    - GENCODE=$(echo ${references} | grep -o ${refName}.* | cut -d '.' -f3)
    - query=$(echo 'https://'${referenceBase}'/ermrest/catalog/2/entity/RNASeq:Reference_Genome/Reference_Version='${GRCv}'.'${GRCp}'/Annotation_Version=GENCODE%20'${GENCODE}/Used_Spike_Ins=false)
    - curl --request GET ${query} > refQuery.json
    - refURL=$(python ./workflow/scripts/extract_ref_data.py --returnParam URL)
    - loc=$(dirname ${refURL})
    - if [[ "${loc}" != "/hatrac/"* ]]; then echo "reference not present in hatrac"; exit 1; fi
    - filename=$(echo $(basename ${refURL}) | grep -oP '.*(?=:)')
    - test=$(singularity run ${dir}${derivaImg}_${derivaVar}.sif deriva-hatrac-cli --host ${referenceBase} ls ${loc}/)
    - test=$(echo ${test} | grep -o ${filename})
    - if [ "${test}" == "" ]; then echo "reference file not present"; exit 1; fi

mouse_staging:
  stage: reference
  only:
    - push
    - tags
  except:
    - merge_requests
    - schedules
  script:
    - derivaImg=$(cat nextflow.config | grep -oP "container = \K.*" | tr -d "'" | sort | uniq | grep deriva | cut -d"/" -f2 | cut -d":" -f1)
    - derivaVar=$(cat nextflow.config | grep -oP "container = \K.*" | tr -d "'" | sort | uniq | grep deriva | cut -d"/" -f2 | cut -d":" -f2)
    - echo ${dir}${derivaImg}_${derivaVar}.sif
    - ln -sfn `readlink -e ./test_data/auth/cookies.txt` ~/.bdbag/deriva-cookies.txt
    - referenceBase=staging.gudmap.org
    - refName=GRCm
    - refMoVersion=38.p6.vM22
    - references=$(echo ${referenceBase}/${refName}${refMoVersion})
    - GRCv=$(echo ${references} | grep -o ${refName}.* | cut -d '.' -f1)
    - GRCp=$(echo ${references} | grep -o ${refName}.* | cut -d '.' -f2)
    - GENCODE=$(echo ${references} | grep -o ${refName}.* | cut -d '.' -f3)
    - query=$(echo 'https://'${referenceBase}'/ermrest/catalog/2/entity/RNASeq:Reference_Genome/Reference_Version='${GRCv}'.'${GRCp}'/Annotation_Version=GENCODE%20'${GENCODE}/Used_Spike_Ins=false)
    - curl --request GET ${query} > refQuery.json
    - refURL=$(python ./workflow/scripts/extract_ref_data.py --returnParam URL)
    - loc=$(dirname ${refURL})
    - if [[ "${loc}" != "/hatrac/"* ]]; then echo "reference not present in hatrac"; exit 1; fi
    - filename=$(echo $(basename ${refURL}) | grep -oP '.*(?=:)')
    - test=$(singularity run ${dir}${derivaImg}_${derivaVar}.sif deriva-hatrac-cli --host ${referenceBase} ls ${loc}/)
    - test=$(echo ${test} | grep -o ${filename})
    - if [ "${test}" == "" ]; then echo "reference file not present"; exit 1; fi

human_prod:
  stage: reference
  only:
    - push
    - tags
  except:
    - merge_requests
    - schedules
  script:
    - derivaImg=$(cat nextflow.config | grep -oP "container = \K.*" | tr -d "'" | sort | uniq | grep deriva | cut -d"/" -f2 | cut -d":" -f1)
    - derivaVar=$(cat nextflow.config | grep -oP "container = \K.*" | tr -d "'" | sort | uniq | grep deriva | cut -d"/" -f2 | cut -d":" -f2)
    - echo ${dir}${derivaImg}_${derivaVar}.sif
    - ln -sfn `readlink -e ./test_data/auth/cookies.txt` ~/.bdbag/deriva-cookies.txt
    - referenceBase=www.gudmap.org
    - refName=GRCh
    - references=$(echo ${referenceBase}/${refName}${refHuVersion})
    - GRCv=$(echo ${references} | grep -o ${refName}.* | cut -d '.' -f1)
    - GRCp=$(echo ${references} | grep -o ${refName}.* | cut -d '.' -f2)
    - GENCODE=$(echo ${references} | grep -o ${refName}.* | cut -d '.' -f3)
    - query=$(echo 'https://'${referenceBase}'/ermrest/catalog/2/entity/RNASeq:Reference_Genome/Reference_Version='${GRCv}'.'${GRCp}'/Annotation_Version=GENCODE%20'${GENCODE}/Used_Spike_Ins=false)
    - curl --request GET ${query} > refQuery.json
    - refURL=$(python ./workflow/scripts/extract_ref_data.py --returnParam URL)
    - loc=$(dirname ${refURL})
    - if [[ "${loc}" != "/hatrac/"* ]]; then echo "reference not present in hatrac"; exit 1; fi
    - filename=$(echo $(basename ${refURL}) | grep -oP '.*(?=:)')
    - test=$(singularity run ${dir}${derivaImg}_${derivaVar}.sif deriva-hatrac-cli --host ${referenceBase} ls ${loc}/)
    - test=$(echo ${test} | grep -o ${filename})
    - if [ "${test}" == "" ]; then echo "reference file not present"; exit 1; fi

mouse_prod:
  stage: reference
  only:
    - push
    - tags
  except:
    - merge_requests
    - schedules
  script:
    - derivaImg=$(cat nextflow.config | grep -oP "container = \K.*" | tr -d "'" | sort | uniq | grep deriva | cut -d"/" -f2 | cut -d":" -f1)
    - derivaVar=$(cat nextflow.config | grep -oP "container = \K.*" | tr -d "'" | sort | uniq | grep deriva | cut -d"/" -f2 | cut -d":" -f2)
    - echo ${dir}${derivaImg}_${derivaVar}.sif
    - ln -sfn `readlink -e ./test_data/auth/cookies.txt` ~/.bdbag/deriva-cookies.txt
    - referenceBase=www.gudmap.org
    - refName=GRCm
    - refMoVersion=38.p6.vM22
    - references=$(echo ${referenceBase}/${refName}${refMoVersion})
    - GRCv=$(echo ${references} | grep -o ${refName}.* | cut -d '.' -f1)
    - GRCp=$(echo ${references} | grep -o ${refName}.* | cut -d '.' -f2)
    - GENCODE=$(echo ${references} | grep -o ${refName}.* | cut -d '.' -f3)
    - query=$(echo 'https://'${referenceBase}'/ermrest/catalog/2/entity/RNASeq:Reference_Genome/Reference_Version='${GRCv}'.'${GRCp}'/Annotation_Version=GENCODE%20'${GENCODE}/Used_Spike_Ins=false)
    - curl --request GET ${query} > refQuery.json
    - refURL=$(python ./workflow/scripts/extract_ref_data.py --returnParam URL)
    - loc=$(dirname ${refURL})
    - if [[ "${loc}" != "/hatrac/"* ]]; then echo "reference not present in hatrac"; exit 1; fi
    - filename=$(echo $(basename ${refURL}) | grep -oP '.*(?=:)')
    - test=$(singularity run ${dir}${derivaImg}_${derivaVar}.sif deriva-hatrac-cli --host ${referenceBase} ls ${loc}/)
    - test=$(echo ${test} | grep -o ${filename})
    - if [ "${test}" == "" ]; then echo "reference file not present"; exit 1; fi


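# Integration jobs run the full Nextflow pipeline (rna-seq.nf) against staging with
# known replicate RIDs: one single-end and one paired-end success case, a set of
# expected-failure RIDs, and the parameter-override variants. MultiQC JSON from the
# successful runs is kept as an artifact for the consistency stage.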
integration_se:
  stage: integration
  only: [merge_requests]
  except:
    variables:
      - $CI_MERGE_REQUEST_TARGET_BRANCH_NAME =~ /master/
  script:
    - hostname
    - ulimit -a
    - nextflow -q run ./rna-seq.nf --deriva ./test_data/auth/credential.json --bdbag ./test_data/auth/cookies.txt --repRID 16-1ZX4 --source staging --refSource datahub --upload true -with-dag dag.png --dev false --ci true --track true -with-report ./SE_report.html
    - find . -type f -name "multiqc_data.json" -exec cp {} ./SE_multiqc_data.json \;
    - pytest -m completionMultiqc --filename SE_multiqc_data.json
  artifacts:
    name: "$CI_JOB_NAME"
    when: always
    paths:
      - output/qc/
      - output/report/
      - SE_multiqc_data.json
    expire_in: 7 days
  retry:
    max: 0
    when:
      - always

integration_pe:
  stage: integration
  only: [merge_requests]
  except:
    variables:
      - $CI_MERGE_REQUEST_TARGET_BRANCH_NAME =~ /master/
  script:
    - hostname
    - ulimit -a
    - nextflow -q run ./rna-seq.nf --deriva ./test_data/auth/credential.json --bdbag ./test_data/auth/cookies.txt --repRID Q-Y5JA --source staging --upload true -with-dag dag.png --dev false --ci true --track true -with-report ./PE_report.html
    - find . -type f -name "multiqc_data.json" -exec cp {} ./PE_multiqc_data.json \;
    - pytest -m completionMultiqc --filename PE_multiqc_data.json
  artifacts:
    name: "$CI_JOB_NAME"
    when: always
    paths:
      - dag.png
      - output/qc/
      - output/report/
      - PE_multiqc_data.json
    expire_in: 7 days
  retry:
    max: 0
    when:
      - always


failAmbiguousSpecies:
  stage: integration
  only: [merge_requests]
  except:
    variables:
      - $CI_MERGE_REQUEST_TARGET_BRANCH_NAME =~ /master/
  script:
    - hostname
    - ulimit -a
    - nextflow -q run ./rna-seq.nf --deriva ./test_data/auth/credential.json --bdbag ./test_data/auth/cookies.txt --repRID Q-Y5ER --source staging --upload true -with-dag dag.png --dev false --ci true --track true -with-report ./failAmbiguousSpecies_report.html
  retry:
    max: 0
    when:
      - always

failTrunkation:
  stage: integration
  only: [merge_requests]
  except:
    variables:
      - $CI_MERGE_REQUEST_TARGET_BRANCH_NAME =~ /master/
  script:
    - hostname
    - ulimit -a
    - nextflow -q run ./rna-seq.nf --deriva ./test_data/auth/credential.json --bdbag ./test_data/auth/cookies.txt --repRID Q-Y5ET --source staging --upload true -with-dag dag.png --dev false --ci true --track true -with-report ./failTrunkation_report.html
  retry:
    max: 0
    when:
      - always

failMismatchR1R2:
  stage: integration
  only: [merge_requests]
  except:
    variables:
      - $CI_MERGE_REQUEST_TARGET_BRANCH_NAME =~ /master/
  script:
    - hostname
    - ulimit -a
    - nextflow -q run ./rna-seq.nf --deriva ./test_data/auth/credential.json --bdbag ./test_data/auth/cookies.txt --repRID 16-CWH4 --source staging --upload true -with-dag dag.png --dev false --ci true --track true -with-report ./failMismatchR1R2_report.html
  retry:
    max: 0
    when:
      - always

failUnexpectedMeta:
  stage: integration
  only: [merge_requests]
  except:
    variables:
      - $CI_MERGE_REQUEST_TARGET_BRANCH_NAME =~ /master/
  script:
    - hostname
    - ulimit -a
    - nextflow -q run ./rna-seq.nf --deriva ./test_data/auth/credential.json --bdbag ./test_data/auth/cookies.txt --repRID 14-3R4R --source staging --upload true -with-dag dag.png --dev false --ci true --track true -with-report ./failUnexpectedMeta_report.html
  retry:
    max: 0
    when:
      - always

failFileStructure:
  stage: integration
  only: [merge_requests]
  except:
    variables:
      - $CI_MERGE_REQUEST_TARGET_BRANCH_NAME =~ /master/
  script:
    - hostname
    - ulimit -a
    - nextflow -q run ./rna-seq.nf --deriva ./test_data/auth/credential.json --bdbag ./test_data/auth/cookies.txt --repRID Q-Y5HT --source staging --upload true -with-dag dag.png --dev false --ci true --track true -with-report ./failFileStructure_report.html
  retry:
    max: 0
    when:
      - always

failSeqType:
  stage: integration
  only: [merge_requests]
  except:
    variables:
      - $CI_MERGE_REQUEST_TARGET_BRANCH_NAME =~ /master/
  script:
    - hostname
    - ulimit -a
    - nextflow -q run ./rna-seq.nf --deriva ./test_data/auth/credential.json --bdbag ./test_data/auth/cookies.txt --repRID 16-DNDJ --source staging --upload true -with-dag dag.png --dev false --ci true --track true -with-report ./failSeqType_report.html
  retry:
    max: 0
    when:
      - always

override_inputBag:
  stage: integration
  only: [merge_requests]
  except:
    variables:
      - $CI_MERGE_REQUEST_TARGET_BRANCH_NAME =~ /master/
  script:
    - hostname
    - ulimit -a
    - nextflow -q run ./rna-seq.nf --deriva ./test_data/auth/credential.json --bdbag ./test_data/auth/cookies.txt --repRID Q-Y5F6 --source staging --inputBagForce ./test_data/bag/Q-Y5F6_inputBag_xxxxxxxx.zip --upload false --dev false --ci true --track false -with-report ./inputBagOverride_report.html
    - find . -type f -name "multiqc_data.json" -exec cp {} ./inputBagOverride_multiqc_data.json \;
    - find ./**/report/ -type f -name "*multiqc.html" -exec cp {} ./inputBagOverride_multiqc.html \;
    - pytest -m completionMultiqc --filename inputBagOverride_multiqc_data.json
  artifacts:
    name: "$CI_JOB_NAME"
    when: always
    paths:
      - inputBagOverride_multiqc_data.json
      - inputBagOverride_multiqc.html
    expire_in: 7 days
  retry:
    max: 0
    when:
      - always

override_fastq:
  stage: integration
  only: [merge_requests]
  except:
    variables:
      - $CI_MERGE_REQUEST_TARGET_BRANCH_NAME =~ /master/
  script:
    - hostname
    - ulimit -a
    - nextflow -q run ./rna-seq.nf --deriva ./test_data/auth/credential.json --bdbag ./test_data/auth/cookies.txt --repRID Q-Y5F6 --source staging --fastqsForce './test_data/fastq/small/Q-Y5F6_1M.R{1,2}.fastq.gz' --upload false --dev false --ci true --track false -with-report ./fastqOverride_report.html
    - find . -type f -name "multiqc_data.json" -exec cp {} ./fastqOverride_multiqc_data.json \;
    - find ./**/report/ -type f -name "*multiqc.html" -exec cp {} ./fastqOverride_multiqc.html \;
    - pytest -m completionMultiqc --filename fastqOverride_multiqc_data.json
  artifacts:
    name: "$CI_JOB_NAME"
    when: always
    paths:
      - fastqOverride_multiqc_data.json
      - fastqOverride_multiqc.html
    expire_in: 7 days
  retry:
    max: 0
    when:
      - always

override_species:
  stage: integration
  only: [merge_requests]
  except:
    variables:
      - $CI_MERGE_REQUEST_TARGET_BRANCH_NAME =~ /master/
  script:
    - hostname
    - ulimit -a
    - nextflow -q run ./rna-seq.nf --deriva ./test_data/auth/credential.json --bdbag ./test_data/auth/cookies.txt --repRID Q-Y5EW --source staging --speciesForce 'Homo sapiens' --upload true --dev false --ci true --track false -with-report ./speciesOverride_report.html
    - find . -type f -name "multiqc_data.json" -exec cp {} ./speciesOverride_multiqc_data.json \;
    - find ./**/report/ -type f -name "*multiqc.html" -exec cp {} ./speciesOverride_multiqc.html \;
    - pytest -m completionMultiqc --filename speciesOverride_multiqc_data.json
  artifacts:
    name: "$CI_JOB_NAME"
    when: always
    paths:
      - speciesOverride_multiqc_data.json
      - speciesOverride_multiqc.html
    expire_in: 7 days
  retry:
    max: 0
    when:
      - always

override_stranded:
  stage: integration
  only: [merge_requests]
  except:
    variables:
      - $CI_MERGE_REQUEST_TARGET_BRANCH_NAME =~ /master/
  script:
    - hostname
    - ulimit -a
    - nextflow -q run ./rna-seq.nf --deriva ./test_data/auth/credential.json --bdbag ./test_data/auth/cookies.txt --repRID Q-Y5EY --source staging --strandedForce unstranded --upload true --dev false --ci true --track false -with-report ./strandedOverride_report.html
    - find . -type f -name "multiqc_data.json" -exec cp {} ./strandedOverride_multiqc_data.json \;
    - find ./**/report/ -type f -name "*multiqc.html" -exec cp {} ./strandedOverride_multiqc.html \;
    - pytest -m completionMultiqc --filename strandedOverride_multiqc_data.json
  artifacts:
    name: "$CI_JOB_NAME"
    when: always
    paths:
      - strandedOverride_multiqc_data.json
      - strandedOverride_multiqc.html
    expire_in: 7 days
  retry:
    max: 0
    when:
      - always

override_spike:
  stage: integration
  only: [merge_requests]
  except:
    variables:
      - $CI_MERGE_REQUEST_TARGET_BRANCH_NAME =~ /master/
  script:
    - hostname
    - ulimit -a
    - nextflow -q run ./rna-seq.nf --deriva ./test_data/auth/credential.json --bdbag ./test_data/auth/cookies.txt --repRID Q-Y5F0 --source staging --spikeForce true --upload true --dev false --ci true --track false -with-report ./spikeOverride_report.html
    - find . -type f -name "multiqc_data.json" -exec cp {} ./spikeOverride_multiqc_data.json \;
    - find ./**/report/ -type f -name "*multiqc.html" -exec cp {} ./spikeOverride_multiqc.html \;
    - pytest -m completionMultiqc --filename spikeOverride_multiqc_data.json
  artifacts:
    name: "$CI_JOB_NAME"
    when: always
    paths:
      - spikeOverride_multiqc_data.json
      - spikeOverride_multiqc.html
    expire_in: 7 days
  retry:
    max: 0
    when:
      - always


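# consistency compares the MultiQC output of the SE and PE integration runs using the
# consistencySE/consistencyPE pytest markers and re-exports the JSON as artifacts.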
consistency:
  stage: consistency
  only: [merge_requests]
  except:
    variables:
      - $CI_MERGE_REQUEST_TARGET_BRANCH_NAME =~ /master/
  script:
    - pytest -m consistencySE
    - pytest -m consistencyPE
  artifacts:
    name: "$CI_JOB_NAME"
    when: always
    paths:
      - SE_multiqc_data.json
      - PE_multiqc_data.json
    expire_in: 7 days