From 5f48c1c116f2fb4ccdfca5df712e3c2f190801d5 Mon Sep 17 00:00:00 2001 From: "Gervaise H. Henry" <gervaise.henry@utsouthwestern.edu> Date: Tue, 25 Aug 2020 17:46:17 -0500 Subject: [PATCH] Add fastq override #71 --- .gitlab-ci.yml | 16 +++++++++++++++- CHANGELOG.md | 7 +++++++ README.md | 5 +++++ workflow/rna-seq.nf | 42 +++++++++++++++++++++++++++--------------- 4 files changed, 54 insertions(+), 16 deletions(-) diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 4589f16..c2d779e 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -206,4 +206,18 @@ inputBag: when: always paths: - inputBagOverride_PE_multiqc_data.json - expire_in: 7 days \ No newline at end of file + expire_in: 7 days + + fastq: + stage: override + script: + - hostname + - ulimit -a + - nextflow -q run ./workflow/rna-seq.nf --deriva ./test_data/auth/credential.json --bdbag ./test_data/auth/cookies.txt --repRID Q-Y5JA --fastqsForce 'test_data/fastq/small/Q-Y5F6_1M.R{1,2}.fastq.gz' --ci true + - find . -type f -name "multiqc_data.json" -exec cp {} ./fastqOverride_PE_multiqc_data.json \; + artifacts: + name: "$CI_JOB_NAME" + when: always + paths: + - fastqOverride_PE_multiqc_data.json + expire_in: 7 days \ No newline at end of file diff --git a/CHANGELOG.md b/CHANGELOG.md index 07869f4..104cc84 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,6 +6,12 @@ * MultiQC output custom talbes (html+JSON): * Run table: *Session ID* and *Pipeline Version* * Reference Table: *Species*, *Genome Reference Consortium Build*, *Genome Reference Consortium Patch*, *GENCODE Annotation Release* (ouputs both human and mouse versions) +* Add inputBag override param (`inputBagForce`) + * Uses provided inputBag instead of downloading from data-hub + * Still requires matching repRID input param +* Add fastq override param (`fastqsForce`) [`R1`,`R2`] + * Uses provided fastq instead of downloading from data-hub + * Still requires matching repRID input param and will pull inputBag from data-hub to access submitted metadata for 
reporting **Background** * Add GeneSymbol/EnsemblID/EntrezID translation files to references @@ -13,6 +19,7 @@ *Known Bugs* * outputBag does not contain fetch for processed data * Does not include automatic data upload +* Override params (inputBag and fastq) aren't checked for integrity <hr> diff --git a/README.md b/README.md index 2216c27..b2aa90a 100644 --- a/README.md +++ b/README.md @@ -48,6 +48,11 @@ To Run: * reference version consists of Genome Reference Consortium version, patch release and GENCODE annotation release # (leaving the params blank will use the default version tied to the pipeline version) * *current mouse* **38.p6.vM22** = GRCm38.p6 with GENCODE annotation release M22 * *current human* **38.p6.v31** = GRCh38.p12 with GENCODE annotation release 31 +* ***Optional*** input overrides + * `--inputBagForce` utilizes a local replicate inputBag instead of downloading from the data-hub (still requires accurate repRID input) + * eg: `--inputBagForce test_data/bagit/Replicate_Q-Y5F6.zip` (must be the expected bagit structure) + * `--fastqsForce` utilizes local fastq's instead of downloading from the data-hub (still requires accurate repRID input) + * eg: `--fastqsForce 'test_data/fastq/small/Q-Y5F6_1M.R{1,2}.fastq.gz'` (note the quotes around fastq's which must be named in the correct standard [*\*.R1.fastq.gz and/or \*.R2.fastq.gz*] and in the correct order) * Tracking parameters ([Tracking Site](http://bicf.pipeline.tracker.s3-website-us-east-1.amazonaws.com/)): * `--ci` boolean (default = false) * `--dev` boolean (default = false) diff --git a/workflow/rna-seq.nf b/workflow/rna-seq.nf index f1a7c65..2cf5364 100644 --- a/workflow/rna-seq.nf +++ b/workflow/rna-seq.nf @@ -21,6 +21,7 @@ params.outDir = "${baseDir}/../output" // Define override input variable params.inputBagForce = "" +params.fastqsForce = "" // Parse input variables deriva = Channel @@ -36,6 +37,7 @@ refERCCVersion = params.refERCCVersion outDir = params.outDir logsDir = "${outDir}/Logs" 
inputBagForce = params.inputBagForce +fastqsForce = params.fastqsForce // Define fixed files @@ -122,10 +124,10 @@ process getBag { path derivaConfig output: - path ("Replicate_*.zip") into bagit + path ("Replicate_*.zip") into bag when: - params.inputBagForce == "" + inputBagForce == "" script: """ @@ -139,7 +141,7 @@ process getBag { echo -e "LOG: linked" >> ${repRID}.getBag.log # deriva-download replicate RID - echo -e "LOG: fetching bagit for ${repRID} in GUDMAP" >> ${repRID}.getBag.log + echo -e "LOG: fetching bag for ${repRID} in GUDMAP" >> ${repRID}.getBag.log deriva-download-cli ${source} --catalog 2 ${derivaConfig} . rid=${repRID} echo -e "LOG: fetched" >> ${repRID}.getBag.log """ @@ -149,9 +151,9 @@ process getBag { if (inputBagForce != "") { inputBag = Channel .fromPath(inputBagForce) - .ifEmpty { exit 1, "override inputBagit file not found: ${inputBagForce}" } + .ifEmpty { exit 1, "override inputBag file not found: ${inputBagForce}" } } else { - inputBag = bagit + inputBag = bag } /* @@ -182,26 +184,36 @@ process getData { ln -sf `readlink -e deriva-cookies.txt` ~/.bdbag/deriva-cookies.txt echo -e "LOG: linked" >> ${repRID}.getData.log - # get bagit basename - replicate=\$(basename "\${inputBag}" | cut -d "." -f1) - echo -e "LOG: bagit replicate name \${replicate}" >> ${repRID}.getData.log + # get bag basename + replicate=\$(basename "${inputBag}" | cut -d "." 
-f1) + echo -e "LOG: bag replicate name \${replicate}" >> ${repRID}.getData.log - # unzip bagit - echo -e "LOG: unzipping replicate bagit" >> ${repRID}.getData.log + # unzip bag + echo -e "LOG: unzipping replicate bag" >> ${repRID}.getData.log unzip ${inputBag} echo -e "LOG: unzipped" >> ${repRID}.getData.log - # bagit fetch fastq's only and rename by repRID + # bag fetch fastq's only and rename by repRID echo -e "LOG: fetching replicate bdbag" >> ${repRID}.getData.log sh ${script_bdbagFetch} \${replicate} ${repRID} echo -e "LOG: fetched" >> ${repRID}.getData.log """ } -// Replicate raw fastq's for multiple process inputs -fastqs.into { - fastqs_trimData - fastqs_fastqc +// Set raw fastq to downloaded or forced input and replicate them for multiple process inputs +if (fastqsForce != "") { + Channel + .fromPath(fastqsForce) + .ifEmpty { exit 1, "override fastq file not found: ${fastqsForce}" } + .collect().into { + fastqs_trimData + fastqs_fastqc + } +} else { + fastqs.into { + fastqs_trimData + fastqs_fastqc + } } /* -- GitLab