Skip to content
Snippets Groups Projects
Commit e8e5d140 authored by Gervaise Henry's avatar Gervaise Henry :cowboy:
Browse files

getData setup

parent 4f3e4066
Branches
Tags
1 merge request!1Resolve "process_getData"
This commit is part of merge request !1. Comments created here will be created in the context of that merge request.
......@@ -297,6 +297,7 @@ $RECYCLE.BIN/
# nextflow analysis folders/files
/test_data/*
/workflow/docker/images/*
/workflow/.nextflow/*
/workflow/work/*
/workflow/output/*
......
cleanup.sh 0 → 100644
#!/bin/bash
# Remove the artifacts a Nextflow run leaves in the current directory:
# job output files, trace/report/timeline files, Nextflow logs, the
# .nextflow cache directory and the work directory.
#
# -f keeps the script quiet and successful when a pattern matches nothing
# (for example when it is run twice in a row); -- stops option parsing so a
# file name starting with '-' cannot be misread as an option.
rm -f -- *.out
rm -f -- pipeline_trace*.txt*
rm -f -- report*.html*
rm -f -- timeline*.html*
rm -f -- .nextflow*.log*
rm -rf -- .nextflow/
rm -rf -- work/
// Configuration profiles for runs launched from the repository root.
// NOTE(review): 'standard' is presumably meant to be the profile used when no
// -profile option is given — confirm against the Nextflow version in use.
profiles {
standard {
// Pull in the cluster settings kept in workflow/conf/biohpc.config.
includeConfig 'workflow/conf/biohpc.config'
}
}
// Default settings applied to every process: submit through SLURM to the
// 'super' queue.
process {
    executor = 'slurm'
    queue = 'super'

    // Process specific configuration.
    // 'super' is a queue (partition) name, not an executor, so it belongs in
    // `queue`. The selector uses withName because the getData process does
    // not declare a `label`, which means a withLabel selector would never
    // match it.
    withName:getData {
        queue = 'super'
    }
}
// Execution trace: one row per task with the listed columns.
trace.enabled = true
trace.file = 'pipeline_trace.txt'
trace.fields = 'task_id,native_id,process,name,status,exit,submit,start,complete,duration,realtime,%cpu,%mem,rss'

// HTML timeline of task execution.
timeline.enabled = true
timeline.file = 'timeline.html'

// HTML execution report.
report.enabled = true
report.file = 'report.html'
// Nextflow Tower monitoring.
// The access token is a credential and must not be committed to the
// repository; it is read from the TOWER_ACCESS_TOKEN environment variable of
// the shell that launches Nextflow.
// NOTE(review): the token that was previously hard-coded here is exposed in
// the git history and should be revoked and regenerated.
tower {
    accessToken = System.getenv('TOWER_ACCESS_TOKEN')
    enabled = true
}
\ No newline at end of file
# Conda environment used by the getData process: pandas for filtering
# fetch.txt and the bdbag client (installed with pip) for materializing bags.
name: bdbag
dependencies:
  - pandas=0.23.3=py36_0
  # pip is listed explicitly because the environment has a pip: section;
  # conda warns when pip-installed packages are requested without it.
  - pip
  - pip:
    - bdbag==1.5.5
# NOTE(review): no FROM instruction is visible in this excerpt; the image
# needs a Debian/Ubuntu-style base declared before these instructions.

# Refresh the package index in the same layer as the install so a cached,
# stale index is never used. wget and ca-certificates are installed because
# the next step downloads the Miniconda installer over HTTPS.
RUN apt-get update && \
    apt-get install -y python3.7 python3-pip wget ca-certificates && \
    rm -rf /var/lib/apt/lists/*

# Install Miniconda non-interactively (-b) into /miniconda, then delete the installer.
RUN wget https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh && \
    bash Miniconda3-latest-Linux-x86_64.sh -p /miniconda -b && \
    rm Miniconda3-latest-Linux-x86_64.sh

# Put the conda installation ahead of the system binaries on PATH.
ENV PATH=/miniconda/bin:${PATH}

# Register the package channels, then bring conda itself up to date.
RUN conda config --add channels defaults && \
    conda config --add channels bioconda && \
    conda config --add channels conda-forge && \
    conda update -n base -c defaults -y conda

RUN pip install --upgrade pip
#!/usr/bin/env nextflow
// Configuration profiles for runs launched from this directory.
// NOTE(review): 'standard' is presumably meant to be the profile used when no
// -profile option is given — confirm against the Nextflow version in use.
profiles {
standard {
// Pull in the cluster settings kept in conf/biohpc.config.
includeConfig 'conf/biohpc.config'
}
}
#!/usr/bin/env nextflow
// Pipeline parameters and their default values
params.bdbag = "${baseDir}/../test_data/Study_Q-Y4H0.zip"
params.outDir = "${baseDir}/../output"

// Directory under which process results are published
outDir = params.outDir

// Channel emitting the bdbag archive; the run stops with exit status 1 when
// the channel turns out to be empty
bdbag = Channel.fromPath(params.bdbag).ifEmpty { exit 1, "bdbag zip file not found: ${params.bdbag}" }
/*
 * getData: fetch study files from consortium with downloaded bdbag.zip
 *
 * Steps: unzip the bag archive, trim its fetch.txt with modifyFetch.py, then
 * let bdbag download the entries that remain.
 *
 * python must be loaded prior to nextflow run, because conda env create from
 * .yml doesn't work with a nextflow-loaded module (either in-line in the
 * process or in the config file).
 */
process getData {
  publishDir "${outDir}/temp/getData", mode: "symlink"
  conda "${baseDir}/conf/conda.env.bdbag.yml"

  input:
    file bdbag

  output:
    file("*") into dataPaths

  script:
    // Name of the directory the archive is expected to unpack into: the file
    // name without its final extension. Cutting at the first '.' would
    // truncate bag names that contain additional dots.
    // NOTE(review): assumes the zip holds one top-level directory named after
    // the archive — confirm for bags from other sources.
    def bagDir = bdbag.baseName
    """
    hostname
    ulimit -a
    unzip "${bdbag}"
    python3 "${baseDir}/scripts/modifyFetch.py" -f "${bagDir}"
    bdbag --materialize "${bagDir}"
    """
}
\ No newline at end of file
import argparse
import pandas as pd
def get_args():
    """Parse the command-line arguments.

    Returns:
        argparse.Namespace with ``fetchFile``: the path of the unzipped bag
        directory; ``main`` appends ``/fetch.txt`` to it.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '-f', '--fetchFile',
        help="Path of the unzipped bdbag.zip directory that contains fetch.txt.",
        required=True)
    return parser.parse_args()


def main():
    """Rewrite <fetchFile>/fetch.txt in place, keeping only .fastq.gz rows.

    fetch.txt is read as a header-less, tab-separated table and a row is kept
    when its third column ends with ".fastq.gz".
    """
    args = get_args()
    fetch_path = args.fetchFile + "/fetch.txt"
    # Read every column as text and disable NA detection so the rows are
    # written back unchanged: with inferred dtypes an empty field turns an
    # integer column into floats ("10" -> "10.0") and tokens such as "NA" or
    # "null" are replaced by empty fields.
    fetch = pd.read_csv(fetch_path, sep="\t", header=None, dtype=str,
                        keep_default_na=False)
    # na=False drops rows whose third column is missing (short rows).
    is_fastq = fetch[2].str.endswith(".fastq.gz", na=False)
    fetch[is_fastq].to_csv(fetch_path, sep="\t", header=False, index=False)


if __name__ == '__main__':
    main()
\ No newline at end of file
#!/bin/bash
# Extract the zip archive given as the first argument into the current
# directory. The argument is quoted so paths containing spaces or glob
# characters are passed to unzip unchanged.
unzip "$1"
\ No newline at end of file
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment