Commit 54b35b23 authored by Gervaise Henry's avatar Gervaise Henry 🤠
Browse files

Initial commit

parents
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class
# C extensions
*.so
# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
*.egg-info/
.installed.cfg
*.egg
# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec
# Installer logs
pip-log.txt
pip-delete-this-directory.txt
# Unit test / coverage reports
htmlcov/
.tox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
.hypothesis/
# Translations
*.mo
*.pot
# Django stuff:
*.log
local_settings.py
# Flask stuff:
instance/
.webassets-cache
# Scrapy stuff:
.scrapy
# Sphinx documentation
docs/_build/
# PyBuilder
target/
# Jupyter Notebook
.ipynb_checkpoints
# pyenv
.python-version
# celery beat schedule file
celerybeat-schedule
# SageMath parsed files
*.sage.py
# Environments
.env
.venv
env/
venv/
ENV/
# Spyder project settings
.spyderproject
.spyproject
# Rope project settings
.ropeproject
# mkdocs documentation
/site
# mypy
.mypy_cache/
# nextflow analysis folders/files
/workflow/.nextflow/
/test_data/*
/workflow/work
/workflow/output/design/
/workflow/output/bcl/
/workflow/output/fastq/
pipeline_trace*.txt*
.nextflow*.log*
report.html
*~
# CellRanger Pipeline
// Execution settings applied to the pipeline's processes.
// NOTE(review): the `$processName { ... }` selector form is the older Nextflow
// syntax (newer releases use `withName:`) — confirm against the Nextflow
// version this pipeline is run with.
process {
// Defaults for every process: submit through SLURM to the 'super' queue.
executor = 'slurm'
queue='super'
// Process specific configuration
// Design check: loads the Python environment module and overrides the
// executor to 'local' instead of the SLURM default above.
$checkDesignFile {
module = ['python/3.6.1-2-anaconda']
executor = 'local'
}
// Untar step: requests 32 CPUs.
$untarBCL {
cpus = 32
}
// mkfastq step: loads the cellranger and bcl2fastq modules, requests 128 CPUs.
$mkfastq {
module = ['cellranger/2.1.1', 'bcl2fastq/2.17.1.14']
cpus = 128
}
}
// Pipeline parameters: a lookup table from genome key to the directory of
// its cellranger reference (`ref`).
params {
// Reference file paths on BioHPC
genomes {
// Each entry is keyed by the genome name and holds a single `ref` path.
'ercc92' {
ref = '/project/apps_database/cellranger/refdata-cellranger-ercc92-1.2.0'
}
'GRCh38' {
ref = '/project/apps_database/cellranger/refdata-cellranger-GRCh38-1.2.0'
}
'hg19' {
ref = '/project/apps_database/cellranger/refdata-cellranger-hg19-1.2.0'
}
'mm10' {
ref = '/project/apps_database/cellranger/refdata-cellranger-mm10-1.2.0'
}
// Combined hg19 + mm10 reference.
'hg19.mm10' {
ref = '/project/apps_database/cellranger/refdata-cellranger-hg19_and_mm10-1.2.0'
}
}
}
// Execution trace: enabled, written to pipeline_trace.txt with the columns
// listed in `fields`.
trace {
enabled = true
file = 'pipeline_trace.txt'
fields = 'task_id,native_id,process,name,status,exit,submit,start,complete,duration,realtime,%cpu,%mem,rss'
}
// Timeline report: enabled, written to timeline.html.
timeline {
enabled = true
file = 'timeline.html'
}
// Execution report: enabled, written to report.html.
report {
enabled = true
file = 'report.html'
}
#!/usr/bin/env nextflow
// Path to an input file, or a pattern for multiple inputs
// Note - $baseDir is the location of this workflow file main.nf
// Define Input variables
// Defaults point at the test_data directory one level above the workflow.
params.bcl = "$baseDir/../test_data/*.tar"
params.designFile = "$baseDir/../test_data/design.csv"
// NOTE(review): 'GRCm38' is not one of the keys in the genomes table visible
// in the BioHPC config ('ercc92', 'GRCh38', 'hg19', 'mm10', 'hg19.mm10'), and
// params.genome is not read by any process shown here — confirm the intended
// default before it is used for a reference lookup.
params.genome = 'GRCm38'
// Define List of Files
// One channel item per tar archive matched by params.bcl.
tarList = Channel.fromPath( params.bcl )
// Define regular variables
// Runs scripts/check_design.py on the design file and emits the design.csv
// it produces on the designPaths channel; outputs are copied to output/design.
// NOTE(review): the input section holds a bare `params.designFile` expression
// with no qualifier or source channel; the script reaches the file through
// the $params.designFile path instead — confirm this is intentional.
process checkDesignFile {
publishDir "$baseDir/output/design", mode: 'copy'
input:
params.designFile
output:
file("design.csv") into designPaths
script:
"""
python $baseDir/scripts/check_design.py -d $params.designFile
"""
}
// Extracts each tar archive from tarList and emits everything found in the
// task directory (`*`) on bclPaths; outputs are copied to output/bcl.
process untarBCL {
// Label each task with the archive it is extracting.
tag "$tar"
publishDir "$baseDir/output/bcl", mode: 'copy'
input:
file tar from tarList
output:
file("*") into bclPaths
script:
"""
tar -xvf $tar
"""
}
// Runs `cellranger mkfastq` for each item from bclPaths, using the checked
// design file as the --csv sample sheet. Files matching
// **/outs/fastq_path/**/* are emitted on fastqPaths and copied to
// output/fastq/<bcl base name>.
process mkfastq {
tag "${bcl.baseName}"
publishDir "$baseDir/output/fastq/${bcl.baseName}", mode: 'copy'
input:
// The BCL item is taken as a value (not staged as a file); the design file
// is staged from the designPaths channel.
val bcl from bclPaths
file designPaths
output:
file("**/outs/fastq_path/**/*") into fastqPaths
script:
"""
cellranger mkfastq --id="${bcl.baseName}" --run=$bcl --csv=$designPaths
"""
}
// Configuration profiles. The 'standard' profile pulls in the BioHPC
// settings from conf/biohpc.config.
profiles {
standard {
includeConfig 'conf/biohpc.config'
}
}
#!/usr/bin/env python3
'''Check if design file is correctly formatted and matches files list.'''
import argparse
import logging
import pandas as pd
# Text passed to argparse as the epilog of the --help output; argparse
# substitutes %(prog)s with the program name.
EPILOG = '''
For more details:
%(prog)s --help
'''
# SETTINGS
# Module-level logger. It starts with only a NullHandler attached, does not
# propagate records to ancestor loggers, and accepts INFO and above.
logger = logging.getLogger(__name__)
logger.addHandler(logging.NullHandler())
logger.propagate = False
logger.setLevel(logging.INFO)
def get_args(argv=None):
    '''Parse the command-line arguments of the design-file check.

    Args:
        argv: Optional list of argument strings to parse. When left as
            None, argparse reads the arguments from sys.argv.

    Returns:
        The argparse namespace; its ``design`` attribute holds the path
        given with -d/--design (required).
    '''
    parser = argparse.ArgumentParser(
        description=__doc__, epilog=EPILOG,
        formatter_class=argparse.RawDescriptionHelpFormatter)
    # The design file is read as comma-separated text and re-written as
    # design.csv, so the help text advertises csv rather than tsv.
    parser.add_argument('-d', '--design',
                        help="The design file to run QC (csv format).",
                        required=True)
    return parser.parse_args(argv)
def check_design_headers(design):
    '''Check that the design table contains every required column.

    The required columns are 'Lane', 'Sample' and 'Index'; additional
    columns are accepted.

    Args:
        design: Table exposing its column labels through
            ``design.columns.values`` (a pandas DataFrame in this script).

    Returns:
        The ``design`` object that was passed in, unmodified.

    Raises:
        Exception: If one or more required columns are missing. The
            message lists them in template order.
    '''
    # Default headers
    design_template = [
        'Lane',
        'Sample',
        'Index']
    design_headers = set(design.columns.values)

    # Check if headers
    logger.info("Running header check.")
    # Walk the template instead of taking a set difference so the reported
    # order is stable from run to run.
    missing_headers = [
        header for header in design_template
        if header not in design_headers]
    if missing_headers:
        logger.error('Missing column headers: %s', missing_headers)
        raise Exception("Missing column headers: %s" % missing_headers)
    return design
def main():
    '''Validate the design file named on the command line.

    Reads the file as comma-separated text, checks its column headers and
    writes the table to design.csv in the current directory. Log records
    are written to design.log.
    '''
    args = get_args()

    # Create a file handler
    handler = logging.FileHandler('design.log')
    logger.addHandler(handler)
    try:
        # Read files as dataframes
        design_df = pd.read_csv(args.design, sep=',')

        # Check design file
        new_design_df = check_design_headers(design_df)
        new_design_df.to_csv('design.csv', header=True, sep=',', index=False)
    finally:
        # Detach and close the log file even when the check raises, so the
        # file handle is not left open.
        logger.removeHandler(handler)
        handler.close()


if __name__ == '__main__':
    main()
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment