diff --git a/astrocyte_pkg.yml b/astrocyte_pkg.yml index 57a83b333141e9056fa00b894bc76e43010ee341..8417664465fb8691f52e97960b6acda0b54594c8 100755 --- a/astrocyte_pkg.yml +++ b/astrocyte_pkg.yml @@ -150,16 +150,6 @@ workflow_parameters: description: | 10x cellranger version. - - id: feature - type: select - default: 'no' - choices: - - [ 'no', 'No'] - - [ 'yes', 'Yes'] - required: true - description: | - Additional features to count (only used in cellranger version 3+, ignored otherwise). - - id: astrocyte type: select choices: diff --git a/workflow/main.test.nf b/workflow/main.test.nf deleted file mode 100644 index 581f1777764f5d67b7dc352d17bb9b3e2e350065..0000000000000000000000000000000000000000 --- a/workflow/main.test.nf +++ /dev/null @@ -1,89 +0,0 @@ -#!/usr/bin/env nextflow - -// Path to an input file, or a pattern for multiple inputs -// Note - $baseDir is the location of this workflow file main.nf - -// Define Input variables -params.fastq = "$baseDir/../test_data/*.fastq.gz" -params.designFile = "$baseDir/../test_data/design.csv" -params.genome = 'GRCh38-3.0.0' -params.genomes = [] -params.genomeLocation = params.genome ? params.genomes[ params.genome ].loc ?: false : false -params.expectCells = 10000 -params.forceCells = 0 -params.kitVersion = '3' -params.chemistry = [] -params.chemistryParam = params.kitVersion ? params.chemistry[ params.kitVersion ].param ?: false : false -params.version = '3.0.2' -params.feature = 'yes' -params.outDir = "$baseDir/output" - -// Define regular variables -designLocation = Channel - .fromPath(params.designFile) - .ifEmpty { exit 1, "design file not found: ${params.designFile}" } -fastqList = Channel - .fromPath(params.fastq) - .flatten() - .map { file -> [ file.getFileName().toString(), file.toString() ].join("\t") } - .collectFile(name: 'fileList.tsv', newLine: true) -refLocation = Channel - .fromPath(params.genomeLocation+params.genome) - .ifEmpty { exit 1, "referene not found: ${params.genome}" } -expectCells = params.expectCells -forceCells = params.forceCells -chemistryParam = params.chemistryParam -version = params.version -feature = params.feature -featurechk = feature -outDir = params.outDir - -process checkDesignFile { - - publishDir "$outDir/${task.process}", mode: 'copy' - - input: - - file designLocation - file fastqList - featurechk - - output: - - file("*.checked.csv") into designPaths - - script: - - """ - python3 $baseDir/scripts/check_design.test.py -d $designLocation -f $fastqList -t "$featurechk" - """ -} - -// Parse design file -samples = designPaths - .splitCsv (sep: ',', header: true) - .map { row -> [ row.Sample, file(row.fastq_R1), file(row.fastq_R2) ] } - .groupTuple() - //.subscribe { println it } - -// Duplicate variables -samples.into { - samples211 - samples301 - samples302 -} -refLocation.into { - refLocation211 - refLocation301 - refLocation302 -} -expectCells211 = expectCells -expectCells301 = expectCells -expectCells302 = expectCells -forceCells211 = forceCells -forceCells301 = forceCells -forceCells302 = forceCells -chemistryParam301 = chemistryParam -chemistryParam302 = chemistryParam -feature301 = feature -feature302 = feature diff --git a/workflow/scripts/check_design.test.py b/workflow/scripts/check_design.test.py deleted file mode 100755 index e08f08a2573c9a32c88ce3bdbc3860e5bd179446..0000000000000000000000000000000000000000 --- a/workflow/scripts/check_design.test.py +++ /dev/null @@ -1,139 +0,0 @@ -#!/usr/bin/env python3 - -'''Check if design file is correctly formatted and matches files list.''' - -import argparse -import logging -import pandas as pd - -EPILOG = ''' -For more details: - %(prog)s --help -''' - -# SETTINGS - -logger = logging.getLogger(__name__) -logger.addHandler(logging.NullHandler()) -logger.propagate = False -logger.setLevel(logging.INFO) - - -def get_args(): - '''Define arguments.''' - - parser = argparse.ArgumentParser( - description=__doc__, epilog=EPILOG, - formatter_class=argparse.RawDescriptionHelpFormatter) - - parser.add_argument('-d', '--design', - help="The design file to run QC (tsv format).", - required=True ) - - parser.add_argument('-f', '--fastq', - help="File with list of fastq files (tsv format).", - required=True ) - - parser.add_argument('-t', '--feature', - help="Additional features to count?", - required=True ) - - args = parser.parse_args() - return args - - -def check_design_headers_n(design): - '''Check if design file conforms to sequencing type.''' - - # Default headers - design_template = [ - 'Sample', - 'fastq_R1', - 'fastq_R2'] - - design_headers = list(design.columns.values) - - # Check if headers - logger.info("Running header check.") - - missing_headers = set(design_template) - set(design_headers) - - if len(missing_headers) > 0: - logger.error('Missing column headers: %s', list(missing_headers)) - raise Exception("Missing column headers: %s" % list(missing_headers)) - - return design - -def check_design_headers_y(design): - '''Check if design file conforms to sequencing type.''' - - # Default headers - design_template = [ - 'Sample', - 'fastq_R1', - 'fastq_R2', - 'library_type'] - - design_headers = list(design.columns.values) - - # Check if headers - logger.info("Running header check.") - - missing_headers = set(design_template) - set(design_headers) - - if len(missing_headers) > 0: - logger.error('Missing column headers: %s', list(missing_headers)) - raise Exception("Missing column headers: %s" % list(missing_headers)) - - return design - -def check_files(design, fastq): - '''Check if design file has the files found.''' - - logger.info("Running file check.") - - files = list(design['fastq_R1']) + list(design['fastq_R2']) - - files_found = fastq['name'] - - missing_files = set(files) - set(files_found) - - if len(missing_files) > 0: - logger.error('Missing files from design file: %s', list(missing_files)) - raise Exception("Missing files from design file: %s" % - list(missing_files)) - else: - file_dict = fastq.set_index('name').T.to_dict() - - design['fastq_R1'] = design['fastq_R1'].apply(lambda x: file_dict[x]['path']) - design['fastq_R2'] = design['fastq_R2'].apply(lambda x: file_dict[x]['path']) - - return design - - -def main(): - args = get_args() - design = args.design - - # Create a file handler - handler = logging.FileHandler('design.log') - logger.addHandler(handler) - - # Read files as dataframes - design_df = pd.read_csv(args.design, sep=',') - fastq_df = pd.read_csv(args.fastq, sep='\t', names=['name', 'path']) - - # Check design file - if args.feature == 'no': - new_design_df = check_design_headers_n(design_df) - else: - new_design_df = check_design_headers_y(design_df) - #new_design_df[['sample']].to_csv('library.checked.csv', header=True, sep=',', index=False) - - check_files(design_df, fastq_df) - new_design_df.drop('library_type', 1).to_csv('design.checked.csv', header=True, sep=',', index=False) - - - -if __name__ == '__main__': - main()