/*
 * Copyright (c) 2016. The University of Texas Southwestern Medical Center
 *
 *   This file is part of the BioHPC Workflow Platform
 *
 * This is a minimal test workflow package that astrocyte tests can run against.
 *
 * @authors
 * David Trudgian <David.Trudgian@UTSouthwestern.edu>
 *
 */

// Path to an input file, or a pattern for multiple inputs. Several files
// may be given as one comma-separated string (see the channel set-up below).
// Note - $baseDir is the location of this workflow file main.nf
params.story = "$baseDir/../test_data/mobydick.txt"


// To accept multiple filenames, separated with commas, the parameter value is
// wrapped in curly brackets so that it becomes a glob alternation: "{a,b,}".
// NOTE(review): the trailing comma adds an empty alternative - presumably so
// that a single path still forms a valid brace group; confirm before removing.
stories = Channel.fromPath( "{" + params.story + ",}" )

// Case-folds each story file with `tr`, writing "<input name>.uppercase".
//
// NOTE(review): despite the process name and the ".uppercase" suffix, the tr
// call below maps A-Z onto a-z, i.e. the output is LOWERCASE. The downstream
// `tolines` process keeps only a-z and apostrophes, so it relies on this
// lowercasing - do not reverse the direction here without updating `tolines`.
process uppercase {

    cpus 1

    input:
    // One task is run per file emitted by the `stories` channel.
    file story from stories

    output:
    // Result file is passed on through the `uppercased` channel.
    file "${story.name}.uppercase" into uppercased

    """
    cat "$story" | tr "[A-Z]" "[a-z]" > "${story.name}.uppercase"
    """

}

// Splits the case-folded text into one word per line, writing
// "<input name>.tolines".
process tolines {

    cpus 1

    input:
    // One task is run per file emitted by the `uppercased` channel.
    file uppercase from uppercased

    output:
    // Result file is passed on through the `tolines` channel.
    file "${uppercase.name}.tolines" into tolines

    // tr -c complements the set (everything that is NOT a-z or an apostrophe)
    // and -s squeezes each run of such characters into a single replacement
    // character; "\012" is the octal escape for newline. Upper-case letters
    // are outside the kept set, so the input is expected to be lower-case.
    """
    cat "$uppercase" | tr -cs "a-z'" "\012"  > "${uppercase.name}.tolines"
    """

}

// Counts how often each line (one word per line) occurs in the input and
// writes the counts, highest first, to "<input name>.wordcount".
process wordcounts {

    // Publish the outputs we create here into the workflow output directory
    // ($baseDir/output); mode 'copy' copies the files there.
    publishDir "$baseDir/output", mode: 'copy'

    input:
    // One task is run per file emitted by the `tolines` channel.
    file wordlines from tolines

    output:
    // Declared without an `into` channel: the file is only published.
    file "${wordlines.name}.wordcount"

    // sort groups identical lines so that `uniq -c` can prefix each distinct
    // line with its count; the final `sort -n -r` orders by that count,
    // descending.
    """
    cat "$wordlines" | sort | uniq -c | sort -n -r > "${wordlines.name}.wordcount"
    """
}