diff --git a/workflow/main.nf b/workflow/main.nf index 98ebe1096609291b490ced628828f615313c50aa..6c560aa0b04be2bd9721495b1e0afca7b7554f22 100644 --- a/workflow/main.nf +++ b/workflow/main.nf @@ -297,12 +297,12 @@ process poolAndPsuedoReads { if (pairedEnd) { """ - python3 $baseDir/scripts/pool_and_psuedoreplicate.py -t $experimentObjs -p -c cutoffRatio + python3 $baseDir/scripts/pool_and_psuedoreplicate.py -d $experimentObjs -p -c cutoffRatio """ } else { """ - python3 $baseDir/scripts/pool_and_psuedoreplicate.py -t $experimentObjs -c cutoffRatio + python3 $baseDir/scripts/pool_and_psuedoreplicate.py -d $experimentObjs -c cutoffRatio """ } diff --git a/workflow/scripts/pool_and_psuedoreplicate.py b/workflow/scripts/pool_and_psuedoreplicate.py index 716779754179d54f56fd23c25c8f5701bd387b08..06fb8e80fec85e3d01a15ce61353676d3013ca1f 100644 --- a/workflow/scripts/pool_and_psuedoreplicate.py +++ b/workflow/scripts/pool_and_psuedoreplicate.py @@ -82,9 +82,9 @@ def pool(tag_files, outfile, paired): '''Pool files together.''' if paired: - file_extension = 'bedpe.gz' + file_extension = '.bedpe.gz' else: - file_extension = 'bedse.gz' + file_extension = '.bedse.gz' pooled_filename = outfile + file_extension @@ -98,7 +98,7 @@ def pool(tag_files, outfile, paired): def self_psuedoreplication(tag_file, prefix, paired): - '''Make n number of self-psuedoreplicates equivlent to reps.''' + '''Make 2 self-psuedoreplicates.''' # Get total number of reads no_lines = utils.count_lines(tag_file) @@ -124,7 +124,7 @@ def self_psuedoreplication(tag_file, prefix, paired): # Convert read pairs to reads into standard tagAlign file - for i, index in enumerate(list(range(0, reps))): + for i, index in enumerate([0, 1]): steps = ['cat %s' % (splits_prefix + index)] if paired: steps.extend([r"""awk 'BEGIN{OFS="\t"}{printf "%s\t%s\t%s\tN\t1000\t%s\n%s\t%s\t%s\tN\t1000\t%s\n",$1,$2,$3,$9,$4,$5,$6,$10}'"""]) @@ -139,7 +139,7 @@ def main(): args = get_args() paired = args.paired design = args.design - cutoff_ratio = args.cutoff_ratio + cutoff_ratio = args.cutoff # Create a file handler handler = logging.FileHandler('experiment_generation.log') @@ -149,7 +149,7 @@ def main(): design_df = pd.read_csv(design, sep='\t') # Get current directory to build paths - cwd = os.getwd() + cwd = os.getcwd() # Check Number of replicates and replicates no_reps = check_replicates(design_df) @@ -217,7 +217,7 @@ def main(): # Make self psuedoreplicates equivalent to number of replicates pseudoreplicates_dict = {} for rep, tag_file in zip(design_df['replicate'], design_df['tag_align']): - replicate_prefix = experiment_id + '_' + rep + replicate_prefix = experiment_id + '_' + str(rep) pr_dict = self_psuedoreplication(tag_file, replicate_prefix, paired) pseudoreplicates_dict[rep] = pr_dict diff --git a/workflow/scripts/utils.py b/workflow/scripts/utils.py index 11c4170a00153f4c2b20dce03706e529231cb294..2643c6e409938d30453d1bc93359df81651de5fe 100644 --- a/workflow/scripts/utils.py +++ b/workflow/scripts/utils.py @@ -56,19 +56,19 @@ def strip_extensions(filename, extensions): def count_lines(filename): - from magic import from_file + import mimetypes compressed_mimetypes = [ - "application/x-compress", - "application/x-bzip2", - "application/x-gzip" + "compress", + "bzip2", + "gzip" ] - mime_type = from_file(fname, mime=True) + mime_type = mimetypes.guess_type(filename)[1] if mime_type in compressed_mimetypes: catcommand = 'gzip -dc' else: catcommand = 'cat' out, err = run_pipe([ - '%s %s' % (catcommand, fname), + '%s %s' % (catcommand, filename), 'wc -l' ]) return int(out)