Fix syntax issues and libraries.

477530aa · Venkat Malladi · ae9e6c75 · 477530aa · 477530aa · 477530aa
Commit 477530aa authored 7 years ago by Venkat Malladi
--- a/workflow/main.nf
+++ b/workflow/main.nf
@@ -297,12 +297,12 @@ process poolAndPsuedoReads {

  if (pairedEnd) {
    """
-    python3 $baseDir/scripts/pool_and_psuedoreplicate.py -t $experimentObjs -p -c cutoffRatio
+    python3 $baseDir/scripts/pool_and_psuedoreplicate.py -d $experimentObjs -p -c cutoffRatio
    """
  }
  else {
    """
-    python3 $baseDir/scripts/pool_and_psuedoreplicate.py -t $experimentObjs -c cutoffRatio
+    python3 $baseDir/scripts/pool_and_psuedoreplicate.py -d $experimentObjs -c cutoffRatio
    """
  }


--- a/workflow/scripts/pool_and_psuedoreplicate.py
+++ b/workflow/scripts/pool_and_psuedoreplicate.py
@@ -82,9 +82,9 @@ def pool(tag_files, outfile, paired):
    '''Pool files together.'''

    if paired:
-        file_extension = 'bedpe.gz'
+        file_extension = '.bedpe.gz'
    else:
-        file_extension = 'bedse.gz'
+        file_extension = '.bedse.gz'

    pooled_filename = outfile + file_extension

@@ -98,7 +98,7 @@ def pool(tag_files, outfile, paired):


 def self_psuedoreplication(tag_file, prefix, paired):
-    '''Make n number of self-psuedoreplicates equivlent to reps.'''
+    '''Make 2 self-psuedoreplicates.'''

    # Get total number of reads
    no_lines = utils.count_lines(tag_file)
@@ -124,7 +124,7 @@ def self_psuedoreplication(tag_file, prefix, paired):

    # Convert read pairs to reads into standard tagAlign file

-    for i, index in enumerate(list(range(0, reps))):
+    for i, index in enumerate([0, 1]):
        steps = ['cat %s' % (splits_prefix + index)]
        if paired:
            steps.extend([r"""awk 'BEGIN{OFS="\t"}{printf "%s\t%s\t%s\tN\t1000\t%s\n%s\t%s\t%s\tN\t1000\t%s\n",$1,$2,$3,$9,$4,$5,$6,$10}'"""])
@@ -139,7 +139,7 @@ def main():
    args = get_args()
    paired = args.paired
    design = args.design
-    cutoff_ratio = args.cutoff_ratio
+    cutoff_ratio = args.cutoff

    # Create a file handler
    handler = logging.FileHandler('experiment_generation.log')
@@ -149,7 +149,7 @@ def main():
    design_df = pd.read_csv(design, sep='\t')

    # Get current directory to build paths
-    cwd = os.getwd()
+    cwd = os.getcwd()

    # Check Number of replicates and replicates
    no_reps = check_replicates(design_df)
@@ -217,7 +217,7 @@ def main():
        # Make self psuedoreplicates equivalent to number of replicates
        pseudoreplicates_dict = {}
        for rep, tag_file in zip(design_df['replicate'], design_df['tag_align']):
-            replicate_prefix = experiment_id + '_' + rep
+            replicate_prefix = experiment_id + '_' + str(rep)
            pr_dict = self_psuedoreplication(tag_file, replicate_prefix, paired)
            pseudoreplicates_dict[rep] = pr_dict


--- a/workflow/scripts/utils.py
+++ b/workflow/scripts/utils.py
@@ -56,19 +56,19 @@ def strip_extensions(filename, extensions):


 def count_lines(filename):
-    from magic import from_file
+    import mimetypes
    compressed_mimetypes = [
-        "application/x-compress",
-        "application/x-bzip2",
-        "application/x-gzip"
+        "compress",
+        "bzip2",
+        "gzip"
        ]
-    mime_type = from_file(fname, mime=True)
+    mime_type = mimetypes.guess_type(filename)[1]
    if mime_type in compressed_mimetypes:
        catcommand = 'gzip -dc'
    else:
        catcommand = 'cat'
    out, err = run_pipe([
-        '%s %s' % (catcommand, fname),
+        '%s %s' % (catcommand, filename),
        'wc -l'
        ])
    return int(out)