diff --git a/design_file/check_design.py b/design_file/check_design.py
old mode 100644
new mode 100755
diff --git a/design_file/check_designfile.pl b/design_file/check_designfile.pl
new file mode 100755
--- /dev/null
+++ b/design_file/check_designfile.pl
@@ -0,0 +1,144 @@
+#!/usr/bin/perl -w
+#check_designfile.pl
+#
+# Validate a tab-delimited design file and stage the FASTQ files it lists.
+#
+# Usage: check_designfile.pl <pe|se> <design_file>
+#
+# The header row must contain SampleID and FqR1 (and FqR2 when the first
+# argument is 'pe'); the text "FullPathTo" is removed from header and rows.
+# For every row whose FASTQ files exist, the files are renamed in the current
+# directory to <SampleID>.R1.fastq.gz / <SampleID>.R2.fastq.gz (compressed
+# with pigz when needed).  Accepted rows are written to design.valid.txt and
+# one "SampleID,R1,R2" line per accepted row is printed on STDOUT.  Rows that
+# cannot be used are skipped with a warning on STDERR.
+
+use strict;
+use warnings;
+
+my $pe = shift @ARGV;
+my $dfile = shift @ARGV;
+unless (defined $pe && defined $dfile) {
+    die "Usage: $0 <pe|se> <design_file>\n";
+}
+
+# Three-argument open with lexical handles, so the design file name can
+# never be interpreted as an open mode or a pipe.
+open(my $out, '>', 'design.valid.txt') or die "Cannot write design.valid.txt: $!\n";
+open(my $in, '<', $dfile) or die "Cannot read $dfile: $!\n";
+my $head = <$in>;
+die "Design file $dfile is empty\n" unless (defined $head);
+$head =~ s/[\r\n]+\z//;
+$head =~ s/FullPathTo//g;
+my @colnames = split(/\t/,$head);
+my %newcols = map {$_=> 1} @colnames;
+
+# Exact column names are required: the row parser below looks values up by
+# these names, so a partial match such as "FqR1_old" is not good enough.
+unless ($newcols{FqR1}) {
+    die "Missing Sequence Files in Design File: FqR1\n";
+}
+unless ($newcols{SampleID}) {
+    die "Missing SampleID in Design File\n";
+}
+
+if ($pe eq 'pe') {
+    unless ($newcols{FqR2}) {
+        die "Missing Sequence Files in Design File: FqR2\n";
+    }
+}else {
+    delete $newcols{FqR2};
+}
+
+# FASTQ columns that must point at an existing file for a row to be kept.
+my @reads = ($pe eq 'pe') ? ('FqR1','FqR2') : ('FqR1');
+
+my @cols = sort {$a cmp $b} keys %newcols;
+print $out join("\t",@cols),"\n";
+my @grp = ('a','b');
+my $lnct = 0;
+while (my $line = <$in>) {
+    # The default group alternates a/b by row position (skipped rows count).
+    my $defgrp = $grp[$lnct % 2];
+    $lnct ++;
+    $line =~ s/[\r\n]+\z//;
+    $line =~ s/FullPathTo//g;
+    my @row = split(/\t/,$line);
+    my %hash;
+    foreach my $i (0..$#row) {
+        # Cells beyond the last header column have no name to be stored under.
+        next unless (defined $colnames[$i] && $newcols{$colnames[$i]});
+        $row[$i] =~ s/-//g unless ($colnames[$i] =~ m/Fq/);
+        $hash{$colnames[$i]} = $row[$i];
+    }
+    unless (defined $hash{SampleID} && length($hash{SampleID})) {
+        warn "Skipping line $. of $dfile: no SampleID\n";
+        next;
+    }
+    if ($hash{SampleID} =~ m/^\d/) {
+        $hash{SampleID} =~ s/^/S/;
+    }
+    $hash{SampleName} = $hash{SampleID} unless ($hash{SampleName});
+    $hash{SubjectID} = $hash{SampleID} unless ($hash{SubjectID});
+    $hash{SampleGroup} = $defgrp unless ($hash{SampleGroup});
+    $hash{SampleGroup} =~ s/_//g;
+
+    # Check every input before renaming any of them, so a missing R2 does
+    # not leave an already-renamed R1 behind for a row that gets dropped.
+    my @missing = grep { !(defined $hash{$_} && -f $hash{$_}) } @reads;
+    if (@missing) {
+        warn "Skipping $hash{SampleID}: file not found for @missing\n";
+        next;
+    }
+    my $staged = 1;
+    foreach my $read (@reads) {
+        (my $tag = $read) =~ s/^Fq//;
+        my $gz = stage_fastq($hash{$read}, "$hash{SampleID}.$tag.fastq");
+        unless (defined $gz) {
+            $staged = 0;
+            last;
+        }
+        $hash{$read} = $gz;
+    }
+    unless ($staged) {
+        warn "Skipping $hash{SampleID}: could not stage FASTQ files\n";
+        next;
+    }
+
+    my @line;
+    foreach my $f (@cols) {
+        # A row shorter than the header yields empty cells, not undef.
+        push @line, (defined $hash{$f} ? $hash{$f} : '');
+    }
+    print $out join("\t",@line),"\n";
+    # The R2 name is printed for single-end runs too, as it always has been.
+    print join(",",$hash{SampleID},"$hash{SampleID}.R1.fastq.gz","$hash{SampleID}.R2.fastq.gz"),"\n";
+}
+close($in);
+close($out) or die "Cannot finish writing design.valid.txt: $!\n";
+
+# Run an external command without a shell (list-form system), so spaces or
+# shell metacharacters in file names are passed through literally.
+# Returns true when the command exits with status 0.
+sub run_cmd {
+    my @cmd = @_;
+    return 1 if (system(@cmd) == 0);
+    warn "Command failed (status $?): @cmd\n";
+    return 0;
+}
+
+# Move $src to "$base.gz" when it already ends in .gz, otherwise move it to
+# $base and compress it with pigz.  Returns the resulting "$base.gz" name,
+# or undef when a command failed.
+sub stage_fastq {
+    my ($src, $base) = @_;
+    my $compressed = ($src =~ m/\.gz$/);
+    my $dest = $compressed ? "$base.gz" : $base;
+    if ($src ne $dest) {
+        run_cmd('mv', '--', $src, $dest) or return;
+    }
+    unless ($compressed) {
+        run_cmd('pigz', '-f', $dest) or return;
+    }
+    return "$base.gz";
+}
diff --git a/design_file/check_inputfiles.sh b/design_file/check_inputfiles.sh
new file mode 100755
--- /dev/null
+++ b/design_file/check_inputfiles.sh
@@ -0,0 +1,35 @@
+#!/bin/bash
+#check_inputfiles.sh
+#
+# Rename every FASTQ file in the current directory to <name>_good.fastq.gz,
+# compressing files that are not yet gzipped with pigz.
+
+# With nullglob an unmatched pattern expands to nothing, so the loop is
+# simply skipped when the directory holds no candidate files.
+shopt -s nullglob
+
+# Iterate over the glob itself rather than `ls` output so that file names
+# containing whitespace are handled as single words.
+for i in *.f*;
+do
+    [[ -f ${i} ]] || continue
+    case "${i}" in
+        *.fq.gz)
+            mv -- "${i}" "${i%.fq.gz}_good.fastq.gz"
+            ;;
+        *.fastq.gz)
+            mv -- "${i}" "${i%.fastq.gz}_good.fastq.gz"
+            ;;
+        *.fq)
+            new_name="${i%.fq}_good.fastq"
+            mv -- "${i}" "${new_name}" && pigz -f "${new_name}"
+            ;;
+        *.fastq)
+            new_name="${i%.fastq}_good.fastq"
+            mv -- "${i}" "${new_name}" && pigz -f "${new_name}"
+            ;;
+        *)
+            # Not a FASTQ name (e.g. *.fa): leave the file untouched.
+            ;;
+    esac
+done
diff --git a/design_file/checkdesignfile.sh b/design_file/checkdesignfile.sh
new file mode 100755
--- /dev/null
+++ b/design_file/checkdesignfile.sh
@@ -0,0 +1,17 @@
+#!/bin/bash
+#checkdesignfile.sh
+#
+# Usage: checkdesignfile.sh <pe|se> <design_file>
+#
+# Normalise the line endings of the design file into design.fix.txt, then
+# validate it with check_designfile.pl from this script's own directory.
+
+baseDir="$(dirname "$0")"
+
+rpair=$1
+design=$2
+
+# Inside single quotes the shell passes backslashes through unchanged, so
+# perl must see \r and \n with a single backslash to match CR / CRLF.
+perl -p -e 's/\r\n?/\n/g' "${design}" > design.fix.txt
+perl "${baseDir}/check_designfile.pl" "${rpair}" design.fix.txt
diff --git a/design_file/experiment_design.py b/design_file/experiment_design.py
old mode 100644
new mode 100755