From f888e173865eb9a78c2d881e8cfcd69514b8d29f Mon Sep 17 00:00:00 2001
From: Brandi Cantarel <brandi.cantarel@utsouthwestern.edu>
Date: Tue, 29 Sep 2020 14:53:31 -0500
Subject: [PATCH] update designfile RNASeq

---
Review notes (ignored by git am, not part of the commit message):
 - check_designfile.pl: fix the "%%" modulus typo, use three-argument open,
   run mv/pigz without a shell and check their status, test the ".gz"
   extension instead of "gz" anywhere in the path, require exact header
   names, and verify R1 and R2 before renaming either.
 - check_inputfiles.sh: glob instead of parsing ls, quote every expansion,
   run pigz directly, leave non-FASTQ *.f* files alone.
 - checkdesignfile.sh: un-double the backslashes so CR/CRLF is really
   converted, quote the arguments, correct the header comment.
 - The "index" lines of the three new files were dropped because the blob
   ids of the edited contents are not known; they are optional for git apply.

 design_file/check_design.py      |   0
 design_file/check_designfile.pl  | 129 ++++++++++++++++++++++++++++++++
 design_file/check_inputfiles.sh  |  27 +++++++
 design_file/checkdesignfile.sh   |  17 +++++
 design_file/experiment_design.py |   0
 5 files changed, 173 insertions(+)
 mode change 100644 => 100755 design_file/check_design.py
 create mode 100755 design_file/check_designfile.pl
 create mode 100755 design_file/check_inputfiles.sh
 create mode 100755 design_file/checkdesignfile.sh
 mode change 100644 => 100755 design_file/experiment_design.py

diff --git a/design_file/check_design.py b/design_file/check_design.py
old mode 100644
new mode 100755
diff --git a/design_file/check_designfile.pl b/design_file/check_designfile.pl
new file mode 100755
--- /dev/null
+++ b/design_file/check_designfile.pl
@@ -0,0 +1,129 @@
+#!/usr/bin/perl -w
+#check_designfile.pl
+#
+# Validate an RNASeq design file and normalise the FASTQ files it lists.
+#
+# Usage: check_designfile.pl <pe|se> <design_file>
+#
+# The tab-delimited design file must have SampleID and FqR1 columns, plus
+# FqR2 when the first argument is 'pe'.  The text "FullPathTo" is removed
+# from the header and from every row.  The cleaned table is written to
+# design.valid.txt.  For every row whose FASTQ files exist, the files are
+# renamed to <SampleID>.R1.fastq.gz / <SampleID>.R2.fastq.gz (compressed
+# with pigz when not already gzipped) and a "SampleID,R1,R2" line is printed
+# on STDOUT.  Rows whose files are missing are skipped with a warning.
+
+use strict;
+use warnings;
+
+my $pe    = shift @ARGV;
+my $dfile = shift @ARGV;
+die "Usage: $0 <pe|se> <design_file>\n"
+    unless (defined $pe && defined $dfile);
+my $paired = ($pe eq 'pe');
+
+# Three-argument open with lexical handles: the file name can never be
+# mistaken for an open mode.
+open(my $out, '>', 'design.valid.txt')
+    or die "Cannot write design.valid.txt: $!\n";
+open(my $design, '<', $dfile) or die "Cannot read $dfile: $!\n";
+
+my $head = <$design>;
+die "Design file $dfile is empty\n" unless (defined $head);
+$head =~ s/\r?\n\z//;
+$head =~ s/FullPathTo//g;
+my @colnames = split(/\t/, $head);
+my %newcols = map { $_ => 1 } @colnames;
+
+# The rows are read back by exact key, so the header has to carry the exact
+# column name; a substring match would let e.g. "SampleIDs" through.
+die "Missing Sequence Files in Design File: FqR1\n" unless ($newcols{FqR1});
+die "Missing SampleID in Design File\n" unless ($newcols{SampleID});
+if ($paired) {
+    die "Missing Sequence Files in Design File: FqR2\n"
+        unless ($newcols{FqR2});
+} else {
+    delete $newcols{FqR2};
+}
+
+my @reads = $paired ? ('R1', 'R2') : ('R1');
+my @cols  = sort { $a cmp $b } keys %newcols;
+print $out join("\t", @cols), "\n";
+
+my @grp  = ('a', 'b');
+my $lnct = 0;
+ROW: while (my $line = <$design>) {
+    $line =~ s/\r?\n\z//;
+    next ROW unless ($line =~ m/\S/);    # blank line: nothing to validate
+    my $rownum = $lnct++;
+    $line =~ s/FullPathTo//g;
+    my @row = split(/\t/, $line);
+    my %hash;
+    foreach my $i (0 .. $#row) {
+        # Fields beyond the last header column have no name to store under.
+        next unless (defined $colnames[$i] && $newcols{$colnames[$i]});
+        $row[$i] =~ s/-//g unless ($colnames[$i] =~ m/Fq/);
+        $hash{$colnames[$i]} = $row[$i];
+    }
+    unless (defined $hash{SampleID} && length $hash{SampleID}) {
+        warn "Skipping line $. of $dfile: no SampleID\n";
+        next ROW;
+    }
+    # Prefix IDs that start with a digit.
+    $hash{SampleID} =~ s/^/S/ if ($hash{SampleID} =~ m/^\d/);
+    $hash{SampleName} = $hash{SampleID} unless ($hash{SampleName});
+    $hash{SubjectID}  = $hash{SampleID} unless ($hash{SubjectID});
+    # Rows without a group alternate between 'a' and 'b'.
+    $hash{SampleGroup} = $grp[$rownum % 2] unless ($hash{SampleGroup});
+    $hash{SampleGroup} =~ s/_//g;
+    # NOTE(review): SampleName, SubjectID and SampleGroup defaults only reach
+    # design.valid.txt when the header already has those columns.
+
+    # Check every read file before touching any of them, so a sample with a
+    # missing mate is skipped without R1 having been renamed already.
+    my @missing = grep { !(defined $hash{"Fq$_"} && -f $hash{"Fq$_"}) } @reads;
+    if (@missing) {
+        warn "Skipping $hash{SampleID}: missing file for @missing\n";
+        next ROW;
+    }
+    foreach my $read (@reads) {
+        my $gz = "$hash{SampleID}.$read.fastq.gz";
+        unless (stage_fastq($hash{"Fq$read"}, $gz)) {
+            warn "Skipping $hash{SampleID}: could not stage $read\n";
+            next ROW;
+        }
+        $hash{"Fq$read"} = $gz;
+    }
+
+    my @line = map { defined $hash{$_} ? $hash{$_} : '' } @cols;
+    print $out join("\t", @line), "\n";
+    # The R2 name is printed for single-end runs as well.
+    print join(",", $hash{SampleID}, "$hash{SampleID}.R1.fastq.gz",
+        "$hash{SampleID}.R2.fastq.gz"), "\n";
+}
+close($design);
+close($out) or die "Cannot finish writing design.valid.txt: $!\n";
+
+# Rename $src to $dest_gz in the current directory, compressing it with pigz
+# first when it is not gzipped yet.  Returns true on success.
+sub stage_fastq {
+    my ($src, $dest_gz) = @_;
+    # Only the file extension decides; "gz" elsewhere in the path does not.
+    if ($src =~ m/\.gz\z/) {
+        return 1 if ($src eq $dest_gz);
+        return run_cmd('mv', $src, $dest_gz);
+    }
+    (my $dest = $dest_gz) =~ s/\.gz\z//;
+    return 0 unless ($src eq $dest || run_cmd('mv', $src, $dest));
+    return run_cmd('pigz', '-f', $dest);
+}
+
+# Run a command without a shell (list form of system), so spaces and shell
+# metacharacters in file names are passed through untouched.  Returns true
+# when the command exits with status 0; otherwise warns and returns false.
+sub run_cmd {
+    my @cmd = @_;
+    return 1 if (system(@cmd) == 0);
+    warn "Command failed (wait status $?): @cmd\n";
+    return 0;
+}
diff --git a/design_file/check_inputfiles.sh b/design_file/check_inputfiles.sh
new file mode 100755
--- /dev/null
+++ b/design_file/check_inputfiles.sh
@@ -0,0 +1,27 @@
+#!/bin/bash
+#check_inputfiles.sh
+#
+# Rename every FASTQ file in the current directory to <name>_good.fastq.gz,
+# compressing files that are not gzipped yet with pigz.
+
+# Without nullglob an unmatched pattern would be looped over literally.
+shopt -s nullglob
+
+for i in *.f*
+do
+    # Skip directories and anything else that is not a file.
+    [[ -f "${i}" ]] || continue
+    case "${i}" in
+        *.fq)       new_name="${i%.fq}_good.fastq" ;;
+        *.fastq)    new_name="${i%.fastq}_good.fastq" ;;
+        *.fq.gz)    new_name="${i%.fq.gz}_good.fastq.gz" ;;
+        *.fastq.gz) new_name="${i%.fastq.gz}_good.fastq.gz" ;;
+        # Other *.f* names (e.g. FASTA) are not FASTQ; leave them alone.
+        *)          continue ;;
+    esac
+    mv "${i}" "${new_name}" || continue
+    if [[ "${new_name}" != *.gz ]]
+    then
+        pigz -f "${new_name}"
+    fi
+done
diff --git a/design_file/checkdesignfile.sh b/design_file/checkdesignfile.sh
new file mode 100755
--- /dev/null
+++ b/design_file/checkdesignfile.sh
@@ -0,0 +1,17 @@
+#!/bin/bash
+#checkdesignfile.sh
+#
+# Usage: checkdesignfile.sh <pe|se> <design_file>
+#
+# Write a copy of the design file with Unix line endings to design.fix.txt,
+# then validate it with the check_designfile.pl next to this script.
+
+baseDir="$(dirname "$0")"
+
+rpair=$1
+design=$2
+
+# Turn CRLF and lone CR line endings into LF.  The regex is single-quoted,
+# so the backslashes must not be doubled: perl has to see \r and \n.
+perl -p -e 's/\r\n?/\n/g' "${design}" > design.fix.txt
+perl "${baseDir}/check_designfile.pl" "${rpair}" design.fix.txt
diff --git a/design_file/experiment_design.py b/design_file/experiment_design.py
old mode 100644
new mode 100755
--
GitLab