diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 9483d1bbfe08d08895fccccc908116131b595be3..95ca346e1cee0996c2c85feca8d08d6d79f9ba1f 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -1,5 +1,5 @@ before_script: - - module add python/3.6.1-2-anaconda + - module add python/3.6.4-anaconda - pip install --user pytest-pythonpath==0.7.1 pytest-cov==2.5.1 - module load singularity/3.0.2 - module load nextflow/19.09.0 @@ -22,8 +22,8 @@ getData: stage: unit script: - ln -sfn `readlink -e ./test_data/auth/cookies.txt` ~/.bdbag/deriva-cookies.txt - - unzip ./test_data/bagit/Replicate_16-1ZX4 - - singularity run 'docker://bicf/gudmaprbkfilexfer:1.3' sh ./workflow/scripts/bdbagFetch.sh Replicate_16-1ZX4 16-1ZX4 + - unzip ./test_data/bagit/Study_Q-Y4H0.zip + - singularity run 'docker://bicf/gudmaprbkfilexfer:1.3' bash ./workflow/scripts/bdbagFetch.sh Study_Q-Y4H0 Q-Y4H0 TEST - pytest -m getData parseMetadata: @@ -53,14 +53,14 @@ trimData: alignData: stage: unit script: - - singularity run 'docker://bicf/gudmaprbkaligner:2.0.0' hisat2 -p `nproc` --add-chrname --un-gz Q-Y5JA_1M.se.unal.gz -S Q-Y5JA_1M.se.sam -x /project/BICF/BICF_Core/shared/gudmap/references/GRCh38.p12.v31/hisat2/genome --rna-strandness F -U ./test_data/fastq/small/Q-Y5JA_1M_trimmed.fq.gz + - singularity run 'docker://bicf/gudmaprbkaligner:2.0.0' hisat2 -p `nproc` --add-chrname --un-gz Q-Y5JA_1M.se.unal.gz -S Q-Y5JA_1M.se.sam -x /project/BICF/BICF_Core/shared/gudmap/references/GRCh38.p12.v31/hisat2/genome --rna-strandness F -U ./test_data/fastq/small/Q-Y5JA_1M_trimmed.fq.gz --summary-file ${repRID}.alignSummary.txt --new-summary - singularity run 'docker://bicf/gudmaprbkaligner:2.0.0' samtools view -1 -@ `nproc` -F 4 -F 8 -F 256 -o Q-Y5JA_1M.se.bam Q-Y5JA_1M.se.sam - singularity run 'docker://bicf/gudmaprbkaligner:2.0.0' samtools sort -@ `nproc` -O BAM -o Q-Y5JA_1M.se.sorted.bam Q-Y5JA_1M.se.bam - - singularity run 'docker://bicf/gudmaprbkaligner:2.0.0' samtools index -@ `nproc` -b Q-Y5JA_1M.se.sorted.bam Q-Y5JA_1M.se.sorted.bai - - singularity run 'docker://bicf/gudmaprbkaligner:2.0.0' hisat2 -p `nproc` --add-chrname --un-gz Q-Y5JA_1M.pe.unal.gz -S Q-Y5JA_1M.pe.sam -x /project/BICF/BICF_Core/shared/gudmap/references/GRCh38.p12.v31/hisat2/genome --rna-strandness FR --no-mixed --no-discordant -1 ./test_data/fastq/small/Q-Y5JA_1M_R1_val_1.fq.gz -2 ./test_data/fastq/small/Q-Y5JA_1M_R2_val_2.fq.gz + - singularity run 'docker://bicf/gudmaprbkaligner:2.0.0' samtools index -@ `nproc` -b Q-Y5JA_1M.se.sorted.bam Q-Y5JA_1M.se.sorted.bam.bai + - singularity run 'docker://bicf/gudmaprbkaligner:2.0.0' hisat2 -p `nproc` --add-chrname --un-gz Q-Y5JA_1M.pe.unal.gz -S Q-Y5JA_1M.pe.sam -x /project/BICF/BICF_Core/shared/gudmap/references/GRCh38.p12.v31/hisat2/genome --rna-strandness FR --no-mixed --no-discordant -1 ./test_data/fastq/small/Q-Y5JA_1M_R1_val_1.fq.gz -2 ./test_data/fastq/small/Q-Y5JA_1M_R2_val_2.fq.gz --summary-file ${repRID}.alignSummary.txt --new-summary - singularity run 'docker://bicf/gudmaprbkaligner:2.0.0' samtools view -1 -@ `nproc` -F 4 -F 8 -F 256 -o Q-Y5JA_1M.pe.bam Q-Y5JA_1M.pe.sam - singularity run 'docker://bicf/gudmaprbkaligner:2.0.0' samtools sort -@ `nproc` -O BAM -o Q-Y5JA_1M.pe.sorted.bam Q-Y5JA_1M.pe.bam - - singularity run 'docker://bicf/gudmaprbkaligner:2.0.0' samtools index -@ `nproc` -b Q-Y5JA_1M.pe.sorted.bam Q-Y5JA_1M.pe.sorted.bai + - singularity run 'docker://bicf/gudmaprbkaligner:2.0.0' samtools index -@ `nproc` -b Q-Y5JA_1M.pe.sorted.bam Q-Y5JA_1M.pe.sorted.bam.bai - pytest -m alignData dedupData: @@ -68,7 +68,7 @@ dedupData: script: - singularity run 'docker://bicf/gudmaprbkdedup:2.0.0' java -jar /picard/build/libs/picard.jar MarkDuplicates I=./test_data/bam/small/Q-Y5JA_1M.se.sorted.bam O=Q-Y5JA_1M.se.deduped.bam M=Q-Y5JA_1M.se.deduped.Metrics.txt REMOVE_DUPLICATES=true - singularity run 'docker://bicf/gudmaprbkdedup:2.0.0' samtools sort -@ `nproc` -O BAM -o Q-Y5JA_1M.se.sorted.deduped.bam ./test_data/bam/small/Q-Y5JA_1M.se.deduped.bam - - singularity run 'docker://bicf/gudmaprbkdedup:2.0.0' samtools index -@ `nproc` -b ./test_data/bam/small/Q-Y5JA_1M.se.sorted.deduped.bam Q-Y5JA_1M.se.sorted.deduped.bai + - singularity run 'docker://bicf/gudmaprbkdedup:2.0.0' samtools index -@ `nproc` -b ./test_data/bam/small/Q-Y5JA_1M.se.sorted.deduped.bam Q-Y5JA_1M.se.sorted.deduped.bam.bai - pytest -m dedupData makeBigWig: @@ -90,6 +90,12 @@ fastqc: - singularity run 'docker://bicf/fastqc:2.0.0' ./test_data/fastq/small/Q-Y5JA_1M.R1.fastq.gz -o . - pytest -m fastqc +inferMetadata: + stage: unit + script: + - singularity run 'docker://bicf/rseqc3.0:2.0.0' tin.py -i ./test_data/bam/small/Q-Y5JA_1M.se.sorted.deduped.bam -r /project/BICF/BICF_Core/shared/gudmap/references/GRCh38.p12.v31/bed/genome.bed + - pytest -m inferMetadata + integration_se: stage: integration script: diff --git a/docs/RNA-Seq Pipeline Design Flowchart.drawio b/docs/RNA-Seq Pipeline Design Flowchart.drawio index fcb00628bd6bdddddf2fbd8745bb0a726666d5cd..015897afb0156920a9d1d7ae3bc739f545cd9dda 100644 --- a/docs/RNA-Seq Pipeline Design Flowchart.drawio +++ b/docs/RNA-Seq Pipeline Design Flowchart.drawio @@ -1 +1 @@ -<mxfile host="Electron" modified="2020-02-18T03:32:33.287Z" agent="Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) draw.io/12.6.5 Chrome/80.0.3987.86 Electron/8.0.0 Safari/537.36" etag="Xgv5JCNctLe5mDh8TcIL" version="12.6.5" type="device"><diagram name="Page-1" id="74e2e168-ea6b-b213-b513-2b3c1d86103e">7V1bd5u4Fv41Weuch3RxN35skt5mmmnSnnbavnTJINs0GFzATdxff8RFXCSZi8GAiPswE4QEeO9vX7UlXcjXm6c3Htiub10T2heSYD5dyDcXkiQKuob+F7bs4xZdTBpWnmUmnbKGT9YfiEcmrTvLhH6hY+C6dmBti42G6zjQCAptwPPcx2K3pWsX37oFK0g1fDKATbf+a5nBOmkVBSG78RZaq3Xyal1NbiyA8bDy3J2TvO9CkpfRv/j2BuBnJf39NTDdx1yT/OpCvvZcN4j/2jxdQzukLSZbPO71gbvpd3vQCeoMmM2gZsxnS3GumUDX5cvkCb+BvYP4J2g2etbV0kWPRF8c7BMqab92Lr5x6Uc8fIk6iML2KbuJ/lqF/78FloNuvnSAvQ8sw8dPRZ8WPzjuhpv9LXAOv+wxIX34OiXkiRANRXCwXS9u9lYL8J/ojnSN/sv867/pwOipS7Cx7H08fOM6LvoGAxa7ZL+S+SMvJJUUChXRPWyNWJpeYT6oESdQy034d/hhakhwFfGrqq+Y9sXsPuoxUvaYmOfpHROau210YwE2Wa+YJWkvP/Dch0RIwsawk3y1DjbRb78Ro0uE8m08ZvO0ConzYmm7j8YaeMEL0zV2G/T58cDHtRXATzHhUfdH1Dl+Qkj4sEV4IalRy9Ky7euI3/hb5KVuQMOI+0efRdxf6KoSCqoacQr/augF8KkWXdNbKUnfQHcDA28fdc8/JM8EYZ+7IeRvPKZkS27m7q1jjKc3tdw94Gftq9wnkDyPLlPG5xuLcEz6UbhNRTQWRwxyqSCZkv9obWzgoKuY7yHX5Stjbdnme7B3d6Ei8gOkGPEV4g7ifKL1I/mKNCYMdVJ4lSrF8MIGC2hfpWo1Zql847jRC/Pwi98byunrRJJvvkDPBA5ImpM36ugS2NbKQX8biMHQu8jDKfxNsqlC3VQuikCK7ujSQta0lA55VYt1Z4yorClRvRlYhOSulliBxEqK2Co85myOhm3OOmdvND1pBImhy1CQKfuLlPc1db9E6f4VDK7AqibHKa52QdODBqqayAlV2URl0FTrgKZX374/3cs/fkg//ty7u9nPH4+qdSkJc4qE0EQOR3LpesHaXbnINL7KWgmZ+LnbbHF/4BmoJRv13nW3CQ9+wiDYJzwAu8Atcgg+WcHX5IHh39/Cv1/M1OTy5il372aPLxxEg6/5i3iYii+zYdFVYdwd9CxExFDEYtEmWS9E/+qxPsGn7+48IyHjw82H7/76zT/318u3Czv4fXf7yr5UE48ReAi/cb/7Hz/luzd/f/ny7p8V+GvlBr/unEtRlOOeITNKoeRBGwTW76J3yMJJNPSl54F9rsPWtZzAzz35LmzI1IBUVAOSqBZ9toruc4FAaPz+DK/pD6kF4TKSVquGDLUh4HLmPDHmBTyChe/auwC+9IwEs1FrdqVQKl46lV7Byq+28pYJ5a0w9AxLd6eNbfQMk0kYzi3UTBul8kLNqRWxpk7J1Mi3ghZh6xRaC3aoNkQcaeb0RpkwdKc12nGdDtk8uLUtAwRh9PLx3Q2FioLRJqUrdtaRqw6cHbDfOdvIZWNJclHW/cTya0URFvsRv7rS14WRLzcmbD7ErvTCw170AqysgHasKG7kWYUZUxJDsRmVsCaKnXrhRkrmPthBfT3NCCTPNyAAnDqy+C4RLnTi2CZv+wiNADirMLJv8DqZ8baZTLwN2Mj9c5AQXIXqwqc428w7YTrYoqoNavkGc6ePsofYS6iyh1pNcyiLXdvDCXjRB3A6O+N0OJxKE8CpPOsh2tNqG1Buwz3sI3Ac7mE859i0BH7wqyuvcrOzA+sS+5YH2HsSUmtFyDNIreun8SVLDVyO0OjBwGR6lHzTmuW4n4rW1McrFJnRb/Xh7UFa8+S9y6TeHzwtLart09Jj90aOcjyUmo7HjHY8mKkBaTYFx0PowfGYNdQA3LofWNXx4X6UoppRKrKzyXoO28It/hYaFszXf2T3WL2tB1izL3RMB/rlT0aN+Y8j4IR4EVCcTWaa8zBImnLYYkEvZK5lAPtlMte8sUwzUqEuuhOaftS2Rm3Q6QcztZNyJ5lcpmeQAhQj8p+VU/TR2XVNmrxd7y7LoNY09jpt7Jn9FGUCxr6XOWW9vkbg1s5jrceHnS8FNMGnDWIHNas1yvxDWx4MGhMzU3Ef4ZJvo6kp4zOa0w+GuzOaeLotZzTZVGWUVBwg/0hiZKmZ2SS69zKJNBPOSB0SqfoEkNr1NFIpTauNF7f+HTbPDdLAA/p3BwDNT9kguiC1Qq+lhAxtwvaZhbrapPPaiXa+vkBJrAeXEFEkWgm1gg56kd+Vgx+mdBfAh8e79o3FTy2KH0P6mG7n6WSPTqK+tT6BcDWUwPTyhw2l2qq7PkOpcuudI3hEZ2ERxrMd4brrcsy2ZGfNoPdXjklDPFp9xX/qVxdGF8XOzqnf+tYcF+NVWfM5bcyZM2Ln1US1YTqsy/m8YTqfAEz7iV/19uW6/K3bHAKRI0n+tYKkovQwZzZv4Etxm1TBDiMfk2ZlrkCOT9EmGiONL9oSvNflXmU2rUBva7L07jWeK9XYDYu/fhkRQ0Aw3SqttrxVWFEki7fKyYJIVaVo3qN3jv/OeStHJoSLfo90klphkXaFypcHVxYQaaPwhUSVUDliwbup7C/qKgHEk6yn089IbYHU0tUulUAdh9OOgVkXqGT/foA6bz9n32kkORh0uwSqVBOo45iwp4A6bwZUldzKkeivzNv170kQ2vsWz1kQymcRKyVhHKm/2byZyib794JUSWifo37OSC2tJeYEqBoZd1cAlexfpYL1hiqe7N+TIOD1kDwKwrGgP7xRoQn8dZpVPU42StfTceLOKDNCNqrcjdkg2OXZ3TgWuwWEttpxsw12e6p3Gwa8SoUhILMhTfv3Ixztd806C0eHin0cTs+pZYNyYpr271o2LCt40u/uvc1f+s8/1oedqWiMYxCi7fD5r1mb97uJYI3XMTcRVIm39bGJ4GxkvsJwZUo08MqEpJCfYPVjzHywFeUo9B8X9XMjs9xThuqBmacJYLWnIrr2c3T8YXUgDSoKU9ChXVfSlZG0lofHSyVdqSvLcTWdxObVRXI0FemP91HlRbArOXqKwa7k0Kne2DV8LV6qhJ59cdhJGDyGArESeRyo6nK08jh8raZIi+MGPMAra/WvxevZXviuSHoap01nULkoWa/JXYl4X7cJDfDPHfjs3MHt/uv/Fp9vlc/z7eay/bTd2L3skoiQcICLwCsVk8p8LmO/eWY/nQ3dMXveXWcvyuhXTx3x4nqXa16OfW96vfzigOHgxcyfhlXD+92MbeoNJD1TOPlIGt8mq7pE0XT6aa2aKdhSeFYvlWAUibE7TiEJ28+EgT6yYsbniNZxLOzhYspgZBWHzxGt41jd03IiQe0hnMEkreV2cR3OyBOYSmDkLiNuoSbD/027yVwHNa0ZNnxQI9LZgmC7OXOLxa0RpJrpbQGW24czu5jsqj2Rc7IgVsLp7LE4WoOtceqlEBjbnrzztTJF/f7PJ/Fa+vzqrbTwPc2GOHM/9BYASsPlT9SAfsrY8ZqCM4h7ATFj+oMrEFcsux4KxCNL0EwcxIwlGTyBWJmNE8Qjq6CfDIhLy3B4AbEoNNTE5ICq5dGiorYc0LWYMNlBT9PttrYLzAnM0ylDnutUhv16xOYlO1cKKz6Sc8yKpUF3he7MWFwKLwSxaDBE/fjkf2kl0/GLUmnbcbhYp39T0XBTL2qAONM7VeRMuD7PbRJPiUrW8quDizf6RuW84ZaIZP+unQsmJtsvDDxjkrm1MCeYrPCpB8Fk+130z5istN6Hvd/hA71K6819oGcBY//2w+Mn8YP4wTf+/v4kWneMQC89MbkgDtqvnRu2Ll0nuIxnr16iDqqwfcpuksugws7HP+XO2kLbcsKlDTfQDxc3ScJrPMNGn+ucW3sVv5dqXlStyMpHTslqKgMJQihnh5dZeejb/oBF9KhQphLooeeqVxfqTfgsJPh+Fn7RMtY4LCJ2rGAdEacwoiK5g6CImebR6RAW093fAqcOBkQmBu6vc/yLH3WAhewkhLG2bPM92Lu7kAR+AIwHfEUnKIr63F8DM1o8F17YYAHtKzR6FXUiVugVI2gx+XGvwcayQw59gZ4JHJA0J2/UWSDrIvBunhHRCDiJjJSIorNSIvqpAEVXVETHths12X6S3FM58qvJ3c/yr3md5V8zxvtOu/qLnYyXpNZu10SKKJtk48skpjIjrzNCBVa/cWz0xUHdOr346ICq4iVzW6qQ+cjcliG6hk054QG2nawBPwWDBj/2VkoLxEdiDLiYmS11Smqc+CLUNAeiOo5dUbk4SiM923gqUD6MyJapp6OgXKbdBwMync8h1oMqpIMdf2oyKoNk36cgVeWl5IYbxpP9u5Y4piJTJMq4W84SercwAObANQtt48Y0TO91G5G5XiOOVMcSR54X4x3hO5RKUqXvoDJch3In4xxLVqkxTNPaaoyXmLJcafMRVJaDO8eyw0bnhIFlXzQfPE4UFTrjwndiGBO6333C+DLwI6va5sPAl8tPjeJtxjGbB3pKZxNfF8rnfdHHDWV5AlDuZw8Ula4emdq8R+Zt8OGklqtnglHCT991uvJSB9gL9zQM63VDiXIdlN9/MgwCwnKYQWOKE5GctStEz5NP7QtAR5Wx72Xyqc0p2GVz6UPPMRGnhalVc0xE/5lekYHX2g6QhT5msbBQnmWinUwcOBCbUYlTmhQcWCp0ct6oQirI/jOtAuS63HJAT1IxsjKFTBK+5e6M5ajh2itYap/OOpJChDmxjHhWcUSkOJ+3HNATvCV+4V3z3NQOaxZqb+aBz5sZT32CJBArWiSymrurAgVy6cxkRGWq++GIDeXiFP4Ro55nzBuHkKeu4M1tDjsvxACpytshz99uPKAnoRhZKcakhCJN5HEiFpJIbgpVIRbUAP7Egrm8uP35omMQgab+Ei3ITFQe51IxJKEkfO7dIIizhgaBHDCrWi9MxctNB7REPrr03HC1ZtY9TDLfuiYMe/wf</diagram></mxfile> \ No newline at end of file +<mxfile host="Electron" modified="2020-03-13T19:34:08.134Z" agent="Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) draw.io/12.6.5 Chrome/80.0.3987.86 Electron/8.0.0 Safari/537.36" etag="VDlQzySqUssiwsoi2TLi" version="12.6.5" type="device"><diagram name="Page-1" id="74e2e168-ea6b-b213-b513-2b3c1d86103e">7V1bd5u4Fv41Xuuch3RxN35skl5mppkm09POdF66ZJBtGgwu4Cburz8SIAySzCVc5SQPrRES2Ht/+6otaaZebR/fBWC3ufFt6M4UyX6cqdczRZEl00D/4ZZD0mLKacM6cOy007Hhk/MLkpFp696xYVjoGPm+Gzm7YqPlex60okIbCAL/odht5bvFt+7AGjINnyzgsq1/O3a0SVtlSTreeA+d9SZ9tamnN5bAul8H/t5L3zdT1FX8l9zeAvKstH+4Abb/kGtS38zUq8D3o+TT9vEKupi2hGzJuLcn7mbfO4BeVGfAfA4NazFfyQvDBqapXqRP+AncPSQ/wXDRsy5XPnok+sbRIaWS8WPvkxsXYczD16iDLO0ejzfRpzX+/wY4Hrr52gPuIXKskDwVfbXkwUk30hzugHf6ZQ8p6fHrNMwTKR6K4OD6QdIcrJfgP/Ed5Qr9y/3032xg/NQV2DruIRm+9T0ffQcLFrscfyX3R84UnRYKHdEdt8Ysza4IH/SYE6jlGn/GX0zHBNcRv6r6yllfwu4nPUY5PibheXbHhvZ+F99Ygu2xV8KSrFcYBf59KiS4EXdSLzfRNv7t13J8iVC+S8ZsH9eYOK9Wrv9gbUAQvbJ9a79FXz8Z+LBxIvgpITzq/oA6J0/AhMct0itFj1tWjutexfwm30VdmRa0rKR//LWo+0tT17Cg6jGnyK+GQQQfa9E1u5WR9B30tzAKDnH3/EPyTJAOuRtS/sZDRrb0Zu7eJsF4dtPI3QPhsX2d+wo0z+PLjPH5xiIc034MbjMRTcSRgFwpSKYSPjhbF3joKuE75rp6aW0c1/4ADv4eK6IwQoqRXCHuIM6nWj+Wr1hjQqyT8FWmFPGFC5bQvczUasJS9drz4xfm4Ze8F8vp21SSr7/AwAYeSJvTN5roErjO2kOfLcRgGMzycMK/SbV1aNrarAik+I6pLFXDyOiQV7VEdyaIOjalqvcIFim9a6RWILWSMrEKDzmbYxCbs8nZG8NMG0Fq6I4oOCr7Wcb7mrpfYXT/GkaXYF2T4wxXu6DpSQNVTeSUqnyicmhqdEDTy6//Pt6p374p337d+fv5928PunOhSAuGhNBGDkd66QfRxl/7yDS+ObZSMvF9v92R/iCwUMtx1Aff36U8+A6j6JDyAOwjv8gh+OhE/6QPxJ+/4s+v5np6ef2Yu3d9IBceosE/+YtkmE4uj8Piq8K4Wxg4iIhYxBLRplkvxX/1WJ/iM/T3gZWS8f7647/h5t2fd1er90s3+nl788a90FOPEQQIv0m/u2/f1dt3f3z58tufa/D72o9+3HoXsqwmPTEzSqEUQBdEzs+id8jDSTz0dRCAQ67Dzne8KMw9+RY3HNWAUlQDiqwXfbaK7guJQmjy/iNesx9SC8JlJK1WDUfUYsDlzHlqzAt4BMvQd/cRfB1YKWbj1uOVxqh4pS+9QpRfbeWtUspb4+gZnu7OGtvoGS6TCJxbqJk2SuWVnlMrck2dclQjXwtahK9TWC3YodqQSaSZ0xtlwtCd1mjHdTZkC+DOdSwQ4ejlr9+uGVQUjDYtXYmzjlx14O2B+5u3i102niQXZT1MLb9RFGF5GPGrK31dGPlyY8LnQ+JKLwPiRS/B2olYx4rhRp5VhDElMRSfUSlr4thpEG5kZB6CHcy3ZxmB5PkaREBQR5bcpcKFThzb9G1/QSsC3hpH9g1ep3LeNleptwEXuX8eEoJLrC5ChrPNvBOugy3rxqiWbzR3+kn2kHgJVfbQqGkOVblre3gGXvQJnM5fcDoeTpUzwKk6HyDaM2obUGHDPeIjCBzuETzn2LQCYfSjK69yu3cj54L4lifY2wupjSLkOaQ2zX58yVIDlyM0ejCwuR6l2LTmOe590Zr58hpDZvRbQ3hzktYiee8qrfdHT0vLevu09NS9kSc5HlpNx2POOh7c1IAyPwfHQxrA8Zg31ADCuh9E1YnhfpSimlMqsncLbDpVKKLsHmNqsWUUyWNcp9Vjwh20HJivNMEP7PgVzj3s8wXQsz0Y9vsbogBgIap8D2rEjD1RBYBwHDFSkc7S50UobcrJJU9ssWA4FnBfp/P0W8e2Y/PjozvYbUJtG9QGvVlaInDrh07k+IVJffKUD1SH7GmkDMCFq7TqyHK8NWq4iJVBrmhAVoYR7NqZ014qANhpvggF8uKnTjVzcs6XoZy989VdKkiv6ZGZrEfG7adpZ+CRdZ2yvFsePlt6+MVbrJf3mw+3fx/eOBdz7exhytfyFDT6Ayif7Kp4eFSNASIEs76FEjY4IFZYjOCgVMFSfNoidjBT4ZNMWrblwZCJtBINcjI8Y0OIpOWHFbMBRILGAifYVofbdVnLy0dzxUvrw0Xnzs38BVesAInkoBva9Bz088+Oduegk/qLnP/Dpyqnxu4E+SeSNFWauURU90GqCubSC1LHRKp5Bkjtuq6glKbVxktY352Y5wbzggU+DOu7nwC0OHXk6ILWCoPWlnO0CT8ekupqk86L6drFcRIjsQFcQUSReGnsGnroRSHrez6xjhmF8UsQwqeHbY3FTy+KH0f6uG5nf7LHzqq9dz4BvDxW4nr544bJbdXdkGFyufXOETyms7TEuYqOcN11fX5bstcOYXup82EhHs/DiT/NZEqTi2Lnyqi2XKzYgFRnV1nzBWvMuSUSL8tLa8N0XJfzecN0cQYwHSZ+Nduv3xBvIf8YiJxI8q8VJDVtgPnQRQNfStikCnEYxZgQLXMFcnyKd1WaaHzRluCDrv8ts2kFejtnS+9B47lSjf0sp5t75a3GiyL7mm4+sVBEZ2g+oHdOPue8lScmhIt+j9LL4hGZdYXK94uoLFY0JuELyTqlcuSCd1PZXzZ1Coi9LLA2X5DaAqmlyx8rgToNp50Asy5Q6f7DAHXRfs6+00hyNOh2CVSlJlCnMWHPAHXRDKg6vbcv1V9btOs/kCC09y2esyCUzyJWSsI0Un/zRTOVTfcfBKmK1D5H/ZyRWlonLghQDTrurgAq3b9KBZsNVTzdfyBBaL8maDxBeCroT+9ca4Nwk2VVnyYbpQusBXFntDklG1XuxnwU7IrsbjwVuwWEttqCuQ12B6p3Gwe8WoUhoLMhTfsPIxztt1F8EY4OFfs0nJ6+ZYNxYpr271o2HCd6NG/vgu3v5vdfzse9rRmcc3Hi81HEr1lbDLurbI3XcXeV1am3DbGr7HxivsJ4ZUos8MqEpJCf4PXjzHzwFeUk9J8Q9XMTs9znDNUTM09ngNWBiujaz9GJh9WRNKgsnYMO7bqSroyktTw8USrpSl1ZgavpFD6vZulZhbQ/Pk6VVy/EH7+yLlMpz77UqxcGT6Hcq0S6JlND2Qvxx6+jlFnh2oJ7eOms/3ZEPYiR3JVpL6DfVAOTJ1LNmtxVqPd1m2wAf96Cz94t3B3++d/y8432ebHbXrSfUpu6B1wzWiuVisrUKucsEG6/aUyZTSuzUEbQeupIFLe4XPMK7Beza9mXJwzHEEabYlV6PDeHVenB3MOxanwvmnOmiIWk5xyOqVOmt9myqTA0Pf+UUxuDq9ady5Q5BVz8jueQIB3E5BKS1lINQptc9QxSUZxoOebWTLmMdlt0wwp/npn5bc22Qc0vv9SFxIZTsQijFfMOUvFChCRvJda2bN79+iRfKZ/fvFeWYWC4kITBY6910xrW+TIDhqnXkidWoHA2IC7Ni4oCYpnajapqfREzoKr6XNb0lgO6FhMuO9hIa79zfWCfQailjbltdhn26xFbFOe1FFZi+K7cpPOom251ZiwupFeSXDQYstlNbPz0El/WVJSlV0c3FQ3XTDMD5LnZqSLnwvV57kLRISp5xWwlpTAjo3LRcMcJun/XzgUXk+3rLp85JjlbwomEyQqfehRMtt+k8JljkmO9hQr0Kq238IEe98ir9pM84uK+2XGFXbuxyZSFgG5rxVIzWaF3mmg8oGPgO8A6vP/48En+KH8MrT/+fZSdW06GIztlryAPJ46G1qXd4+lTuHHnpz/l1tlB1/FwWdY1DHGZpSK9JRMa7FmAuSrQ5L1M87KqNjSfMkjrOpkDoZmCzwB9t19gGT8KC1UKPfRc/XKmX+NnIckPj3kHVsian3lGFX6y6YDsVLR8OkDtIBvAzW+abO6G0D3cAa8OBmQuBu6ucvxLHnWChfzsm7VxXPsDOPh7TIIwAtY9uWIzc0WFHm6AHZfx4ov4nPBLNHodd6JqhYupIzn9cW/B1nExh77AwAYemBXOizV5IOsi49Q8FWhQcJI5ucDsGOxCLrCLk8S5gGJrx+KjPq2abO8l6VqO/GpyD7RKtk7p6pzzvn4rV/mzUEp7v+tMimuaTEOVSUzlVBTnGGluv2lsIDCtmpsyglarKlGmLEoVshhTFmWIrmFTBj0Yqxdyj344lpKVAU5EtQtRYFDqYtTYF1qqqdxlfRp7Jwmx4W52Atq5QPk0IjvMoNaGcpmuHg3IbHaGqkzXaHc5+arpqCMkh94rvSq9qjbcVpLu33k2lqfINIUx1Y63gsENjIA9culN2ygwC7oHXdC4MGtEhfpUosKJbRIqRlRYKkmVvoPOcR3KnYyXyLBKjRGa1lZjokSI5UpbjBCxHNw5lp02OoOGif3QfPQ4UdbY/InYaV5C6GF3LBDLwE9s8YEYBr5cfmqsQeAcxnOip/Ji4utC+WX3xGlDeRrFfyJsrqiztSDnNotx9DbEcFLL1TPFKOl76HtdeanDrxfuiWHjLxjOdFB+JxwcBODillFjip5IztsKbeDJp4kdti7E5FObs/LKZsbHnmOizhTQq+aYqP5zsyIDb7QdoEpDzGIRoXyRiXYyceLYPE5dTWlScGSpMOl5owqpoPvPjQqQm2rLAQNJxcTKFI6S8DV3ZyoHktVemVX7DKeJFCIsqNXw86pi/cWi5YCB4K2IC++apyt1WLNQe08aso/1dOoTFIlamKXQtdldFSjQK8DORlQmNuvcmX8kN5SLXvwjnSMxE14Yqcj03upqOWiZAUqVv0Of09d4QNdiwV0Q3P7AlSmIQFPTwAry040FB/hTDgxked4M+cyAedUKXyY0aDqga+RzlwR3cITb1OOA3HI0WeHCnhMPlC4X5gkFl7j1D8caRQoMRTAnB10GPl46eeyOc8Q3vg1xj/8D</diagram></mxfile> \ No newline at end of file diff --git a/docs/RNA-Seq Pipeline Design Flowchart.jpg b/docs/RNA-Seq Pipeline Design Flowchart.jpg index 99a72eeaa5abfe898e761bbb44dfcc50920c978a..fb7264cd251e7d3ed9c35971e58387e14303fdaa 100644 Binary files a/docs/RNA-Seq Pipeline Design Flowchart.jpg and b/docs/RNA-Seq Pipeline Design Flowchart.jpg differ diff --git a/docs/RNA-Seq Pipeline Design Flowchart.pdf b/docs/RNA-Seq Pipeline Design Flowchart.pdf deleted file mode 100644 index ee209a0414db1337a9212fce7597a64082fd8101..0000000000000000000000000000000000000000 Binary files a/docs/RNA-Seq Pipeline Design Flowchart.pdf and /dev/null differ diff --git a/docs/RNA-Seq Pipeline Design Process Table.docx b/docs/RNA-Seq Pipeline Design Process Table.docx index a90cccfa2a4c9d94972baacd84f0cd91740f1a97..92cc22938e92753c3e77cdfc5279806820bca1d2 100644 Binary files a/docs/RNA-Seq Pipeline Design Process Table.docx and b/docs/RNA-Seq Pipeline Design Process Table.docx differ diff --git a/docs/dag.png b/docs/dag.png index a4b6d60f4891bc2680f6d2908670442ff1e314df..55e0e4781a59ea7159802b8922d9ff2baf411a29 100644 Binary files a/docs/dag.png and b/docs/dag.png differ diff --git a/workflow/conf/biohpc.config b/workflow/conf/biohpc.config index a6f5fba4f7f6600733c0accce8d853fefb1fce63..83fac1f1092e641a25214c2af2b61745ed352a83 100755 --- a/workflow/conf/biohpc.config +++ b/workflow/conf/biohpc.config @@ -24,7 +24,10 @@ process { withName: dedupData { queue = 'super' } - withName: fastqc { + withName:fastqc { + queue = 'super' + } + withName:inferMetadata { queue = 'super' } withName: makeBigWig { diff --git a/workflow/nextflow.config b/workflow/nextflow.config index f8729b004f1637fcce49fd9c13fafc2a5111bf1a..f0be347ef28c9306abe43974d48464484f48b5c4 100644 --- a/workflow/nextflow.config +++ b/workflow/nextflow.config @@ -17,13 +17,13 @@ process { withName:getData { container = 'bicf/gudmaprbkfilexfer:1.3' } - withName:parseMetadata { + withName: parseMetadata { container = 'bicf/python3:1.3' } - withName:getRef { + withName: getRef { container = 'bicf/awscli:1.1' } - withName:trimData { + withName: trimData { container = 'bicf/trimgalore:1.1' } withName: alignData { @@ -32,11 +32,14 @@ process { withName: dedupData { container = 'bicf/gudmaprbkdedup:2.0.0' } + withName: makeBigWig { + container = 'bicf/deeptools3.3:2.0.0' + } withName: fastqc { container = 'bicf/fastqc:2.0.0' } - withName: makeBigWig { - container = 'bicf/deeptools3.3:2.0.0' + withName:inferMetadata{ + container = 'bicf/rseqc3.0:2.0.0' } withName: makeFeatureCounts { container = 'bicf/subread2:2.0.0' diff --git a/workflow/rna-seq.nf b/workflow/rna-seq.nf old mode 100755 new mode 100644 index e4427f61a1d8074dcf71bf8d120b00a18754b8b9..0c56150ca6923ba7cea1c6da0699e92b83f2f1e6 --- a/workflow/rna-seq.nf +++ b/workflow/rna-seq.nf @@ -39,6 +39,7 @@ referenceBase = "/project/BICF/BICF_Core/shared/gudmap/references" script_bdbagFetch = Channel.fromPath("${baseDir}/scripts/bdbagFetch.sh") script_parseMeta = Channel.fromPath("${baseDir}/scripts/parseMeta.py") script_calculateTPM = Channel.fromPath("${baseDir}/scripts/calculateTPM.R") +script_inferMeta = Channel.fromPath("${baseDir}/scripts/inferMeta.sh") /* * splitData: split bdbag files by replicate so fetch can occure in parallel, and rename files to replicate rid @@ -113,7 +114,7 @@ process getData { """ } -// Split fastq's +// Replicate raw fastqs for multiple process inputs fastqs.into { fastqs_trimData fastqs_fastqc @@ -185,6 +186,7 @@ metadata.splitCsv(sep: ",", header: false).separate( spike, species ) +// Replicate metadata for multiple process inputs endsManual.into { endsManual_trimData endsManual_alignData @@ -212,9 +214,7 @@ process getRef { val species_getRef output: - path ("hisat2") type 'dir' into reference - path ("bed") type 'dir' into bedFile - tuple val ("${refRID}"), path ("genome.fna"), path ("genome.gtf") into featureCountsRef + path ("*") into reference script: """ @@ -257,6 +257,13 @@ process getRef { """ } +// Replicate reference for multiple process inputs +reference.into { + reference_alignData + reference_makeFeatureCounts + reference_inferMeta +} + /* * trimData: trims any adapter or non-host sequences from the data */ @@ -270,6 +277,7 @@ process trimData { output: path ("*.fq.gz") into fastqs_trimmed + path ("*_trimming_report.txt") into trimQC path ("${repRID}.trimData.log") path ("${repRID}.trimData.err") @@ -289,10 +297,6 @@ process trimData { """ } -reference.into { - reference_alignData -} - /* * alignData: aligns the reads to a reference database */ @@ -307,8 +311,8 @@ process alignData { path reference_alignData output: - path ("${repRID}.sorted.bam") into rawBam - path ("${repRID}.sorted.bai") into rawBai + tuple val ("${repRID}"), path ("${repRID}.sorted.bam"), path ("${repRID}.sorted.bam.bai") into rawBam + path ("*.alignSummary.txt") into alignQC path ("${repRID}.align.out") path ("${repRID}.align.err") @@ -320,19 +324,24 @@ process alignData { # align reads if [ "${endsManual_alignData}" == "se" ] then - hisat2 -p `nproc` --add-chrname --un-gz ${repRID}.unal.gz -S ${repRID}.sam -x hisat2/genome ${stranded_alignData} -U ${fastq[0]} 1>${repRID}.align.out 2>${repRID}.align.err + hisat2 -p `nproc` --add-chrname --un-gz ${repRID}.unal.gz -S ${repRID}.sam -x hisat2/genome ${stranded_alignData} -U ${fastq[0]} --summary-file ${repRID}.alignSummary.txt --new-summary 1>${repRID}.align.out 2>${repRID}.align.err elif [ "${endsManual_alignData}" == "pe" ] then - hisat2 -p `nproc` --add-chrname --un-gz ${repRID}.unal.gz -S ${repRID}.sam -x hisat2/genome ${stranded_alignData} --no-mixed --no-discordant -1 ${fastq[0]} -2 ${fastq[1]} 1>${repRID}.align.out 2>${repRID}.align.err + hisat2 -p `nproc` --add-chrname --un-gz ${repRID}.unal.gz -S ${repRID}.sam -x hisat2/genome ${stranded_alignData} --no-mixed --no-discordant -1 ${fastq[0]} -2 ${fastq[1]} --summary-file ${repRID}.alignSummary.txt --new-summary 1>${repRID}.align.out 2>${repRID}.align.err fi # convert sam to bam and sort and index samtools view -1 -@ `nproc` -F 4 -F 8 -F 256 -o ${repRID}.bam ${repRID}.sam 1>>${repRID}.align.out 2>>${repRID}.align.err; samtools sort -@ `nproc` -O BAM -o ${repRID}.sorted.bam ${repRID}.bam 1>>${repRID}.align.out 2>>${repRID}.align.err; - samtools index -@ `nproc` -b ${repRID}.sorted.bam ${repRID}.sorted.bai 1>>${repRID}.align.out 2>>${repRID}.align.err; + samtools index -@ `nproc` -b ${repRID}.sorted.bam ${repRID}.sorted.bam.bai 1>>${repRID}.align.out 2>>${repRID}.align.err; """ } +// Replicate rawBam for multiple process inputs +rawBam.into { + rawBam_dedupData +} + /* *dedupData: mark the duplicate reads, specifically focused on PCR or optical duplicates */ @@ -342,11 +351,11 @@ process dedupData { publishDir "${logsDir}", mode: 'copy', pattern: "*.dedup.{out,err}" input: - path rawBam + set val (repRID), path (inBam), path (inBai) from rawBam_dedupData output: - tuple val ("${repRID}"), path ("${repRID}.sorted.deduped.bam"), path ("${repRID}.sorted.deduped.bai") into dedupBam - tuple val ("${repRID}"), path ("${repRID}.sorted.deduped.bam"), path ("${repRID}.sorted.deduped.bai") into featureCountsIn + tuple val ("${repRID}"), path ("${repRID}.sorted.deduped.bam"), path ("${repRID}.sorted.deduped.bam.bai") into dedupBam + path ("*.deduped.Metrics.txt") into dedupQC path ("${repRID}.dedup.out") path ("${repRID}.dedup.err") @@ -356,21 +365,28 @@ process dedupData { ulimit -a >>${repRID}.dedup.err # remove duplicated reads - java -jar /picard/build/libs/picard.jar MarkDuplicates I=${rawBam} O=${repRID}.deduped.bam M=${repRID}.deduped.Metrics.txt REMOVE_DUPLICATES=true 1>>${repRID}.dedup.out 2>> ${repRID}.dedup.err + java -jar /picard/build/libs/picard.jar MarkDuplicates I=${inBam} O=${repRID}.deduped.bam M=${repRID}.deduped.Metrics.txt REMOVE_DUPLICATES=true 1>>${repRID}.dedup.out 2>> ${repRID}.dedup.err samtools sort -@ `nproc` -O BAM -o ${repRID}.sorted.deduped.bam ${repRID}.deduped.bam 1>>${repRID}.dedup.out 2>> ${repRID}.dedup.err - samtools index -@ `nproc` -b ${repRID}.sorted.deduped.bam ${repRID}.sorted.deduped.bai 1>>${repRID}.dedup.out 2>> ${repRID}.dedup.err + samtools index -@ `nproc` -b ${repRID}.sorted.deduped.bam ${repRID}.sorted.deduped.bam.bai 1>>${repRID}.dedup.out 2>> ${repRID}.dedup.err """ } +// Replicate dedup bam/bai for multiple process inputs +dedupBam.into { + dedupBam_makeFeatureCounts + dedupBam_makeBigWig + dedupBam_inferMeta +} + /* - *makeBigWig: make bigwig file + *Make BigWig files for output */ process makeBigWig { tag "${repRID}" publishDir "${logsDir}", mode: 'copy', pattern: "*.makeBigWig.err" input: - tuple val (repRID), path (inBam), path (inBai) from dedupBam + set val (repRID), path (inBam), path (inBai) from dedupBam_makeBigWig output: path ("${repRID}.bw") @@ -386,30 +402,32 @@ process makeBigWig { */ process makeFeatureCounts { tag "${repRID}" - publishDir "${outDir}/featureCounts", mode: 'copy', pattern: "${repRID}*.featureCounts*" + publishDir "${outDir}/featureCounts", mode: 'copy', pattern: "${repRID}*.countTable.csv" publishDir "${logsDir}", mode: 'copy', pattern: "${repRID}.makeFetureCounts.{out,err}" input: path script_calculateTPM - tuple val (repRID1), path (bam), path (bai) from featureCountsIn - tuple val (repRID2), path (genome), path (gtf) from featureCountsRef + tuple val (repRID), path (bam), path (bai) from dedupBam_makeFeatureCounts + path reference_makeFeatureCounts val endsManual_featureCounts output: - tuple val ("${repRID}"), path ("${repRID}.featureCounts.summary"), path ("${repRID}.featureCounts"), path ("${bam}.featureCounts.sam") into featureCountsOut + path ("*.countTable.csv") into counts + path ("*.featureCounts.summary") into countsQC script: """ - if [ "${endsManual_featureCounts }" == "se" ]; then - featureCounts -R SAM -p -G ${genome} -T `nproc` -s 1 -a ${gtf} -o ${repRID}.featureCounts -g 'gene_name' ${repRID}.sorted.deduped.bam; - elif [ "${endsManual_featureCounts }" == "pe" ]; then - featureCounts -R SAM -p -G ${genome} -T `nproc` -s 1 -p -a ${gtf} -o ${repRID}.featureCounts -g 'gene_name' ${repRID}.sorted.deduped.bam; - fi; + if [ "${endsManual_featureCounts }" == "se" ] + then + featureCounts -R SAM -p -G ./genome.fna -T `nproc` -s 1 -a ./genome.gtf -o ${repRID}.featureCounts -g 'gene_name' ${repRID}.sorted.deduped.bam; + elif [ "${endsManual_featureCounts }" == "pe" ] + then + featureCounts -R SAM -p -G ./genmome.fna -T `nproc` -s 1 -p -a ./genome.gtf -o ${repRID}.featureCounts -g 'gene_name' ${repRID}.sorted.deduped.bam; + fi Rscript calculateTPM.R --count "${repRID}.featureCounts"; """ } - /* *fastqc: run fastqc on untrimmed fastq's */ @@ -433,3 +451,73 @@ process fastqc { fastqc *.fastq.gz -o . >>${repRID}.fastqc.err """ } + +/* + *inferMetadata: run RSeQC to collect stats and infer experimental metadata +*/ + +process inferMetadata { + tag "${repRID}" + publishDir "${logsDir}", mode: 'copy', pattern: "*.rseqc.err" + + input: + path script_inferMeta + path reference_inferMeta + set val (repRID), path (inBam), path (inBai) from dedupBam_inferMeta + + output: + path "infer.csv" into inferedMetadata + path "${inBam.baseName}.tin.xls" into tin + path "${repRID}.insertSize.inner_distance_freq.txt" optional true into innerDistance + + + script: + """ + hostname >${repRID}.rseqc.err + ulimit -a >>${repRID}.rseqc.err + + # infer experimental setting from dedup bam + infer_experiment.py -r ./bed/genome.bed -i "${inBam}" >${repRID}.rseqc.log + + endness=`bash inferMeta.sh endness ${repRID}.rseqc.log` + fail=`bash inferMeta.sh fail ${repRID}.rseqc.log` + if [ \${endness} == "PairEnd" ] + then + percentF=`bash inferMeta.sh pef ${repRID}.rseqc.log` + percentR=`bash inferMeta.sh per ${repRID}.rseqc.log` + inner_distance.py -i "${inBam}" -o ${repRID}.insertSize -r ./bed/genome.bed + elif [ \${endness} == "SingleEnd" ] + then + percentF=`bash inferMeta.sh sef ${repRID}.rseqc.log` + percentR=`bash inferMeta.sh ser ${repRID}.rseqc.log` + fi + if [ \$percentF -gt 0.25 ] && [ \$percentR -lt 0.25 ] + then + stranded="forward" + if [ \$endness == "PairEnd" ] + then + strategy="1++,1--,2+-,2-+" + else + strategy="++,--" + fi + elif [ \$percentR -gt 0.25 ] && [ \$percentF -lt 0.25 ] + then + stranded="reverse" + if [ \$endness == "PairEnd" ] + then + strategy="1+-,1-+,2++,2--" + else + strategy="+-,-+" + fi + else + stranded="unstranded" + strategy="us" + fi + + # calcualte TIN values per feature + tin.py -i "${inBam}" -r ./bed/genome.bed + + # write infered metadata to file + echo \${endness},\${stranded},\${strategy},\${percentF},\${percentR},\${fail} > infer.csv + """ +} \ No newline at end of file diff --git a/workflow/scripts/bdbagFetch.sh b/workflow/scripts/bdbagFetch.sh index 902222a2ebb6aa7e978f0a820ad3c04472395848..fc0ffabe6de392cf220f9b0a93659ca19888a7ba 100644 --- a/workflow/scripts/bdbagFetch.sh +++ b/workflow/scripts/bdbagFetch.sh @@ -1,7 +1,14 @@ #!/bin/bash -bdbag --resolve-fetch all --fetch-filter filename\$*fastq.gz $1 -for i in $(find */ -name "*.R*.fastq.gz"); do - path=${2}$(echo ${i##*/} | grep -o "\.R.\.fastq\.gz"); - mv ${i} ./${path} -done; \ No newline at end of file +if [ -z "${3}" ] +then +bdbag --resolve-fetch all --fetch-filter filename\$*fastq.gz ${1} + for i in $(find */ -name "*.R*.fastq.gz") + do + path=${2}$(echo ${i##*/} | grep -o "\.R.\.fastq\.gz") + mv ${i} ./${path} + done +elif [ "${3}" == "TEST" ] +then + bdbag --resolve-fetch all --fetch-filter filename\$*.txt ${1} +fi \ No newline at end of file diff --git a/workflow/scripts/inferMeta.sh b/workflow/scripts/inferMeta.sh new file mode 100644 index 0000000000000000000000000000000000000000..a8c9839ae2328cbed3dd39b9f2be427be12722ba --- /dev/null +++ b/workflow/scripts/inferMeta.sh @@ -0,0 +1,21 @@ +#!/bin/bash + +if [ "${1}" == "endness" ] +then + awk '/Data/ {print}' "${2}" | sed -e 's/^This is //' -e 's/ Data$//' +elif [ "${1}" == "fail" ] +then + awk '/Fraction of reads failed/ {print}' "${2}" | sed -e 's/^Fraction of reads failed to determine: //' +elif [ "${1}" == "sef" ] +then + awk '/\+\+,--/ {print}' "${2}" | sed -e 's/^Fraction of reads explained by "++,--": //' +elif [ "${1}" == "ser" ] +then + awk '/\+-,-\+/ {print}' "${2}" | sed -e 's/^Fraction of reads explained by "+-,-+": //' +elif [ "${1}" == "pef" ] +then + awk '/1\+\+,1--,2\+-,2-\+/ {print}' "${2}" | sed -e 's/^Fraction of reads explained by "1++,1--,2+-,2-+": //' +elif [ "${1}" == "per" ] +then + awk '/1\+-,1-\+,2\+\+,2--/ {print}' "${2}" | sed -e 's/^Fraction of reads explained by "1+-,1-+,2++,2--": //' +fi \ No newline at end of file diff --git a/workflow/tests/test_alignReads.py b/workflow/tests/test_alignReads.py index d95e63a6426e5d1cbec874b0c4e86b17388ba975..90c9052a83f0fb430d8f34386cfcf4c507f7fb6b 100644 --- a/workflow/tests/test_alignReads.py +++ b/workflow/tests/test_alignReads.py @@ -13,11 +13,11 @@ data_output_path = os.path.dirname(os.path.abspath(__file__)) + \ def test_alignData_se(): assert os.path.exists(os.path.join(data_output_path, 'Q-Y5JA_1M.se.unal.gz')) assert os.path.exists(os.path.join(data_output_path, 'Q-Y5JA_1M.se.sorted.bam')) - assert os.path.exists(os.path.join(data_output_path, 'Q-Y5JA_1M.se.sorted.bai')) + assert os.path.exists(os.path.join(data_output_path, 'Q-Y5JA_1M.se.sorted.bam.bai')) @pytest.mark.alignData def test_alignData_pe(): assert os.path.exists(os.path.join(data_output_path, 'Q-Y5JA_1M.pe.unal.gz')) assert os.path.exists(os.path.join(data_output_path, 'Q-Y5JA_1M.pe.sorted.bam')) - assert os.path.exists(os.path.join(data_output_path, 'Q-Y5JA_1M.pe.sorted.bai')) \ No newline at end of file + assert os.path.exists(os.path.join(data_output_path, 'Q-Y5JA_1M.pe.sorted.bam.bai')) diff --git a/workflow/tests/test_dedupReads.py b/workflow/tests/test_dedupReads.py index 0c0cd7aa67ab0e47168f8bf438191f6a9c217b72..5ca60ebd6c10957b0da697ee8cacf196fa75d525 100644 --- a/workflow/tests/test_dedupReads.py +++ b/workflow/tests/test_dedupReads.py @@ -11,4 +11,5 @@ data_output_path = os.path.dirname(os.path.abspath(__file__)) + \ @pytest.mark.dedupData def test_dedupData(): - assert os.path.exists(os.path.join(data_output_path, 'Q-Y5JA_1M.se.deduped.bam')) \ No newline at end of file + assert os.path.exists(os.path.join(data_output_path, 'Q-Y5JA_1M.se.sorted.deduped.bam')) + assert os.path.exists(os.path.join(data_output_path, 'Q-Y5JA_1M.se.sorted.deduped.bam.bai')) diff --git a/workflow/tests/test_getData.py b/workflow/tests/test_getData.py index 36d0b22155fc5ef860497bf887dc595ca368c6ad..454e3c08ea7d227f6a8840956f882fa2e0c1d61e 100644 --- a/workflow/tests/test_getData.py +++ b/workflow/tests/test_getData.py @@ -10,5 +10,5 @@ test_output_path = os.path.dirname(os.path.abspath(__file__)) + \ @pytest.mark.getData def test_getData(): - assert os.path.exists(os.path.join(test_output_path, 'Replicate_16-1ZX4/bagit.txt')) - assert os.path.exists(os.path.join(test_output_path, '16-1ZX4.R1.fastq.gz')) \ No newline at end of file + assert os.path.exists(os.path.join(test_output_path, 'Study_Q-Y4H0/bagit.txt')) + assert os.path.exists(os.path.join(test_output_path, 'Study_Q-Y4H0/data/assets/Study/Q-Y4H0/Experiment/Q-Y4BY/Replicate/Q-Y5F8/hMARIS_SIX2+_RiboDep#1.gene.rpkm.txt')) \ No newline at end of file diff --git a/workflow/tests/test_inferMetadata.py b/workflow/tests/test_inferMetadata.py new file mode 100644 index 0000000000000000000000000000000000000000..44ffbc3f239630b357b5fde7746787fe9ca65d62 --- /dev/null +++ b/workflow/tests/test_inferMetadata.py @@ -0,0 +1,14 @@ +#!/usr/bin/env python3 + +import pytest +import pandas as pd +from io import StringIO +import os + +test_output_path = os.path.dirname(os.path.abspath(__file__)) + \ + '/../../' + +@pytest.mark.inferMetadata +def test_inferMetadata(): + assert os.path.exists(os.path.join(test_output_path, 'Q-Y5JA_1M.se.sorted.deduped.tin.xls')) +