From 4bd19e37f556149c92eed1465072782d9b30a9ed Mon Sep 17 00:00:00 2001 From: "Gervaise H. Henry" <gervaise.henry@utsouthwestern.edu> Date: Fri, 19 Feb 2021 17:22:22 -0600 Subject: [PATCH] Move non nextflow configs back to workflow and symlink files --- nextflow.config | 16 +- {conf => nextflowConf}/.gitkeep | 0 .../Execution_Run_For_Output_Bag.json | 0 .../Replicate_For_Input_Bag.json | 0 {conf => nextflowConf}/aws.config | 0 {conf => nextflowConf}/bdbag.json | 0 {conf => nextflowConf}/biohpc.config | 0 {conf => nextflowConf}/biohpc_local.config | 0 {conf => nextflowConf}/biohpc_max.config | 0 {conf => nextflowConf}/dnanexus.config | 0 {conf => nextflowConf}/local.config | 0 {conf => nextflowConf}/multiqc_config.yaml | 0 {conf => nextflowConf}/ondemand.config | 0 {conf => nextflowConf}/spot.config | 0 workflow/conf/.gitkeep | 0 .../conf/Execution_Run_For_Output_Bag.json | 64 +++++++ workflow/conf/Replicate_For_Input_Bag.json | 97 ++++++++++ workflow/conf/bdbag.json | 28 +++ workflow/conf/multiqc_config.yaml | 180 ++++++++++++++++++ workflow/nextflow.config | 1 + workflow/nextflowConf | 1 + workflow/rna-seq.nf | 1 + 22 files changed, 380 insertions(+), 8 deletions(-) rename {conf => nextflowConf}/.gitkeep (100%) rename {conf => nextflowConf}/Execution_Run_For_Output_Bag.json (100%) rename {conf => nextflowConf}/Replicate_For_Input_Bag.json (100%) rename {conf => nextflowConf}/aws.config (100%) rename {conf => nextflowConf}/bdbag.json (100%) rename {conf => nextflowConf}/biohpc.config (100%) rename {conf => nextflowConf}/biohpc_local.config (100%) rename {conf => nextflowConf}/biohpc_max.config (100%) rename {conf => nextflowConf}/dnanexus.config (100%) rename {conf => nextflowConf}/local.config (100%) rename {conf => nextflowConf}/multiqc_config.yaml (100%) rename {conf => nextflowConf}/ondemand.config (100%) rename {conf => nextflowConf}/spot.config (100%) create mode 100644 workflow/conf/.gitkeep create mode 100755 workflow/conf/Execution_Run_For_Output_Bag.json 
create mode 100644 workflow/conf/Replicate_For_Input_Bag.json create mode 100644 workflow/conf/bdbag.json create mode 100644 workflow/conf/multiqc_config.yaml create mode 120000 workflow/nextflow.config create mode 120000 workflow/nextflowConf create mode 120000 workflow/rna-seq.nf diff --git a/nextflow.config b/nextflow.config index 927e605..288fc9d 100644 --- a/nextflow.config +++ b/nextflow.config @@ -1,23 +1,23 @@ profiles { standard { - includeConfig 'conf/biohpc.config' + includeConfig 'nextflowConf/biohpc.config' } biohpc { - includeConfig 'conf/biohpc.config' + includeConfig 'nextflowConf/biohpc.config' } biohpc_max { - includeConfig 'conf/biohpc_max.config' + includeConfig 'nextflowConf/biohpc_max.config' } aws_ondemand { - includeConfig 'conf/aws.config' - includeConfig 'conf/ondemand.config' + includeConfig 'nextflowConf/aws.config' + includeConfig 'nextflowConf/ondemand.config' } aws_spot { - includeConfig 'conf/aws.config' - includeConfig 'conf/spot.config' + includeConfig 'nextflowConf/aws.config' + includeConfig 'nextflowConf/spot.config' } dnanexus { - includeConfig 'conf/dnanexus.config' + includeConfig 'nextflowConf/dnanexus.config' } } diff --git a/conf/.gitkeep b/nextflowConf/.gitkeep similarity index 100% rename from conf/.gitkeep rename to nextflowConf/.gitkeep diff --git a/conf/Execution_Run_For_Output_Bag.json b/nextflowConf/Execution_Run_For_Output_Bag.json similarity index 100% rename from conf/Execution_Run_For_Output_Bag.json rename to nextflowConf/Execution_Run_For_Output_Bag.json diff --git a/conf/Replicate_For_Input_Bag.json b/nextflowConf/Replicate_For_Input_Bag.json similarity index 100% rename from conf/Replicate_For_Input_Bag.json rename to nextflowConf/Replicate_For_Input_Bag.json diff --git a/conf/aws.config b/nextflowConf/aws.config similarity index 100% rename from conf/aws.config rename to nextflowConf/aws.config diff --git a/conf/bdbag.json b/nextflowConf/bdbag.json similarity index 100% rename from conf/bdbag.json rename to 
nextflowConf/bdbag.json diff --git a/conf/biohpc.config b/nextflowConf/biohpc.config similarity index 100% rename from conf/biohpc.config rename to nextflowConf/biohpc.config diff --git a/conf/biohpc_local.config b/nextflowConf/biohpc_local.config similarity index 100% rename from conf/biohpc_local.config rename to nextflowConf/biohpc_local.config diff --git a/conf/biohpc_max.config b/nextflowConf/biohpc_max.config similarity index 100% rename from conf/biohpc_max.config rename to nextflowConf/biohpc_max.config diff --git a/conf/dnanexus.config b/nextflowConf/dnanexus.config similarity index 100% rename from conf/dnanexus.config rename to nextflowConf/dnanexus.config diff --git a/conf/local.config b/nextflowConf/local.config similarity index 100% rename from conf/local.config rename to nextflowConf/local.config diff --git a/conf/multiqc_config.yaml b/nextflowConf/multiqc_config.yaml similarity index 100% rename from conf/multiqc_config.yaml rename to nextflowConf/multiqc_config.yaml diff --git a/conf/ondemand.config b/nextflowConf/ondemand.config similarity index 100% rename from conf/ondemand.config rename to nextflowConf/ondemand.config diff --git a/conf/spot.config b/nextflowConf/spot.config similarity index 100% rename from conf/spot.config rename to nextflowConf/spot.config diff --git a/workflow/conf/.gitkeep b/workflow/conf/.gitkeep new file mode 100644 index 0000000..e69de29 diff --git a/workflow/conf/Execution_Run_For_Output_Bag.json b/workflow/conf/Execution_Run_For_Output_Bag.json new file mode 100755 index 0000000..5945b1e --- /dev/null +++ b/workflow/conf/Execution_Run_For_Output_Bag.json @@ -0,0 +1,64 @@ +{ + "bag": { + "bag_name": "Execution_Run_{rid}", + "bag_algorithms": [ + "md5" + ], + "bag_archiver": "zip", + "bag_metadata": {} + }, + "catalog": { + "catalog_id": "2", + "query_processors": [ + { + "processor": "csv", + "processor_params": { + "output_path": "Execution_Run", + "query_path": 
"/attribute/M:=RNASeq:Execution_Run/RID=17-BPAG/RID,Replicate_RID:=Replicate,Workflow_RID:=Workflow,Reference_Genone_RID:=Reference_Genome,Input_Bag_RID:=Input_Bag,Notes,Execution_Status,Execution_Status_Detail,RCT,RMT?limit=none" + } + }, + { + "processor": "csv", + "processor_params": { + "output_path": "Workflow", + "query_path": "/entity/M:=RNASeq:Execution_Run/RID=17-BPAG/RNASeq:Workflow?limit=none" + } + }, + { + "processor": "csv", + "processor_params": { + "output_path": "Reference_Genome", + "query_path": "/entity/M:=RNASeq:Execution_Run/RID=17-BPAG/RNASeq:Reference_Genome?limit=none" + } + }, + { + "processor": "csv", + "processor_params": { + "output_path": "Input_Bag", + "query_path": "/entity/M:=RNASeq:Execution_Run/RID=17-BPAG/RNASeq:Input_Bag?limit=none" + } + }, + { + "processor": "csv", + "processor_params": { + "output_path": "mRNA_QC", + "query_path": "/attribute/M:=RNASeq:Execution_Run/RID=17-BPAG/(RID)=(RNASeq:mRNA_QC:Execution_Run)/RID,Execution_Run_RID:=Execution_Run,Replicate_RID:=Replicate,Paired_End,Strandedness,Median_Read_Length,Raw_Count,Final_Count,Notes,RCT,RMT?limit=none" + } + }, + { + "processor": "fetch", + "processor_params": { + "output_path": "assets/Study/{Study_RID}/Experiment/{Experiment_RID}/Replicate/{Replicate_RID}/Execution_Run/{Execution_Run_RID}/Output_Files", + "query_path": "/attribute/M:=RNASeq:Execution_Run/RID=17-BPAG/R:=RNASeq:Replicate/$M/(RID)=(RNASeq:Processed_File:Execution_Run)/url:=File_URL,length:=File_Bytes,filename:=File_Name,md5:=File_MD5,Execution_Run_RID:=M:RID,Study_RID:=R:Study_RID,Experiment_RID:=R:Experiment_RID,Replicate_RID:=R:RID?limit=none" + } + }, + { + "processor": "fetch", + "processor_params": { + "output_path": "assets/Study/{Study_RID}/Experiment/{Experiment_RID}/Replicate/{Replicate_RID}/Execution_Run/{Execution_Run_RID}/Input_Bag", + "query_path": 
"/attribute/M:=RNASeq:Execution_Run/RID=17-BPAG/R:=RNASeq:Replicate/$M/RNASeq:Input_Bag/url:=File_URL,length:=File_Bytes,filename:=File_Name,md5:=File_MD5,Execution_Run_RID:=M:RID,Study_RID:=R:Study_RID,Experiment_RID:=R:Experiment_RID,Replicate_RID:=R:RID?limit=none" + } + } + ] + } +} \ No newline at end of file diff --git a/workflow/conf/Replicate_For_Input_Bag.json b/workflow/conf/Replicate_For_Input_Bag.json new file mode 100644 index 0000000..508a024 --- /dev/null +++ b/workflow/conf/Replicate_For_Input_Bag.json @@ -0,0 +1,97 @@ +{ + "bag": { + "bag_name": "{rid}_inputBag", + "bag_algorithms": [ + "md5" + ], + "bag_archiver": "zip" + }, + "catalog": { + "query_processors": [ + { + "processor": "csv", + "processor_params": { + "output_path": "Study", + "query_path": "/attribute/M:=RNASeq:Replicate/RID={rid}/(Study_RID)=(RNASeq:Study:RID)/Study_RID:=RID,Internal_ID,Title,Summary,Overall_Design,GEO_Series_Accession_ID,GEO_Platform_Accession_ID,Funding,Pubmed_ID,Principal_Investigator,Consortium,Release_Date,RCT,RMT?limit=none" + } + }, + { + "processor": "csv", + "processor_params": { + "output_path": "Experiment", + "query_path": "/attribute/M:=RNASeq:Replicate/RID={rid}/(Experiment_RID)=(RNASeq:Experiment:RID)/Experiment_RID:=RID,Study_RID,Internal_ID,Name,Description,Experiment_Method,Experiment_Type,Species,Specimen_Type,Molecule_Type,Pooled_Sample,Pool_Size,Markers,Cell_Count,Treatment_Protocol,Treatment_Protocol_Reference,Isolation_Protocol,Isolation_Protocol_Reference,Growth_Protocol,Growth_Protocol_Reference,Label_Protocol,Label_Protocol_Reference,Hybridization_Protocol,Hybridization_Protocol_Reference,Scan_Protocol,Scan_Protocol_Reference,Data_Processing,Value_Definition,Notes,Principal_Investigator,Consortium,Release_Date,RCT,RMT?limit=none" + } + }, + { + "processor": "csv", + "processor_params": { + "output_path": "Experiment Antibodies", + "query_path": 
"/entity/M:=RNASeq:Replicate/RID={rid}/(Experiment_RID)=(RNASeq:Experiment_Antibodies:Experiment_RID)?limit=none" + } + }, + { + "processor": "csv", + "processor_params": { + "output_path": "Experiment Custom Metadata", + "query_path": "/entity/M:=RNASeq:Replicate/RID={rid}/(Experiment_RID)=(RNASeq:Experiment_Custom_Metadata:Experiment_RID)?limit=none" + } + }, + { + "processor": "csv", + "processor_params": { + "output_path": "Experiment Settings", + "query_path": "/attribute/M:=RNASeq:Replicate/RID={rid}/(Experiment_RID)=(RNASeq:Experiment_Settings:Experiment_RID)/RID,Experiment_RID,Alignment_Format,Aligner,Aligner_Version,Reference_Genome,Sequence_Trimming,Duplicate_Removal,Pre-alignment_Sequence_Removal,Junction_Reads,Library_Type,Protocol_Reference,Library_Selection,Quantification_Format,Quantification_Software,Expression_Metric,Transcriptome_Model,Sequencing_Platform,Paired_End,Read_Length,Strandedness,Used_Spike_Ins,Spike_Ins_Amount,Visualization_Format,Visualization_Software,Visualization_Version,Visualization_Setting,Notes,RCT,RMT?limit=none" + } + }, + { + "processor": "csv", + "processor_params": { + "output_path": "Replicate", + "query_path": "/attribute/M:=RNASeq:Replicate/RID={rid}/RID,Study_RID,Experiment_RID,Biological_Replicate_Number,Technical_Replicate_Number,Specimen_RID,Collection_Date,Mapped_Reads,GEO_Sample_Accession_ID,Notes,Principal_Investigator,Consortium,Release_Date,RCT,RMT?limit=none" + } + }, + { + "processor": "csv", + "processor_params": { + "output_path": "Specimen", + "query_path": "/attribute/M:=RNASeq:Replicate/RID={rid}/S:=(Specimen_RID)=(Gene_Expression:Specimen:RID)/T:=left(Stage_ID)=(Vocabulary:Developmental_Stage:ID)/$S/RID,Title,Species,Stage_ID,Stage_Name:=T:Name,Stage_Detail,Assay_Type,Strain,Wild_Type,Sex,Passage,Phenotype,Cell_Line,Parent_Specimen,Upload_Notes,Preparation,Fixation,Embedding,Internal_ID,Principal_Investigator,Consortium,Release_Date,RCT,RMT,GUDMAP2_Accession_ID?limit=none" + } + }, + { + "processor": 
"csv", + "processor_params": { + "output_path": "Specimen_Anatomical_Source", + "query_path": "/attribute/M:=RNASeq:Replicate/RID={rid}/(Specimen_RID)=(Gene_Expression:Specimen:RID)/(RID)=(Gene_Expression:Specimen_Tissue:Specimen_RID)/RID,Specimen_RID,Tissue,RCT,RMT?limit=none" + } + }, + { + "processor": "csv", + "processor_params": { + "output_path": "Specimen_Cell_Types", + "query_path": "/attribute/M:=RNASeq:Replicate/RID={rid}/(Specimen_RID)=(Gene_Expression:Specimen:RID)/(RID)=(Gene_Expression:Specimen_Cell_Type:Specimen)/RID,Specimen_RID:=Specimen,Cell_Type,RCT,RMT?limit=none" + } + }, + { + "processor": "csv", + "processor_params": { + "output_path": "Single Cell Metrics", + "query_path": "/attribute/M:=RNASeq:Replicate/RID={rid}/(RID)=(RNASeq:Single_Cell_Metrics:Replicate_RID)/RID,Study_RID,Experiment_RID,Replicate_RID,Reads_%28Millions%29,Reads%2FCell,Detected_Gene_Count,Genes%2FCell,UMI%2FCell,Estimated_Cell_Count,Principal_Investigator,Consortium,Release_Date,RCT,RMT?limit=none" + } + }, + { + "processor": "csv", + "processor_params": { + "output_path": "File", + "query_path": "/attribute/M:=RNASeq:Replicate/RID={rid}/(RID)=(RNASeq:File:Replicate_RID)/RID,Study_RID,Experiment_RID,Replicate_RID,Caption,File_Type,File_Name,URI,File_size,MD5,GEO_Archival_URL,dbGaP_Accession_ID,Processed,Notes,Principal_Investigator,Consortium,Release_Date,RCT,RMT,Legacy_File_RID,GUDMAP_NGF_OID,GUDMAP_NGS_OID?limit=none" + } + }, + { + "processor": "fetch", + "processor_params": { + "output_path": "assets/Study/{Study_RID}/Experiment/{Experiment_RID}/Replicate/{Replicate_RID}", + "query_path": "/attribute/M:=RNASeq:Replicate/RID={rid}/(RID)=(RNASeq:File:Replicate_RID)/File_Type=FastQ/File_Name::ciregexp::%5B_.%5DR%5B12%5D%5C.fastq%5C.gz/url:=URI,length:=File_size,filename:=File_Name,md5:=MD5,Study_RID,Experiment_RID,Replicate_RID?limit=none" + } + } + ] + } +} diff --git a/workflow/conf/bdbag.json b/workflow/conf/bdbag.json new file mode 100644 index 0000000..2c2ab24 --- 
/dev/null +++ b/workflow/conf/bdbag.json @@ -0,0 +1,28 @@ +{ + "fetch_config": { + "http": { + "http_cookies": { + "file_names": [ + "*cookies.txt" + ], + "scan_for_cookie_files": true, + "search_paths": [ + "." + ], + "search_paths_filter": "*cookies.txt" + } + }, + "https": { + "http_cookies": { + "file_names": [ + "*cookies.txt" + ], + "scan_for_cookie_files": true, + "search_paths": [ + "." + ], + "search_paths_filter": "*cookies.txt" + } + } + } +} diff --git a/workflow/conf/multiqc_config.yaml b/workflow/conf/multiqc_config.yaml new file mode 100644 index 0000000..ed1375a --- /dev/null +++ b/workflow/conf/multiqc_config.yaml @@ -0,0 +1,180 @@ +custom_logo: './bicf_logo.png' +custom_logo_url: 'https://utsouthwestern.edu/labs/bioinformatics/' +custom_logo_title: 'Bioinformatics Core Facility' + +report_header_info: + - Contact Email: 'bicf@utsouthwestern.edu' + - Application Type: 'RNA-Seq Analytic Pipeline for GUDMAP/RBK' + - Department: 'Bioinformatics Core Facility, Department of Bioinformatics, University of Texas Southwestern Medical Center' + +title: RNA-Seq Analytic Pipeline for GUDMAP/RBK + +report_comment: > + This report has been generated by the <a href="https://doi.org/10.5281/zenodo.3625056">GUDMAP/RBK RNA-Seq Pipeline</a> + +top_modules: + - fastqc: + name: 'Raw' + info: 'Replicate Raw fastq QC Results' + - cutadapt: + name: 'Trim' + info: 'Replicate Trim Adapter QC Results' + - hisat2: + name: 'Align' + info: 'Replicate Alignment QC Results' + path_filters: + - '*alignSummary*' + - picard: + name: 'Dedup' + info: 'Replicate Alignment Deduplication QC Results' + - rseqc: + name: 'Inner Distance' + info: 'Replicate Paired End Inner Distance Distribution Results' + path_filters: + - '*insertSize*' + - custom_content + - featureCounts: + name: 'Count' + info: 'Replicate Feature Count QC Results' + - hisat2: + name: 'Inference: Align' + info: 'Inference Alignment (1M downsampled reads) QC Results' + path_filters: + - '*alignSampleSummary*' + - rseqc: + 
name: 'Inference: Stranded' + info: '1M Downsampled Reads Strandedness Inference Results' + path_filters: + - '*infer_experiment*' + +report_section_order: + run: + order: 4000 + rid: + order: 3000 + meta: + order: 2000 + ref: + order: 1000 + software_versions: + order: -1000 + software_references: + order: -2000 + +skip_generalstats: true + +custom_data: + run: + file_format: 'tsv' + section_name: 'Run' + description: 'This is the run information' + plot_type: 'table' + pconfig: + id: 'run' + scale: false + format: '{}' + headers: + Session: + description: '' + Session ID: + description: 'Nextflow session ID' + Pipeline Version: + description: 'BICF pipeline version' + Input: + description: 'Input overrides' + rid: + file_format: 'tsv' + section_name: 'RID' + description: 'These are the identifying RIDs' + plot_type: 'table' + pconfig: + id: 'rid' + scale: false + format: '{}' + headers: + Replicate: + description: '' + Replicate RID: + description: 'Replicate RID' + Experiment RID: + description: 'Experiment RID' + Study RID: + description: 'Study RID' + meta: + file_format: 'tsv' + section_name: 'Metadata' + description: 'This is the comparison of inferred, submitter-provided, and calculated metadata' + plot_type: 'table' + pconfig: + id: 'meta' + scale: false + format: '{:,.0f}' + headers: + Source: + description: 'Metadata source' + Species: + description: 'Species' + Ends: + description: 'Single or paired end sequencing' + Stranded: + description: 'Stranded (forward/reverse) or unstranded library prep' + Spike-in: + description: 'ERCC spike in' + Raw Reads: + description: 'Number of reads from the sequencer' + Assigned Reads: + description: 'Final reads after filtering' + Median Read Length: + description: 'Median read length' + Median TIN: + description: 'Median transcript integrity number' + + ref: + file_format: 'tsv' + section_name: 'Reference' + description: 'This is the reference version information' + plot_type: 'table' + pconfig: + id: 'ref' + scale: 
false + format: '{}' + headers: + Species: + description: 'Reference species' + Genome Reference Consortium Build: + description: 'Reference source build' + Genome Reference Consortium Patch: + description: 'Reference source patch version' + GENCODE Annotation Release: + description: 'Annotation release version' + tin: + file_format: 'tsv' + section_name: 'TIN' + description: 'This is the distribution of TIN values calculated by the tool RSeQC' + plot_type: 'bargraph' + pconfig: + id: 'tin' + headers: + chrom + 1 - 10 + 11 - 20 + 21 - 30 + 31 - 40 + 41 - 50 + 51 - 60 + 61 - 70 + 71 - 80 + 81 - 90 + 91 - 100 + +sp: + run: + fn: "run.tsv" + rid: + fn: 'rid.tsv' + meta: + fn: 'metadata.tsv' + ref: + fn: 'reference.tsv' + tin: + fn: '*_tin.hist.tsv' diff --git a/workflow/nextflow.config b/workflow/nextflow.config new file mode 120000 index 0000000..2984cee --- /dev/null +++ b/workflow/nextflow.config @@ -0,0 +1 @@ +../nextflow.config \ No newline at end of file diff --git a/workflow/nextflowConf b/workflow/nextflowConf new file mode 120000 index 0000000..8e5c4cf --- /dev/null +++ b/workflow/nextflowConf @@ -0,0 +1 @@ +../nextflowConf/ \ No newline at end of file diff --git a/workflow/rna-seq.nf b/workflow/rna-seq.nf new file mode 120000 index 0000000..e5aa8d1 --- /dev/null +++ b/workflow/rna-seq.nf @@ -0,0 +1 @@ +../rna-seq.nf \ No newline at end of file -- GitLab