Skip to content
Snippets Groups Projects
Commit 9c320c12 authored by Gervaise Henry's avatar Gervaise Henry :cowboy:
Browse files

Merge branch 'develop' into 'dnanexus'

# Conflicts:
#   nextflow.config
#   rna-seq.nf
parents 53c18086 a566d140
Branches
Tags
2 merge requests: !76 Develop, !70 Dnanexus
Pipeline #9264 failed with stages
in 5 minutes and 57 seconds
{
  "bag": {
    "bag_name": "Execution_Run_{rid}",
    "bag_algorithms": [
      "md5"
    ],
    "bag_archiver": "zip",
    "bag_metadata": {}
  },
  "catalog": {
    "catalog_id": "2",
    "query_processors": [
      {
        "processor": "csv",
        "processor_params": {
          "output_path": "Execution_Run",
          "query_path": "/attribute/M:=RNASeq:Execution_Run/RID={rid}/RID,Replicate_RID:=Replicate,Workflow_RID:=Workflow,Reference_Genome_RID:=Reference_Genome,Input_Bag_RID:=Input_Bag,Notes,Execution_Status,Execution_Status_Detail,RCT,RMT?limit=none"
        }
      },
      {
        "processor": "csv",
        "processor_params": {
          "output_path": "Workflow",
          "query_path": "/entity/M:=RNASeq:Execution_Run/RID={rid}/RNASeq:Workflow?limit=none"
        }
      },
      {
        "processor": "csv",
        "processor_params": {
          "output_path": "Reference_Genome",
          "query_path": "/entity/M:=RNASeq:Execution_Run/RID={rid}/RNASeq:Reference_Genome?limit=none"
        }
      },
      {
        "processor": "csv",
        "processor_params": {
          "output_path": "Input_Bag",
          "query_path": "/entity/M:=RNASeq:Execution_Run/RID={rid}/RNASeq:Input_Bag?limit=none"
        }
      },
      {
        "processor": "csv",
        "processor_params": {
          "output_path": "mRNA_QC",
          "query_path": "/attribute/M:=RNASeq:Execution_Run/RID={rid}/(RID)=(RNASeq:mRNA_QC:Execution_Run)/RID,Execution_Run_RID:=Execution_Run,Replicate_RID:=Replicate,Paired_End,Strandedness,Median_Read_Length,Raw_Count,Final_Count,Notes,RCT,RMT?limit=none"
        }
      },
      {
        "processor": "fetch",
        "processor_params": {
          "output_path": "assets/Study/{Study_RID}/Experiment/{Experiment_RID}/Replicate/{Replicate_RID}/Execution_Run/{Execution_Run_RID}/Output_Files",
          "query_path": "/attribute/M:=RNASeq:Execution_Run/RID={rid}/R:=RNASeq:Replicate/$M/(RID)=(RNASeq:Processed_File:Execution_Run)/url:=File_URL,length:=File_Bytes,filename:=File_Name,md5:=File_MD5,Execution_Run_RID:=M:RID,Study_RID:=R:Study_RID,Experiment_RID:=R:Experiment_RID,Replicate_RID:=R:RID?limit=none"
        }
      },
      {
        "processor": "fetch",
        "processor_params": {
          "output_path": "assets/Study/{Study_RID}/Experiment/{Experiment_RID}/Replicate/{Replicate_RID}/Execution_Run/{Execution_Run_RID}/Input_Bag",
          "query_path": "/attribute/M:=RNASeq:Execution_Run/RID={rid}/R:=RNASeq:Replicate/$M/RNASeq:Input_Bag/url:=File_URL,length:=File_Bytes,filename:=File_Name,md5:=File_MD5,Execution_Run_RID:=M:RID,Study_RID:=R:Study_RID,Experiment_RID:=R:Experiment_RID,Replicate_RID:=R:RID?limit=none"
        }
      }
    ]
  }
}
\ No newline at end of file
{
  "bag": {
    "bag_name": "{rid}_inputBag",
    "bag_algorithms": ["md5"],
    "bag_archiver": "zip"
  },
  "catalog": {
    "query_processors": [
      {
        "processor": "csv",
        "processor_params": {
          "output_path": "Study",
          "query_path": "/attribute/M:=RNASeq:Replicate/RID={rid}/(Study_RID)=(RNASeq:Study:RID)/Study_RID:=RID,Internal_ID,Title,Summary,Overall_Design,GEO_Series_Accession_ID,GEO_Platform_Accession_ID,Funding,Pubmed_ID,Principal_Investigator,Consortium,Release_Date,RCT,RMT?limit=none"
        }
      },
      {
        "processor": "csv",
        "processor_params": {
          "output_path": "Experiment",
          "query_path": "/attribute/M:=RNASeq:Replicate/RID={rid}/(Experiment_RID)=(RNASeq:Experiment:RID)/Experiment_RID:=RID,Study_RID,Internal_ID,Name,Description,Experiment_Method,Experiment_Type,Species,Specimen_Type,Molecule_Type,Pooled_Sample,Pool_Size,Markers,Cell_Count,Treatment_Protocol,Treatment_Protocol_Reference,Isolation_Protocol,Isolation_Protocol_Reference,Growth_Protocol,Growth_Protocol_Reference,Label_Protocol,Label_Protocol_Reference,Hybridization_Protocol,Hybridization_Protocol_Reference,Scan_Protocol,Scan_Protocol_Reference,Data_Processing,Value_Definition,Notes,Principal_Investigator,Consortium,Release_Date,RCT,RMT?limit=none"
        }
      },
      {
        "processor": "csv",
        "processor_params": {
          "output_path": "Experiment Antibodies",
          "query_path": "/entity/M:=RNASeq:Replicate/RID={rid}/(Experiment_RID)=(RNASeq:Experiment_Antibodies:Experiment_RID)?limit=none"
        }
      },
      {
        "processor": "csv",
        "processor_params": {
          "output_path": "Experiment Custom Metadata",
          "query_path": "/entity/M:=RNASeq:Replicate/RID={rid}/(Experiment_RID)=(RNASeq:Experiment_Custom_Metadata:Experiment_RID)?limit=none"
        }
      },
      {
        "processor": "csv",
        "processor_params": {
          "output_path": "Experiment Settings",
          "query_path": "/attribute/M:=RNASeq:Replicate/RID={rid}/(Experiment_RID)=(RNASeq:Experiment_Settings:Experiment_RID)/RID,Experiment_RID,Alignment_Format,Aligner,Aligner_Version,Reference_Genome,Sequence_Trimming,Duplicate_Removal,Pre-alignment_Sequence_Removal,Junction_Reads,Library_Type,Protocol_Reference,Library_Selection,Quantification_Format,Quantification_Software,Expression_Metric,Transcriptome_Model,Sequencing_Platform,Paired_End,Read_Length,Strandedness,Used_Spike_Ins,Spike_Ins_Amount,Visualization_Format,Visualization_Software,Visualization_Version,Visualization_Setting,Notes,RCT,RMT?limit=none"
        }
      },
      {
        "processor": "csv",
        "processor_params": {
          "output_path": "Replicate",
          "query_path": "/attribute/M:=RNASeq:Replicate/RID={rid}/RID,Study_RID,Experiment_RID,Biological_Replicate_Number,Technical_Replicate_Number,Specimen_RID,Collection_Date,Mapped_Reads,GEO_Sample_Accession_ID,Notes,Principal_Investigator,Consortium,Release_Date,RCT,RMT?limit=none"
        }
      },
      {
        "processor": "csv",
        "processor_params": {
          "output_path": "Specimen",
          "query_path": "/attribute/M:=RNASeq:Replicate/RID={rid}/S:=(Specimen_RID)=(Gene_Expression:Specimen:RID)/T:=left(Stage_ID)=(Vocabulary:Developmental_Stage:ID)/$S/RID,Title,Species,Stage_ID,Stage_Name:=T:Name,Stage_Detail,Assay_Type,Strain,Wild_Type,Sex,Passage,Phenotype,Cell_Line,Parent_Specimen,Upload_Notes,Preparation,Fixation,Embedding,Internal_ID,Principal_Investigator,Consortium,Release_Date,RCT,RMT,GUDMAP2_Accession_ID?limit=none"
        }
      },
      {
        "processor": "csv",
        "processor_params": {
          "output_path": "Specimen_Anatomical_Source",
          "query_path": "/attribute/M:=RNASeq:Replicate/RID={rid}/(Specimen_RID)=(Gene_Expression:Specimen:RID)/(RID)=(Gene_Expression:Specimen_Tissue:Specimen_RID)/RID,Specimen_RID,Tissue,RCT,RMT?limit=none"
        }
      },
      {
        "processor": "csv",
        "processor_params": {
          "output_path": "Specimen_Cell_Types",
          "query_path": "/attribute/M:=RNASeq:Replicate/RID={rid}/(Specimen_RID)=(Gene_Expression:Specimen:RID)/(RID)=(Gene_Expression:Specimen_Cell_Type:Specimen)/RID,Specimen_RID:=Specimen,Cell_Type,RCT,RMT?limit=none"
        }
      },
      {
        "processor": "csv",
        "processor_params": {
          "output_path": "Single Cell Metrics",
          "query_path": "/attribute/M:=RNASeq:Replicate/RID={rid}/(RID)=(RNASeq:Single_Cell_Metrics:Replicate_RID)/RID,Study_RID,Experiment_RID,Replicate_RID,Reads_%28Millions%29,Reads%2FCell,Detected_Gene_Count,Genes%2FCell,UMI%2FCell,Estimated_Cell_Count,Principal_Investigator,Consortium,Release_Date,RCT,RMT?limit=none"
        }
      },
      {
        "processor": "csv",
        "processor_params": {
          "output_path": "File",
          "query_path": "/attribute/M:=RNASeq:Replicate/RID={rid}/(RID)=(RNASeq:File:Replicate_RID)/RID,Study_RID,Experiment_RID,Replicate_RID,Caption,File_Type,File_Name,URI,File_size,MD5,GEO_Archival_URL,dbGaP_Accession_ID,Processed,Notes,Principal_Investigator,Consortium,Release_Date,RCT,RMT,Legacy_File_RID,GUDMAP_NGF_OID,GUDMAP_NGS_OID?limit=none"
        }
      },
      {
        "processor": "fetch",
        "processor_params": {
          "output_path": "assets/Study/{Study_RID}/Experiment/{Experiment_RID}/Replicate/{Replicate_RID}",
          "query_path": "/attribute/M:=RNASeq:Replicate/RID={rid}/(RID)=(RNASeq:File:Replicate_RID)/File_Type=FastQ/File_Name::ciregexp::%5B_.%5DR%5B12%5D%5C.fastq%5C.gz/url:=URI,length:=File_size,filename:=File_Name,md5:=MD5,Study_RID,Experiment_RID,Replicate_RID?limit=none"
        }
      }
    ]
  }
}
// Pipeline parameter set by this configuration file.
params.refSource = "aws"
// Work directory for intermediate task files.
workDir = 's3://gudmap-rbk.output/work'

// AWS settings, gathered into a single scope.
aws {
    region = 'us-east-2'
    client {
        storageEncryption = 'AES256'
    }
    batch {
        cliPath = '/home/ec2-user/miniconda/bin/aws'
    }
}
// Process settings for the AWS Batch executor.
process {
    // Defaults applied to every process.
    executor = 'awsbatch'
    cpus = 1
    memory = '1 GB'

    // Per-process resource requests, selected by process name.
    withName: 'trackStart' {
        cpus = 1
        memory = '1 GB'
    }
    withName: 'getBag' {
        cpus = 1
        memory = '1 GB'
    }
    withName: 'getData' {
        cpus = 1
        memory = '1 GB'
    }
    withName: 'parseMetadata' {
        cpus = 15
        memory = '1 GB'
    }
    withName: 'trimData' {
        cpus = 20
        memory = '2 GB'
    }
    withName: 'getRefInfer' {
        cpus = 1
        memory = '1 GB'
    }
    withName: 'downsampleData' {
        cpus = 1
        memory = '1 GB'
    }
    withName: 'alignSampleData' {
        cpus = 50
        memory = '5 GB'
    }
    withName: 'inferMetadata' {
        cpus = 5
        memory = '1 GB'
    }
    withName: 'checkMetadata' {
        cpus = 1
        memory = '1 GB'
    }
    withName: 'getRef' {
        cpus = 1
        memory = '1 GB'
    }
    withName: 'alignData' {
        cpus = 50
        memory = '10 GB'
    }
    withName: 'dedupData' {
        cpus = 5
        memory = '20 GB'
    }
    withName: 'countData' {
        cpus = 2
        memory = '5 GB'
    }
    withName: 'makeBigWig' {
        cpus = 15
        memory = '5 GB'
    }
    withName: 'fastqc' {
        cpus = 1
        memory = '1 GB'
    }
    withName: 'dataQC' {
        cpus = 15
        memory = '2 GB'
    }
    withName: 'aggrQC' {
        cpus = 2
        memory = '1 GB'
    }
    withName: 'uploadInputBag' {
        cpus = 1
        memory = '1 GB'
    }
    withName: 'uploadExecutionRun' {
        cpus = 1
        memory = '1 GB'
    }
    withName: 'uploadQC' {
        cpus = 1
        memory = '1 GB'
    }
    withName: 'uploadProcessedFile' {
        cpus = 1
        memory = '1 GB'
    }
    withName: 'uploadOutputBag' {
        cpus = 1
        memory = '1 GB'
    }
    withName: 'finalizeExecutionRun' {
        cpus = 1
        memory = '1 GB'
    }
    withName: 'failPreExecutionRun' {
        cpus = 1
        memory = '1 GB'
    }
    withName: 'failExecutionRun' {
        cpus = 1
        memory = '1 GB'
    }
    withName: 'uploadQC_fail' {
        cpus = 1
        memory = '1 GB'
    }
}
// Pipeline parameter set by this configuration file.
params.refSource = "biohpc"
// Process settings for the SLURM executor.
process {
    // Defaults applied to every process.
    executor = 'slurm'
    queue = 'super'
    clusterOptions = '--hold'
    time = '4h'
    errorStrategy = 'retry'
    maxRetries = 1

    // Per-process overrides, selected by process name: each one either
    // switches the executor to 'local' or names the queue(s) to submit to.
    withName: 'trackStart' {
        executor = 'local'
    }
    withName: 'getBag' {
        executor = 'local'
    }
    withName: 'getData' {
        queue = 'super'
    }
    withName: 'parseMetadata' {
        executor = 'local'
    }
    withName: 'trimData' {
        queue = 'super'
    }
    withName: 'getRefInfer' {
        queue = 'super'
    }
    withName: 'downsampleData' {
        executor = 'local'
    }
    withName: 'alignSampleData' {
        queue = '128GB,256GB,256GBv1,384GB'
    }
    withName: 'inferMetadata' {
        queue = 'super'
    }
    withName: 'checkMetadata' {
        executor = 'local'
    }
    withName: 'getRef' {
        queue = 'super'
    }
    withName: 'alignData' {
        queue = '256GB,256GBv1'
    }
    withName: 'dedupData' {
        queue = 'super'
    }
    withName: 'countData' {
        queue = 'super'
    }
    withName: 'makeBigWig' {
        queue = 'super'
    }
    withName: 'fastqc' {
        queue = 'super'
    }
    withName: 'dataQC' {
        queue = 'super'
    }
    withName: 'aggrQC' {
        executor = 'local'
    }
    withName: 'uploadInputBag' {
        executor = 'local'
    }
    withName: 'uploadExecutionRun' {
        executor = 'local'
    }
    withName: 'uploadQC' {
        executor = 'local'
    }
    withName: 'uploadProcessedFile' {
        executor = 'local'
    }
    withName: 'uploadOutputBag' {
        executor = 'local'
    }
    withName: 'finalizeExecutionRun' {
        executor = 'local'
    }
    withName: 'failPreExecutionRun' {
        executor = 'local'
    }
    withName: 'failExecutionRun' {
        executor = 'local'
    }
    withName: 'uploadQC_fail' {
        executor = 'local'
    }
}
// Singularity container runtime: enabled, with a shared image cache directory.
singularity.enabled = true
singularity.cacheDir = '/project/BICF/BICF_Core/shared/gudmap/singularity_cache/'
// Proxy variables exported to the task environment; all three use the same proxy.
env {
    http_proxy  = 'http://proxy.swmed.edu:3128'
    https_proxy = 'http://proxy.swmed.edu:3128'
    all_proxy   = 'http://proxy.swmed.edu:3128'
}
// Process defaults: SLURM executor with a comma-separated queue list.
process {
    executor = 'slurm'
    queue = '256GB,256GBv1,384GB,128GB'
    clusterOptions = '--hold'
}
// Singularity container runtime: enabled, with a shared image cache directory.
singularity.enabled = true
singularity.cacheDir = '/project/BICF/BICF_Core/shared/gudmap/singularity_cache/'
// Proxy variables exported to the task environment; all three use the same proxy.
env {
    http_proxy  = 'http://proxy.swmed.edu:3128'
    https_proxy = 'http://proxy.swmed.edu:3128'
    all_proxy   = 'http://proxy.swmed.edu:3128'
}
# MultiQC report configuration for the GUDMAP/RBK RNA-Seq pipeline.
# NOTE(review): the nesting below was reconstructed from a copy whose
# indentation had been stripped — confirm against the repository version.

# Report branding.
custom_logo: './bicf_logo.png'
custom_logo_url: 'https://utsouthwestern.edu/labs/bioinformatics/'
custom_logo_title: 'Bioinformatics Core Facility'

report_header_info:
  - Contact Email: 'bicf@utsouthwestern.edu'
  - Application Type: 'RNA-Seq Analytic Pipeline for GUDMAP/RBK'
  - Department: 'Bioinformatic Core Facility, Department of Bioinformatics, University of Texas Southwestern Medical Center'

title: RNA-Seq Analytic Pipeline for GUDMAP/RBK

report_comment: >
  This report has been generated by the <a href="https://doi.org/10.5281/zenodo.3625056">GUDMAP/RBK RNA-Seq Pipeline</a>

# Module sections. hisat2 and rseqc each appear twice, told apart by
# their path_filters.
top_modules:
  - fastqc:
      name: 'Raw'
      info: 'Replicate Raw fastq QC Results'
  - cutadapt:
      name: 'Trim'
      info: 'Replicate Trim Adapter QC Results'
  - hisat2:
      name: 'Align'
      info: 'Replicate Alignment QC Results'
      path_filters:
        - '*alignSummary*'
  - picard:
      name: 'Dedup'
      info: 'Replicate Alignment Deduplication QC Results'
  - rseqc:
      name: 'Inner Distance'
      info: 'Replicate Paired End Inner Distance Distribution Results'
      path_filters:
        - '*insertSize*'
  - custom_content
  - featureCounts:
      name: 'Count'
      info: 'Replicate Feature Count QC Results'
  - hisat2:
      name: 'Inference: Align'
      info: 'Inference Alignment (1M downsampled reads) QC Results'
      path_filters:
        - '*alignSampleSummary*'
  - rseqc:
      name: 'Inference: Stranded'
      info: '1M Downsampled Reads Strandedness Inference Results'
      path_filters:
        - '*infer_experiment*'

report_section_order:
  run:
    order: 4000
  rid:
    order: 3000
  meta:
    order: 2000
  ref:
    order: 1000
  software_versions:
    order: -1000
  software_references:
    order: -2000

skip_generalstats: true

# Custom-content sections; each id has a matching file pattern under `sp`.
custom_data:
  run:
    file_format: 'tsv'
    section_name: 'Run'
    description: 'This is the run information'
    plot_type: 'table'
    pconfig:
      id: 'run'
      scale: false
      format: '{}'
    headers:
      Session:
        description: ''
      Session ID:
        description: 'Nextflow session ID'
      Pipeline Version:
        description: 'BICF pipeline version'
      Input:
        description: 'Input overrides'
  rid:
    file_format: 'tsv'
    section_name: 'RID'
    description: 'This is the identifying RIDs'
    plot_type: 'table'
    pconfig:
      id: 'rid'
      scale: false
      format: '{}'
    headers:
      Replicate:
        description: ''
      Replicate RID:
        description: 'Replicate RID'
      Experiment RID:
        description: 'Experiment RID'
      Study RID:
        description: 'Study RID'
  meta:
    file_format: 'tsv'
    section_name: 'Metadata'
    description: 'This is the comparison of inferred metadata, submitter provided, and calculated'
    plot_type: 'table'
    pconfig:
      id: 'meta'
      scale: false
      format: '{:,.0f}'
    headers:
      Source:
        description: 'Metadata source'
      Species:
        description: 'Species'
      Ends:
        description: 'Single or paired end sequencing'
      Stranded:
        description: 'Stranded (forward/reverse) or unstranded library prep'
      Spike-in:
        description: 'ERCC spike in'
      Raw Reads:
        description: 'Number of reads of the sequencer'
      Assigned Reads:
        description: 'Final reads after filtering'
      Median Read Length:
        description: 'Median read length'
      Median TIN:
        description: 'Median transcript integrity number'
  ref:
    file_format: 'tsv'
    section_name: 'Reference'
    description: 'This is the reference version information'
    plot_type: 'table'
    pconfig:
      id: 'ref'
      scale: false
      format: '{}'
    headers:
      Species:
        description: 'Reference species'
      Genome Reference Consortium Build:
        description: 'Reference source build'
      Genome Reference Consortium Patch:
        description: 'Reference source patch version'
      GENCODE Annotation Release:
        description: 'Annotation release version'
  tin:
    file_format: 'tsv'
    section_name: 'TIN'
    description: 'This is the distribution of TIN values calculated by the tool RSeQC'
    plot_type: 'bargraph'
    pconfig:
      id: 'tin'
    # NOTE(review): kept as in the original — a multi-line plain scalar,
    # not a mapping like the other sections' headers. Confirm this is intended.
    headers:
      chrom
      1 - 10
      11 - 20
      21 - 30
      31 - 40
      41 - 50
      51 - 60
      61 - 70
      71 - 80
      81 - 90
      91 - 100

# File-name patterns for the custom-content sections above.
sp:
  run:
    fn: "run.tsv"
  rid:
    fn: 'rid.tsv'
  meta:
    fn: 'metadata.tsv'
  ref:
    fn: 'reference.tsv'
  tin:
    fn: '*_tin.hist.tsv'
// Default queue for every process: the high-priority queue.
process.queue = 'highpriority-0ef8afb0-c7ad-11ea-b907-06c94a3c6390'
// Default queue for every process: the default-priority queue.
process.queue = 'default-0ef8afb0-c7ad-11ea-b907-06c94a3c6390'
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment