Skip to content
Snippets Groups Projects
Commit ec163e1a authored by Venkat Malladi's avatar Venkat Malladi
Browse files

Merge branch '27-FragSizeDist' into 'master'

Resolve "Add fragment size distribution"

Closes #27

See merge request !24
parents 28833353 781b8eb6
1 merge request!24Resolve "Add fragment size distribution"
Pipeline #7105 passed with stages
in 8 hours, 8 minutes, and 42 seconds
......@@ -22,6 +22,7 @@ All notable changes to this project will be documented in this file.
- Added percentage of reads in mitochondria (unfiltered and dedup)
- Added FRiP score (per sample: reads in replicated peaks/total reads per)
- Added TSS enrichment
- Added Fragment/Insert Length size
## [publish_1.0.0 ] - 2019-12-03
Initial release of pipeline
......
......@@ -87,6 +87,9 @@ experimentQC | *.FRiPscore.tsv | File containing FRiP score
experimentQC | *.TSSenrichment.tsv | File containing TSS enrichment
experimentQC | *_large_tss-enrich.pdf | TSS Enrichment heatmap and metagene plot
experimentQC | *_tss-enrich.pdf | TSS Enrichment metagene plot
experimentQC | *.fragment_length_linear.pdf | Paired-end only, fragment/insert size densities, linear
experimentQC | *.fragment_length_linear.pdf | Paired-end only, log10 fragment/insert size densities
experimentQC | *.fragment_length_count.txt | Paired-end only, count and fragment length, raw data
multiqcReport | multiqc_report.html | Quality control report of percent mitochondria, NRF, PBC1, PBC2, NSC, and RSC. Also contains software versions and references to cite.
......@@ -96,7 +99,7 @@ multiqcReport | multiqc_report.html | Quality control report of percent mitochon
2. crossReads/*cc.plot.pdf: make sure your sample data has the correct signal intensity and location. See [HERE](https://ccg.epfl.ch//var/sib_april15/cases/landt12/strand_correlation.html) for more details.
3. filterReads/sample/*.pbc.qc: column 6 (NRF) > 0.9, column 7 (PBC1) > 0.9, and column 8 (PBC2) >3.
4. experimentQC/coverage.pdf, experimentQC/heatmeap_SpearmanCorr.pdf, experimentQC/heatmeap_PearsonCorr.pdf: See [HERE](https://deeptools.readthedocs.io/en/develop/content/list_of_tools.html) for more details.
5. experimentQC/: Common Quality controls for ATAC-seq: FRiP score and TSS enrichment
5. experimentQC/: Common Quality controls for ATAC-seq: FRiP score, TSS enrichment, Fragment/Insert length densities (paired-end only)
## Common Errors
......
......@@ -87,6 +87,9 @@ experimentQC | *.FRiPscore.tsv | File containing FRiP score
experimentQC | *.TSSenrichment.tsv | File containing TSS enrichment
experimentQC | *_large_tss-enrich.pdf | TSS Enrichment heatmap and metagene plot
experimentQC | *_tss-enrich.pdf | TSS Enrichment metagene plot
experimentQC | *.fragment_length_linear.pdf | Paired-end only, fragment/insert size densities, linear
experimentQC | *.fragment_length_linear.pdf | Paired-end only, log10 fragment/insert size densities
experimentQC | *.fragment_length_count.txt | Paired-end only, count and fragment length, raw data
multiqcReport | multiqc_report.html | Quality control report of percent mitochondria, NRF, PBC1, PBC2, NSC, and RSC. Also contains software versions and references to cite.
......@@ -96,7 +99,7 @@ multiqcReport | multiqc_report.html | Quality control report of percent mitochon
2. crossReads/*cc.plot.pdf: make sure your sample data has the correct signal intensity and location. See [HERE](https://ccg.epfl.ch//var/sib_april15/cases/landt12/strand_correlation.html) for more details.
3. filterReads/sample/*.pbc.qc: column 6 (NRF) > 0.9, column 7 (PBC1) > 0.9, and column 8 (PBC2) >3.
4. experimentQC/coverage.pdf, experimentQC/heatmeap_SpearmanCorr.pdf, experimentQC/heatmeap_PearsonCorr.pdf: See [HERE](https://deeptools.readthedocs.io/en/develop/content/list_of_tools.html) for more details.
5. experimentQC/: Common Quality controls for ATAC-seq: FRiP score and TSS enrichment
5. experimentQC/: Common Quality controls for ATAC-seq: FRiP score, TSS enrichment, Fragment/Insert length densities (paired-end only)
## Common Errors
......
......@@ -543,29 +543,53 @@ process experimentQC {
output:
file '*.{pdf,npz}' into deepToolsStats
file '*.fragment_length_count.txt' optional true into fdlStats
file('version_*.txt') into experimentQCVersions
file '*.FRiPscore.tsv' into FRiPscore
file '*.TSSenrichment.tsv' into TSSenrichment
script:
if (params.astrocyte == true) {
"""
module load python/3.6.1-2-anaconda
module load deeptools/2.5.0.1
module load samtools/1.4.1
module load bedtools/2.26.0
python3 ${baseDir}/scripts/experiment_qc.py -d ${designExperimentQC}
bash ${baseDir}/scripts/make_tss.sh ${gtfFile}
module load singularity/2.6.1
singularity run /project/shared/bicf_workflow_ref/singularity_images/metaseq.simg ${baseDir}/scripts/atac_qc.py -d ${designExperimentQC} -l ${fqlengthDesign} -t gencode.tss -c ${chromSizes}
"""
if (pairedEnd) {
"""
module load python/3.6.1-2-anaconda
module load deeptools/2.5.0.1
module load samtools/1.4.1
module load bedtools/2.26.0
python3 ${baseDir}/scripts/experiment_qc.py -d ${designExperimentQC} -p
bash ${baseDir}/scripts/make_tss.sh ${gtfFile}
module load singularity/2.6.1
singularity run /project/shared/bicf_workflow_ref/singularity_images/metaseq.simg ${baseDir}/scripts/atac_qc.py -d ${designExperimentQC} -l ${fqlengthDesign} -t gencode.tss -c ${chromSizes}
"""
}
else {
"""
module load python/3.6.1-2-anaconda
module load deeptools/2.5.0.1
module load samtools/1.4.1
module load bedtools/2.26.0
python3 ${baseDir}/scripts/experiment_qc.py -d ${designExperimentQC}
bash ${baseDir}/scripts/make_tss.sh ${gtfFile}
module load singularity/2.6.1
singularity run /project/shared/bicf_workflow_ref/singularity_images/metaseq.simg ${baseDir}/scripts/atac_qc.py -d ${designExperimentQC} -l ${fqlengthDesign} -t gencode.tss -c ${chromSizes}
"""
}
}
else {
"""
python3 ${baseDir}/scripts/experiment_qc.py -d ${designExperimentQC}
bash ${baseDir}/scripts/make_tss.sh ${gtfFile}
singularity run /project/shared/bicf_workflow_ref/singularity_images/metaseq.simg ${baseDir}/scripts/atac_qc.py -d ${designExperimentQC} -l ${fqlengthDesign} -t gencode.tss -c ${chromSizes}
"""
if (pairedEnd) {
"""
python3 ${baseDir}/scripts/experiment_qc.py -d ${designExperimentQC} -p
bash ${baseDir}/scripts/make_tss.sh ${gtfFile}
singularity run /project/shared/bicf_workflow_ref/singularity_images/metaseq.simg ${baseDir}/scripts/atac_qc.py -d ${designExperimentQC} -l ${fqlengthDesign} -t gencode.tss -c ${chromSizes}
"""
}
else {
"""
python3 ${baseDir}/scripts/experiment_qc.py -d ${designExperimentQC}
bash ${baseDir}/scripts/make_tss.sh ${gtfFile}
singularity run /project/shared/bicf_workflow_ref/singularity_images/metaseq.simg ${baseDir}/scripts/atac_qc.py -d ${designExperimentQC} -l ${fqlengthDesign} -t gencode.tss -c ${chromSizes}
"""
}
}
}
......
......@@ -11,6 +11,10 @@ import shutil
from multiprocessing import cpu_count
import pandas as pd
import utils
import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot as plt
EPILOG = '''
......@@ -37,6 +41,11 @@ def get_args():
help="The design file to run QC (tsv format).",
required=True)
parser.add_argument('-p', '--paired',
help="True/False if paired-end or single end.",
default=False,
action='store_true')
args = parser.parse_args()
return args
......@@ -195,9 +204,57 @@ def FRiP(sample, design):
f.close()
def FLD(sample, design):
''' Calculate and plot Fragment Length Distribution'''
logger.info("Determining Fragment Length Distribution for PE")
# Assign bam file
bam_file = design.loc[ : , "bamReads"].values[0]
# Assign output
fdl_filename = "%s.fragment_length_count.txt" % (sample)
fdl_fig_lin = "%s.fragment_length_linear.pdf" % (sample)
fdl_fig_log = "%s.fragment_length_log10.pdf" % (sample)
# Calculate Fragment length
steps = [
"samtools view %s" % (bam_file),
"awk '$9>0'",
"cut -f 9",
"sort",
"uniq -c",
"sort -b -k 2,2n",
"sed -e 's/^[ \t]*//'",
]
out, err = utils.run_pipe(steps, fdl_filename)
# Print figures
# linear
result = pd.read_csv(fdl_filename, sep=' ', header=None)
x = result[1]
y = result[0].div(result[0].sum())
plt.title('Fragment Length Distribution')
plt.ylabel('Density')
plt.xlabel('Fragment length (bp)')
plt.plot(x,y)
plt.savefig(fdl_fig_lin)
plt.close()
# log
plt.title('Fragment Length Distribution')
plt.yscale('log')
plt.ylabel('Density (log10)')
plt.xlabel('Fragment length (bp)')
plt.plot(x,y)
plt.savefig(fdl_fig_log)
plt.close()
def main():
args = get_args()
design = args.design
paired = args.paired
# Create a file handler
handler = logging.FileHandler('experiment_qc.log')
......@@ -221,5 +278,10 @@ def main():
for sample, df_sample in design_df.groupby('SampleID'):
FRiP_score = FRiP(sample, df_sample)
# Fragment Length Distribution - PE only
if paired:
for sample, df_sample in design_df.groupby('SampleID'):
FLD_plots = FLD(sample, df_sample)
if __name__ == '__main__':
main()
......@@ -48,6 +48,9 @@ def test_coverage_pairedend_mouse():
assert os.path.exists(os.path.join(test_output_path, 'coverage.pdf'))
assert os.path.exists(os.path.join(test_output_path, 'heatmap_SpearmanCorr.pdf'))
assert os.path.exists(os.path.join(test_output_path, 'heatmap_PearsonCorr.pdf'))
assert os.path.exists(os.path.join(test_output_path, 'ENCLB122XDP.fragment_length_count.txt'))
assert os.path.exists(os.path.join(test_output_path, 'ENCLB122XDP.fragment_length_linear.pdf'))
assert os.path.exists(os.path.join(test_output_path, 'ENCLB122XDP.fragment_length_log10.pdf'))
@pytest.mark.pairedend_mouse
def test_FRiP_pairedend_mouse():
......
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment