Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
C
chipseq_analysis
Manage
Activity
Members
Labels
Plan
Issues
0
Issue boards
Milestones
Iterations
Requirements
Code
Merge requests
0
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Locked files
Build
Pipelines
Jobs
Pipeline schedules
Test cases
Artifacts
Deploy
Releases
Package Registry
Container Registry
Operate
Environments
Terraform modules
Monitor
Incidents
Service Desk
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Code review analytics
Issue analytics
Insights
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Astrocyte
Workflows
BICF
chipseq_analysis
Commits
cec86acd
Commit
cec86acd
authored
6 years ago
by
Venkat Malladi
Browse files
Options
Downloads
Patches
Plain Diff
Change fastq's to be new sample names as defined by the sample_id.
parent
78b410c7
No related merge requests found
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
workflow/main.nf
+2
-2
2 additions, 2 deletions
workflow/main.nf
workflow/scripts/trim_reads.py
+36
-1
36 additions, 1 deletion
workflow/scripts/trim_reads.py
workflow/tests/test_trim_reads.py
+4
-4
4 additions, 4 deletions
workflow/tests/test_trim_reads.py
with
42 additions
and
7 deletions
workflow/main.nf
+
2
−
2
View file @
cec86acd
...
...
@@ -104,12 +104,12 @@ process trimReads {
if (pairedEnd) {
"""
python3 $baseDir/scripts/trim_reads.py -f ${reads[0]} ${reads[1]} -p
python3 $baseDir/scripts/trim_reads.py -f ${reads[0]} ${reads[1]}
-s $sampleId
-p
"""
}
else {
"""
python3 $baseDir/scripts/trim_reads.py -f ${reads[0]}
python3 $baseDir/scripts/trim_reads.py -f ${reads[0]}
-s $sampleId
"""
}
...
...
This diff is collapsed.
Click to expand it.
workflow/scripts/trim_reads.py
+
36
−
1
View file @
cec86acd
...
...
@@ -5,6 +5,7 @@
import
subprocess
import
argparse
import
shutil
import
os
import
logging
EPILOG
=
'''
...
...
@@ -32,6 +33,10 @@ def get_args():
nargs
=
'
+
'
,
required
=
True
)
parser
.
add_argument
(
'
-s
'
,
'
--sample
'
,
help
=
"
The name of the sample.
"
,
required
=
True
)
parser
.
add_argument
(
'
-p
'
,
'
--paired
'
,
help
=
"
True/False if paired-end or single end.
"
,
default
=
False
,
...
...
@@ -61,6 +66,32 @@ def check_tools():
raise
Exception
(
'
Missing cutadapt
'
)
def rename_reads(fastq, sample, paired):
    '''Rename fastq files by sample name.

    Creates symbolic links in the current working directory that point at
    the original fastq files and are named after the sample:
    ``<sample>_R1.fastq.gz`` and, for paired-end data,
    ``<sample>_R2.fastq.gz``.

    Args:
        fastq: Sequence of fastq file paths; the first entry is linked as
            R1 and, when ``paired`` is true, the second entry as R2.
        sample: Sample name used as the prefix of the link names.
        paired: True for paired-end data (two files), False for single-end.

    Returns:
        List of the link paths that were created, in the same order as the
        input files (one entry for single-end, two for paired-end).

    Raises:
        IndexError: If ``paired`` is true but fewer than two fastq files
            were supplied.
        OSError: If a link cannot be created (for example, it already
            exists).
    '''

    # Get current directory to build paths
    cwd = os.getcwd()

    # One read-mate label per input file that has to be linked
    mates = ('R1', 'R2') if paired else ('R1',)

    renamed_fastq = []
    for index, mate in enumerate(mates):
        # Set file name
        link_path = cwd + '/' + sample + '_' + mate + '.fastq.gz'

        # Create symbolic link; append rather than assign by index, since
        # the list starts out empty
        os.symlink(fastq[index], link_path)
        renamed_fastq.append(link_path)

    return renamed_fastq
def
trim_reads
(
fastq
,
paired
):
'''
Run trim_galore on 1 or 2 files.
'''
...
...
@@ -82,6 +113,7 @@ def trim_reads(fastq, paired):
def
main
():
args
=
get_args
()
fastq
=
args
.
fastq
sample
=
args
.
sample
paired
=
args
.
paired
# Create a file handler
...
...
@@ -91,8 +123,11 @@ def main():
# Check if tools are present
check_tools
()
# Rename fastq files by sample
fastq_rename
=
rename_reads
(
fastq
,
sample
paired
)
# Run trim_reads
trim_reads
(
fastq
,
paired
)
trim_reads
(
fastq
_rename
,
paired
)
if
__name__
==
'
__main__
'
:
...
...
This diff is collapsed.
Click to expand it.
workflow/tests/test_trim_reads.py
+
4
−
4
View file @
cec86acd
...
...
@@ -13,9 +13,9 @@ test_output_path = os.path.dirname(os.path.abspath(__file__)) + \
@pytest.mark.singleend
def
test_trim_reads_singleend
():
raw_fastq
=
test_data_path
+
'
ENCFF833BLU.fastq.gz
'
trimmed_fastq
=
test_output_path
+
'
ENC
FF833BLU
_trimmed.fq.gz
'
trimmed_fastq
=
test_output_path
+
'
ENC
LB144FDT
_trimmed.fq.gz
'
trimmed_fastq_report
=
test_output_path
+
\
'
ENC
FF833BLU
.fastq.gz_trimming_report.txt
'
'
ENC
LB144FDT
.fastq.gz_trimming_report.txt
'
assert
os
.
path
.
getsize
(
raw_fastq
)
!=
os
.
path
.
getsize
(
trimmed_fastq
)
assert
os
.
path
.
getsize
(
trimmed_fastq
)
==
2512853101
assert
'
Trimming mode: single-end
'
in
open
(
trimmed_fastq_report
).
readlines
()[
4
]
...
...
@@ -24,9 +24,9 @@ def test_trim_reads_singleend():
@pytest.mark.pairedend
def
test_trim_reads_pairedend
():
raw_fastq
=
test_data_path
+
'
ENCFF582IOZ.fastq.gz
'
trimmed_fastq
=
test_output_path
+
'
ENC
FF582IOZ
_val_2.fq.gz
'
trimmed_fastq
=
test_output_path
+
'
ENC
LB637LZP
_val_2.fq.gz
'
trimmed_fastq_report
=
test_output_path
+
\
'
ENC
FF582IOZ
.fastq.gz_trimming_report.txt
'
'
ENC
LB637LZP
.fastq.gz_trimming_report.txt
'
assert
os
.
path
.
getsize
(
raw_fastq
)
!=
os
.
path
.
getsize
(
trimmed_fastq
)
assert
os
.
path
.
getsize
(
trimmed_fastq
)
==
2229312710
assert
'
Trimming mode: paired-end
'
in
open
(
trimmed_fastq_report
).
readlines
()[
4
]
This diff is collapsed.
Click to expand it.
Preview
0%
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment