Skip to content
Snippets Groups Projects
Commit 2f239170 authored by Gervaise Henry :cowboy:
Browse files

Update datahub ref download

parent accde9b7
Branches
Tags
2 merge requests: !58 "Develop", !45 Resolve "Move references to GUDMAP/RBK"
Pipeline #8129 passed with stages
in 1 minute and 57 seconds
params { params {
refSource = "biohpc refSource = "biohpc"
} }
process { process {
......
...@@ -53,8 +53,8 @@ if (params.source == "dev") { ...@@ -53,8 +53,8 @@ if (params.source == "dev") {
} }
if (params.refSource == "biohpc") { if (params.refSource == "biohpc") {
referenceBase = "/project/BICF/BICF_Core/shared/gudmap/references" referenceBase = "/project/BICF/BICF_Core/shared/gudmap/references"
} else if (params.refSource == "aws") { //} else if (params.refSource == "aws") {
referenceBase = "s3://bicf-references" // referenceBase = "s3://bicf-references"
} else if (params.refSource == "datahub") { } else if (params.refSource == "datahub") {
referenceBase = "dev.gudmap.org" referenceBase = "dev.gudmap.org"
} }
...@@ -403,30 +403,35 @@ process getRefInfer { ...@@ -403,30 +403,35 @@ process getRefInfer {
# retreive appropriate reference appropriate location # retreive appropriate reference appropriate location
echo -e "LOG: fetching ${refName} reference files from ${referenceBase}" >> ${repRID}.${refName}.getRefInfer.log echo -e "LOG: fetching ${refName} reference files from ${referenceBase}" >> ${repRID}.${refName}.getRefInfer.log
if [ ${referenceBase} == "s3://bicf-references" ] if [ ${referenceBase} == "/project/BICF/BICF_Core/shared/gudmap/references" ]
then
aws s3 cp "\${references}"/hisat2 ./hisat2 --recursive
aws s3 cp "\${references}"/bed ./${refName}/bed --recursive
aws s3 cp "\${references}"/genome.fna ./
aws s3 cp "\${references}"/genome.gtf ./
elif [ ${referenceBase} == "/project/BICF/BICF_Core/shared/gudmap/references" ]
then then
ln -s "\${references}"/hisat2 ln -s "\${references}"/hisat2
ln -s "\${references}"/bed ${refName}/bed ln -s "\${references}"/bed ${refName}/bed
ln -s "\${references}"/genome.fna ln -s "\${references}"/genome.fna
ln -s "\${references}"/genome.gtf ln -s "\${references}"/genome.gtf
#elif [ ${referenceBase} == "s3://bicf-references" ]
#then
# aws s3 cp "\${references}"/hisat2 ./hisat2 --recursive
# aws s3 cp "\${references}"/bed ./${refName}/bed --recursive
# aws s3 cp "\${references}"/genome.fna ./
# aws s3 cp "\${references}"/genome.gtf ./
elif [ ${referenceBase} == "dev.gudmap.org" ] elif [ ${referenceBase} == "dev.gudmap.org" ]
then then
GRCv=$(echo ${references} | grep -o ${refName}.* | cut -d '.' -f1) GRCv=\$(echo \${references} | grep -o ${refName}.* | cut -d '.' -f1)
GRCp=$(echo ${references} | grep -o ${refName}.* | cut -d '.' -f2) GRCp=\$(echo \${references} | grep -o ${refName}.* | cut -d '.' -f2)
GENCODE=$(echo ${references} | grep -o ${refName}.* | cut -d '.' -f3) GENCODE=\$(echo \${references} | grep -o ${refName}.* | cut -d '.' -f3)
query=$(echo 'https://'${referenceBase}'/ermrest/catalog/2/entity/RNASeq:Reference_Genome/Reference_Version='${GRCv}'.'${GRCp}'/Annotation_Version=GENCODE%20'${GENCODE}) query=\$(echo 'https://'"${referenceBase}"'/ermrest/catalog/2/entity/RNASeq:Reference_Genome/Reference_Version='\${GRCv}'.'\${GRCp}'/Annotation_Version=GENCODE%20'\${GENCODE})
curl --request GET ${query} > refQuery.json curl --request GET \${query} > refQuery.json
refURL=$(python ./workflow/scripts/extractRefData.py --returnParam URL) refURL=\$(python ./workflow/scripts/extractRefData.py --returnParam URL)
loc=$(dirname ${refURL}) loc=\$(dirname \${refURL})
if [ "${loc}" = "/hatrac/*" ]; then echo "LOG: Reference not present in hatrac"; exit 1; fi fName=\$(python ./workflow/scripts/extractRefData.py --returnParam fName)
filename=$(echo $(basename ${refURL}) | grep -oP '.*(?=:)') fName=\${fName%.*}
deriva-hatrac-cli --host ${referenceBase} get ${refURL} if [ "\${loc}" = "/hatrac/*" ]; then echo "LOG: Reference not present in hatrac"; exit 1; fi
filename=\$(echo \$(basename \${refURL}) | grep -oP '.*(?=:)')
deriva-hatrac-cli --host ${referenceBase} get \${refURL}
unzip \$(basename \${refURL})
mkdir -p \${references}
mv \${fName}/* \${references}/
fi fi
echo -e "LOG: fetched" >> ${repRID}.${refName}.getRefInfer.log echo -e "LOG: fetched" >> ${repRID}.${refName}.getRefInfer.log
...@@ -724,36 +729,41 @@ process getRef { ...@@ -724,36 +729,41 @@ process getRef {
# retreive appropriate reference appropriate location # retreive appropriate reference appropriate location
echo -e "LOG: fetching ${species} reference files from ${referenceBase}" >> ${repRID}.getRef.log echo -e "LOG: fetching ${species} reference files from ${referenceBase}" >> ${repRID}.getRef.log
if [ ${referenceBase} == "s3://bicf-references" ] if [ ${referenceBase} == "/project/BICF/BICF_Core/shared/gudmap/references" ]
then
echo -e "LOG: grabbing reference files from S3" >> ${repRID}.getRef.log
aws s3 cp "\${references}"/hisat2 ./hisat2 --recursive
aws s3 cp "\${references}"/bed ./bed --recursive
aws s3 cp "\${references}"/genome.fna ./
aws s3 cp "\${references}"/genome.gtf ./
aws s3 cp "\${references}"/geneID.tsv ./
aws s3 cp "\${references}"/Entrez.tsv ./
elif [ ${referenceBase} == "/project/BICF/BICF_Core/shared/gudmap/references" ]
then then
echo -e "LOG: grabbing reference files from local (BioHPC)" >> ${repRID}.getRef.log
ln -s "\${references}"/hisat2 ln -s "\${references}"/hisat2
ln -s "\${references}"/bed ln -s "\${references}"/bed
ln -s "\${references}"/genome.fna ln -s "\${references}"/genome.fna
ln -s "\${references}"/genome.gtf ln -s "\${references}"/genome.gtf
ln -s "\${references}"/geneID.tsv ln -s "\${references}"/geneID.tsv
ln -s "\${references}"/Entrez.tsv ln -s "\${references}"/Entrez.tsv
#elif [ ${referenceBase} == "s3://bicf-references" ]
#then
# echo -e "LOG: grabbing reference files from S3" >> ${repRID}.getRef.log
# aws s3 cp "\${references}"/hisat2 ./hisat2 --recursive
# aws s3 cp "\${references}"/bed ./bed --recursive
# aws s3 cp "\${references}"/genome.fna ./
# aws s3 cp "\${references}"/genome.gtf ./
# aws s3 cp "\${references}"/geneID.tsv ./
# aws s3 cp "\${references}"/Entrez.tsv ./
elif [ ${referenceBase} == "dev.gudmap.org" ] elif [ ${referenceBase} == "dev.gudmap.org" ]
then then
GRCv=$(echo ${references} | grep -o ${refName}.* | cut -d '.' -f1) GRCv=\$(echo \${references} | grep -o ${refName}.* | cut -d '.' -f1)
GRCp=$(echo ${references} | grep -o ${refName}.* | cut -d '.' -f2) GRCp=\$(echo \${references} | grep -o ${refName}.* | cut -d '.' -f2)
GENCODE=$(echo ${references} | grep -o ${refName}.* | cut -d '.' -f3) GENCODE=\$(echo \${references} | grep -o ${refName}.* | cut -d '.' -f3)
query=$(echo 'https://'${referenceBase}'/ermrest/catalog/2/entity/RNASeq:Reference_Genome/Reference_Version='${GRCv}'.'${GRCp}'/Annotation_Version=GENCODE%20'${GENCODE}) query=\$(echo 'https://'"${referenceBase}"'/ermrest/catalog/2/entity/RNASeq:Reference_Genome/Reference_Version='\${GRCv}'.'\${GRCp}'/Annotation_Version=GENCODE%20'\${GENCODE})
curl --request GET ${query} > refQuery.json curl --request GET \${query} > refQuery.json
refURL=$(python ./workflow/scripts/extractRefData.py --returnParam URL) refURL=\$(python ./workflow/scripts/extractRefData.py --returnParam URL)
loc=$(dirname ${refURL}) loc=\$(dirname \${refURL})
if [ "${loc}" = "/hatrac/*" ]; then echo "LOG: Reference not present in hatrac"; exit 1; fi fName=\$(python ./workflow/scripts/extractRefData.py --returnParam fName)
filename=$(echo $(basename ${refURL}) | grep -oP '.*(?=:)') fName=\${fName%.*}
deriva-hatrac-cli --host ${referenceBase} get ${refURL} if [ "\${loc}" = "/hatrac/*" ]; then echo "LOG: Reference not present in hatrac"; exit 1; fi
fi filename=\$(echo \$(basename \${refURL}) | grep -oP '.*(?=:)')
deriva-hatrac-cli --host ${referenceBase} get \${refURL}
unzip \$(basename \${refURL})
mkdir -p \${references}
mv \${fName}/* \${references}/
fi fi
echo -e "LOG: fetched" >> ${repRID}.getRef.log echo -e "LOG: fetched" >> ${repRID}.getRef.log
""" """
......
...@@ -17,7 +17,9 @@ def main(): ...@@ -17,7 +17,9 @@ def main():
if refQuery["File_URL"].count() == 1: if refQuery["File_URL"].count() == 1:
if args.returnParam == "URL": if args.returnParam == "URL":
print(refQuery["File_URL"].values[0]) print(refQuery["File_URL"].values[0])
elif args.returnParam == "": elif args.returnParam == "fName":
print(refQuery["File_Name"].values[0])
elif args.returnParam == "MD5":
print(refQuery["File_MD5"].values[0]) print(refQuery["File_MD5"].values[0])
else: else:
raise Exception("Multple references found: \n%s" % raise Exception("Multple references found: \n%s" %
......
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment