diff --git a/workflow/conf/biohpc.config b/workflow/conf/biohpc.config
index 83fac1f1092e641a25214c2af2b61745ed352a83..5bcbc27e1fc72ae7cb15b2f20a7f903b8be4d5d4 100755
--- a/workflow/conf/biohpc.config
+++ b/workflow/conf/biohpc.config
@@ -18,6 +18,9 @@ process {
   withName:trimData {
     queue = 'super'
   }
+  withName:downsampleData {
+    executor = 'local'
+  }
   withName:alignData {
     queue = '256GB,256GBv1'
   }
diff --git a/workflow/nextflow.config b/workflow/nextflow.config
index f0be347ef28c9306abe43974d48464484f48b5c4..eb9ca4d9332b6efddca753663ad9f59855044dab 100644
--- a/workflow/nextflow.config
+++ b/workflow/nextflow.config
@@ -26,6 +26,9 @@ process {
   withName: trimData {
     container = 'bicf/trimgalore:1.1'
   }
+  withName: downsampleData {
+    container = 'bicf/seqtk:2.0.0'
+  }
   withName: alignData {
     container = 'bicf/gudmaprbkaligner:2.0.0'
   }
diff --git a/workflow/rna-seq.nf b/workflow/rna-seq.nf
index 22852231d9d29bfda93b65fecf43cf77df98c51d..625e3a0ec6d0d36ff542103bf849c2286923c079 100644
--- a/workflow/rna-seq.nf
+++ b/workflow/rna-seq.nf
@@ -117,6 +117,7 @@ process getData {
 
 // Replicate raw fastqs for multiple process inputs
 fastqs.into {
+  fastqs_downsampleData
   fastqs_trimData
   fastqs_fastqc
 }
@@ -190,6 +191,7 @@ metadata.splitCsv(sep: ",", header: false).separate(
 )
 // Replicate metadata for multiple process inputs
 endsManual.into {
+  endsManual_downsampleData
   endsManual_trimData
   endsManual_alignData
   endsManual_featureCounts
@@ -284,7 +286,7 @@ process trimData {
     path (fastq) from fastqs_trimData
 
   output:
-    path ("*.fq.gz") into fastqs_trimmed
+    path ("*.fq.gz") into fastqsTrim
     path ("*_trimming_report.txt") into trimQC
     path ("${repRID}.trimData.{out,err}")
 
@@ -306,6 +308,47 @@ process trimData {
     """
 }
 
+// Replicate trimmed fastqs
+fastqsTrim.into {
+  fastqsTrim_downsampleData
+  fastqsTrim_alignData
+}
+
+/*
+ * downsampleData: downsample trimmed fastqs to 1M reads for metadata inference
+ */
+process downsampleData {
+  tag "${repRID}"
+  publishDir "${logsDir}", mode: "copy", pattern: "${repRID}.downsampleData.{out,err}"
+
+  input:
+    val endsManual_downsampleData
+    path fastq from fastqsTrim_downsampleData
+
+  output:
+    path ("sampled.{1,2}.fq") into fastqsSample
+    path ("${repRID}.downsampleData.{out,err}")
+
+  script:
+    """
+    hostname > ${repRID}.downsampleData.err
+    ulimit -a >> ${repRID}.downsampleData.err
+    export https_proxy=\${http_proxy}
+
+    if [ "${endsManual_downsampleData}" == "se" ]
+    then
+      echo "LOG: downsampling single-end trimmed fastq" >> ${repRID}.downsampleData.err
+      seqtk sample -s100 *trimmed.fq.gz 1000000 1> sampled.1.fq 2>> ${repRID}.downsampleData.err
+    elif [ "${endsManual_downsampleData}" == "pe" ]
+    then
+      echo "LOG: downsampling read 1 of paired-end trimmed fastq" >> ${repRID}.downsampleData.err
+      seqtk sample -s100 *1.fq.gz 1000000 1> sampled.1.fq 2>> ${repRID}.downsampleData.err
+      echo "LOG: downsampling read 2 of paired-end trimmed fastq" >> ${repRID}.downsampleData.err
+      seqtk sample -s100 *2.fq.gz 1000000 1> sampled.2.fq 2>> ${repRID}.downsampleData.err
+    fi
+    """
+}
+
 /*
  * alignData: aligns the reads to a reference database
 */
@@ -316,7 +359,7 @@ process alignData {
   input:
     val endsManual_alignData
     val stranded_alignData
-    path fastq from fastqs_trimmed
+    path fastq from fastqsTrim_alignData
     path reference_alignData
 
   output: