diff --git a/data/non-curated/SampleInputFileLinks.txt b/data/non-curated/SampleInputFileLinks.txt deleted file mode 100644 index 0c16ca5d34275adeeaa67a21707cff39ed5d07ad..0000000000000000000000000000000000000000 --- a/data/non-curated/SampleInputFileLinks.txt +++ /dev/null @@ -1,12 +0,0 @@ -ftp://ftp.sra.ebi.ac.uk/vol1/fastq/SRR118/096/SRR11862696/SRR11862696_1.fastq.gz -ftp://ftp.sra.ebi.ac.uk/vol1/fastq/SRR118/096/SRR11862696/SRR11862696_2.fastq.gz -ftp://ftp.sra.ebi.ac.uk/vol1/fastq/SRR118/097/SRR11862697/SRR11862697_1.fastq.gz -ftp://ftp.sra.ebi.ac.uk/vol1/fastq/SRR118/097/SRR11862697/SRR11862697_2.fastq.gz -ftp://ftp.sra.ebi.ac.uk/vol1/fastq/SRR118/099/SRR11862699/SRR11862699_1.fastq.gz -ftp://ftp.sra.ebi.ac.uk/vol1/fastq/SRR118/099/SRR11862699/SRR11862699_2.fastq.gz -ftp://ftp.sra.ebi.ac.uk/vol1/fastq/SRR118/082/SRR11862682/SRR11862682_1.fastq.gz -ftp://ftp.sra.ebi.ac.uk/vol1/fastq/SRR118/082/SRR11862682/SRR11862682_2.fastq.gz -ftp://ftp.sra.ebi.ac.uk/vol1/fastq/SRR118/091/SRR11862691/SRR11862691_1.fastq.gz -ftp://ftp.sra.ebi.ac.uk/vol1/fastq/SRR118/091/SRR11862691/SRR11862691_2.fastq.gz -ftp://ftp.sra.ebi.ac.uk/vol1/fastq/SRR118/092/SRR11862692/SRR11862692_1.fastq.gz -ftp://ftp.sra.ebi.ac.uk/vol1/fastq/SRR118/092/SRR11862692/SRR11862692_2.fastq.gz \ No newline at end of file diff --git a/data/non-curated/TrainingDataSet_Process_Codes.xlsx b/data/non-curated/TrainingDataSet_Process_Codes.xlsx deleted file mode 100644 index 9274d9783e06864b6f3e6a3af5613f3def54ba61..0000000000000000000000000000000000000000 Binary files a/data/non-curated/TrainingDataSet_Process_Codes.xlsx and /dev/null differ diff --git a/vendor/shodhka/test_20220727/README.md b/vendor/shodhka/test_20220727/README.md index 619539f0b1191ab60c98c637c1fa44a8458dce38..beb34db18b95c442101985f2cdc07486d387eafc 100644 --- a/vendor/shodhka/test_20220727/README.md +++ b/vendor/shodhka/test_20220727/README.md @@ -1,5 +1,10 @@ -a) Sample raw data: enclosed is a text file with dataset download links -b) file types for inputs and outputs at each step (particularly for FASTQC and TrimGalore (first two steps) operations): enclosed as a table below (also enclosed the corresponding excel file): -S.No. Analysis stage Tool used Input file type Code Output data type -1 Quality check FastQC fq / fastq ./fastqc *fastq.gz text, html QC reports -2 Quality trimming TrimGalore fq / fastq trim_galore --gzip --fastqc --max_n 2 --paired --length 50 SRR11862696_1.fastq.gz SRR11862696_2.fastq.gz fq; fastq +1. Sample raw data: enclosed is a text file with dataset download links. +2. File types for inputs and outputs at each step (particularly for FASTQC and +TrimGalore (first two steps) operations) + +Enclosed as a table below (also enclosed the corresponding excel file): + +S.No. Analysis stage Tool used Input file type Code Output data type 1 Quality check FastQC fq / fastq ./fastqc +*fastq.gz text, html QC reports 2 Quality trimming TrimGalore fq / fastq +trim_galore --gzip --fastqc --max_n 2 --paired --length 50 +SRR11862696_1.fastq.gz SRR11862696_2.fastq.gz fq; fastq diff --git a/vendor/shodhka/test_20220727/run.copr.sh b/vendor/shodhka/test_20220727/run.copr.sh new file mode 100755 index 0000000000000000000000000000000000000000..c265cdf84e911c1ffd5dc92198aca116c179501c --- /dev/null +++ b/vendor/shodhka/test_20220727/run.copr.sh @@ -0,0 +1,11 @@ +#!/usr/bin/env bash + +FILES=$(cat ./data/SampleInputFileLinks.txt) +for file in $FILES; do + # --no-clobber, do not download if file already exists. + wget -nc $file +done + +fastqc *fastq.gz +trim_galore --gzip --fastqc --max_n 2 --paired --length 50 \ + SRR11862696_1.fastq.gz SRR11862696_2.fastq.gz