From 0022645d04de5d0fc9628b5f4fecf07a6efb9a48 Mon Sep 17 00:00:00 2001
From: Bhanu Priya <bhanupriya151@gmail.com>
Date: Mon, 15 Aug 2022 14:31:47 +0530
Subject: [PATCH] Add pipeline 2 - DGE analysis

---
Reviewer note: dge_analysis.r and run.copr.sh were revised after the index
hashes below were recorded, so those hashes no longer describe the new blobs.

 .../shodhka/test_20220812/data/SampleGroups.txt |  9 ++++
 .../test_20220812/data/SampleInputFileLinks.txt | 12 +++++
 vendor/shodhka/test_20220812/dge_analysis.r     | 52 ++++++++++++++++++++++
 vendor/shodhka/test_20220812/run.copr.sh        | 29 ++++++++++++
 4 files changed, 102 insertions(+)
 create mode 100644 vendor/shodhka/test_20220812/data/SampleGroups.txt
 create mode 100644 vendor/shodhka/test_20220812/data/SampleInputFileLinks.txt
 create mode 100755 vendor/shodhka/test_20220812/dge_analysis.r
 create mode 100755 vendor/shodhka/test_20220812/run.copr.sh

diff --git a/vendor/shodhka/test_20220812/data/SampleGroups.txt b/vendor/shodhka/test_20220812/data/SampleGroups.txt
new file mode 100644
index 0000000..69f5d8a
--- /dev/null
+++ b/vendor/shodhka/test_20220812/data/SampleGroups.txt
@@ -0,0 +1,9 @@
+Sample	Condition
+SRR15322680	Metastatic
+SRR15322681	Metastatic
+SRR15322682	Metastatic
+SRR15322683	Primary
+SRR15322684	Primary
+SRR15322685	Primary
+
+
diff --git a/vendor/shodhka/test_20220812/data/SampleInputFileLinks.txt b/vendor/shodhka/test_20220812/data/SampleInputFileLinks.txt
new file mode 100644
index 0000000..a938b0b
--- /dev/null
+++ b/vendor/shodhka/test_20220812/data/SampleInputFileLinks.txt
@@ -0,0 +1,12 @@
+ftp://ftp.sra.ebi.ac.uk/vol1/fastq/SRR153/080/SRR15322680/SRR15322680_1.fastq.gz
+ftp://ftp.sra.ebi.ac.uk/vol1/fastq/SRR153/080/SRR15322680/SRR15322680_2.fastq.gz
+ftp://ftp.sra.ebi.ac.uk/vol1/fastq/SRR153/081/SRR15322681/SRR15322681_1.fastq.gz
+ftp://ftp.sra.ebi.ac.uk/vol1/fastq/SRR153/081/SRR15322681/SRR15322681_2.fastq.gz
+ftp://ftp.sra.ebi.ac.uk/vol1/fastq/SRR153/082/SRR15322682/SRR15322682_1.fastq.gz
+ftp://ftp.sra.ebi.ac.uk/vol1/fastq/SRR153/082/SRR15322682/SRR15322682_2.fastq.gz
+ftp://ftp.sra.ebi.ac.uk/vol1/fastq/SRR153/083/SRR15322683/SRR15322683_1.fastq.gz
+ftp://ftp.sra.ebi.ac.uk/vol1/fastq/SRR153/083/SRR15322683/SRR15322683_2.fastq.gz
+ftp://ftp.sra.ebi.ac.uk/vol1/fastq/SRR153/084/SRR15322684/SRR15322684_1.fastq.gz
+ftp://ftp.sra.ebi.ac.uk/vol1/fastq/SRR153/084/SRR15322684/SRR15322684_2.fastq.gz
+ftp://ftp.sra.ebi.ac.uk/vol1/fastq/SRR153/085/SRR15322685/SRR15322685_1.fastq.gz
+ftp://ftp.sra.ebi.ac.uk/vol1/fastq/SRR153/085/SRR15322685/SRR15322685_2.fastq.gz
diff --git a/vendor/shodhka/test_20220812/dge_analysis.r b/vendor/shodhka/test_20220812/dge_analysis.r
new file mode 100755
index 0000000..a02f605
--- /dev/null
+++ b/vendor/shodhka/test_20220812/dge_analysis.r
@@ -0,0 +1,52 @@
+#!/usr/bin/env Rscript
+
+# Differential gene expression (DGE) analysis with edgeR.
+#
+# Inputs, relative to the working directory:
+#   count_matrix.txt       tab-delimited counts with a "gene_id" column and
+#                          one column per sample.
+#   data/SampleGroups.txt  tab-delimited table of Sample and Condition.
+# Outputs:
+#   edgeR_diff_genes.csv   quasi-likelihood F-test results for every gene kept.
+#   edgeR_normcounts.csv   log2-CPM of the genes kept, after normalisation.
+
+library(edgeR)
+
+raw_counts <- read.delim("count_matrix.txt", row.names = "gene_id")
+sample_groups <- read.table(
+  "data/SampleGroups.txt",
+  header = TRUE, sep = "\t", row.names = 1
+)
+
+# Look conditions up by sample name: relying on both files listing samples
+# in the same order would silently mislabel libraries.
+unknown <- setdiff(colnames(raw_counts), rownames(sample_groups))
+if (length(unknown) > 0) {
+  stop(
+    "No condition in data/SampleGroups.txt for: ",
+    paste(unknown, collapse = ", "),
+    call. = FALSE
+  )
+}
+condition <- factor(sample_groups[colnames(raw_counts), "Condition"])
+
+dge <- DGEList(raw_counts, group = condition)
+
+# Drop lowly expressed genes, then recompute library sizes.
+keep <- filterByExpr(dge)
+dge <- dge[keep, , keep.lib.sizes = FALSE]
+dge <- calcNormFactors(dge)
+
+# cpm() must run after calcNormFactors(); before it every normalisation
+# factor is still 1 and the values written out are not normalised.
+log_cpm <- cpm(dge, log = TRUE)
+
+# No design is passed, so the model comes from the group factor; coef = 2
+# tests the second condition level against the first.
+dge <- estimateDisp(dge)
+fit <- glmQLFit(dge)
+qlf <- glmQLFTest(fit, coef = 2)
+diff_results <- topTags(qlf, n = Inf)
+
+write.csv(as.data.frame(diff_results), file = "edgeR_diff_genes.csv")
+write.csv(as.data.frame(log_cpm), file = "edgeR_normcounts.csv")
diff --git a/vendor/shodhka/test_20220812/run.copr.sh b/vendor/shodhka/test_20220812/run.copr.sh
new file mode 100755
index 0000000..1e87706
--- /dev/null
+++ b/vendor/shodhka/test_20220812/run.copr.sh
@@ -0,0 +1,29 @@
+#!/usr/bin/env bash
+#
+# RNA-seq pipeline: download the reads, align them with HISAT2, count with
+# htseq-count, then run the edgeR analysis in dge_analysis.r.
+#
+# NOTE(review): genome.fa, hs.gff and the sample1/sample2/samplen names look
+# like placeholders; they are kept unchanged and still need real inputs.
+
+set -euo pipefail
+
+INDEX=hg38_hisat
+
+while IFS= read -r url || [[ -n "$url" ]]; do
+  [[ -n "$url" ]] || continue
+  # Skip files already downloaded; -nc (--no-clobber) stays as a second guard.
+  [[ -e "${url##*/}" ]] || wget -nc "$url"
+done < ./data/SampleInputFileLinks.txt
+
+hisat2-build -p 8 genome.fa "$INDEX"
+# -x has to name the index basename built on the previous line.
+hisat2 -x "$INDEX" -1 sample1_trim_R1.fq -2 sample1_trim_R2.fq \
+  -U unpair.fq -S sample1.sam
+# -r takes a value; "name" is the htseq-count default ordering.
+# NOTE(review): dge_analysis.r reads a "gene_id" header from count_matrix.txt;
+# confirm htseq-count is invoked so that it writes one.
+htseq-count -r name sample1.sam sample2.sam samplen.sam hs.gff \
+  > count_matrix.txt
+
+Rscript dge_analysis.r
-- 
GitLab