Newer
Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
## R script for combine HTSeq outputs into one read count matrix in R
## Written by Ahmed Alhendi
## Usage example: Rscript htseq-combine_all.R <workdir> <Reacount_matrix_output_name>
args <- commandArgs(TRUE)
path <- as.character(args[1])
myoutname <-as.character(args[2])
print(paste0("############################"))
##Read files names
files <- list.files(path=path, pattern="*.txt")
print(sprintf("## Files to be merged are: ##"))
print(files)
print(paste0("############################"))
##Read files names
files <- list.files(path=path, pattern="*.txt")
print(sprintf("## Files to be merged are: ##"))
print(files)
print(paste0("############################"))
# using perl to manpulate file names by trimming file extension
labs <- paste("", gsub("\\.txt", "", files, perl=TRUE), sep="")
##Load all files to list object, use paste to return the trimpping parts to file name
print(sprintf("######### file read START ######### %s", format(Sys.time(),"%b_%d_%Y_%H_%M_%S_%Z")))
cov <- list()
for (i in labs) {
filepath <- file.path(path,paste(i,".txt",sep=""))
cov[[i]] <- read.table(filepath,sep = "\t", header=F, stringsAsFactors=FALSE)
colnames(cov[[i]]) <- c("ENSEMBL_GeneID", i)
}
print(sprintf("######### file read END ######### %s", format(Sys.time(),"%b_%d_%Y_%H_%M_%S_%Z")))
## construct one data frame from list of data.frames using reduce function
print(sprintf("######### merge START ######### %s",
format(Sys.time(),"%b_%d_%Y_%H_%M_%S_%Z")))
df <-Reduce(function(x,y) merge(x = x, y = y, by ="ENSEMBL_GeneID"), cov)
print(sprintf("######### merge END ######### %s", format(Sys.time(),"%b_%d_%Y_%H_%M_%S_%Z")))
print(sprintf("Exported merged table within work directory in txt and Rdata format with file name merged_%s_%s", make.names(format(Sys.time(),"%b_%d_%Y_%H_%M_%S_%Z")), myoutname))
write.table(df,paste(path, myoutname,".txt",sep=""), sep="\t", quote= F, row.names = F)
save.image(paste(path, myoutname,".Rdata",sep=""))
print(sprintf("######### MERGE FUNCTION COMPLETE ######### %s", format(Sys.time(),"%b_%d_%Y_%H_%M_%S_%Z")))