
# preparing input data
python prepare_for_deseq_salmon.py expression_samples.txt control case SALMON_OUT ensembl_data_human.txt case_vs_control.txt



------------  DESeq2  ------------



library(DESeq2)
library(gplots)

# DESeq2 differential expression: read the count table, fit the model, keep
# the genes with an adjusted p-value below 0.05 and write them to deseq2.tsv.

# The first column of the table supplies the row names; the remaining columns
# are converted to a numeric matrix of per-sample counts.
# NOTE(review): DESeqDataSetFromMatrix expects integer counts -- confirm that
# the preparation script already rounds the Salmon estimates.
counts <- read.delim("case_vs_control.txt", sep = "\t", header = TRUE, row.names = 1)
counts <- as.matrix(counts)

# Sample annotation, one row per count column, in column order.
# Assumes the table holds three control samples followed by three case
# samples -- TODO confirm against the input file.
# "control" is listed first so that it is the reference level of the factor.
design <- data.frame(
  condition = factor(
    c("control", "control", "control", "test", "test", "test"),
    levels = c("control", "test")
  )
)

# Fail early with a clear message if the hard-coded layout does not match
# the number of samples in the table.
stopifnot(ncol(counts) == nrow(design))
rownames(design) <- colnames(counts)

dataset <- DESeqDataSetFromMatrix(countData = counts, colData = design, design = ~condition)
dataset <- DESeq(dataset)

# Convert the results object to a plain data.frame before binding, so the
# column bind below does not depend on implicit coercion of an S4 object.
de_results <- as.data.frame(results(dataset))
new_columns <- data.frame(GeneID = rownames(de_results))
de_results <- cbind(new_columns, de_results)

# Genes without an adjusted p-value (NA) are dropped together with the
# non-significant ones.
is_significant <- !is.na(de_results$padj) & de_results$padj < 0.05
de_results <- de_results[is_significant, ]
de_results <- de_results[order(de_results$padj), ]

write.table(de_results, file = "deseq2.tsv", sep = "\t", quote = FALSE, row.names = FALSE)

# MA plot

# Render the MA plot of the fitted dataset into its own PDF file.
pdf("case_vs_control_MA plot.pdf")
plotMA(
  dataset,
  main = "MA plot",
  ylim = c(-2, 2)  # restrict the y axis to [-2, 2]
)
dev.off()

# heatmap

# Heatmap of normalized expression for the significant genes (padj < 0.05).
normalized_expression <- counts(dataset, normalized = TRUE)
de_genes <- results(dataset)

# which() discards rows whose padj is NA; drop = FALSE keeps the result a
# matrix even when only a single gene passes the threshold.
genes <- normalized_expression[which(de_genes$padj < 0.05), , drop = FALSE]

pdf("case_vs_control_heatmap.pdf")
heatmap.2(
  as.matrix(genes),
  labRow = FALSE,
  col = redgreen(100),
  scale = "row",
  dendrogram = "column",
  trace = "none",
  cexCol = 1.2,
  # Full argument name: the original `hclust =` only worked through partial
  # matching of `hclustfun`.
  hclustfun = function(x) hclust(x, method = "centroid"),
  # Distance between rows of x is 1 - Pearson correlation.
  distfun = function(x) as.dist(1 - cor(t(x)))
)
dev.off()

# volcano plot

# Volcano plot: log2 fold change against -log10 adjusted p-value, with genes
# below the 0.05 threshold drawn in red. Uses `de_genes` computed earlier.
pdf("case_vs_control_VolcanoPlot.pdf")
plot(
  x = de_genes$log2FoldChange,
  y = -log10(de_genes$padj),
  main = "case vs control",
  xlab = "Log2FoldChange",
  ylab = "-log10(Adjusted P-value)",
  pch = 19,
  cex = 0.5,
  col = ifelse(de_genes$padj < 0.05, "red", "black")
)
dev.off()


# annotating the results
python deseq2_annotate.py deseq2.tsv ensembl_data_human.txt





-------------  DESeq  -------------



library(DESeq)

# Legacy DESeq workflow: test "control" against "stress" and write the genes
# with an adjusted p-value below 0.05 to deseq.tsv.

# The column named "gene" supplies the row names of the count table.
counts <- read.delim("case_vs_control.txt", row.names = "gene")

# Three "control" labels followed by three "stress" labels.
# Assumes the count columns come in that order -- TODO confirm.
design <- factor(rep(c("control", "stress"), each = 3))

dataset <- newCountDataSet(counts, design)
dataset <- estimateDispersions(estimateSizeFactors(dataset))

de_results <- nbinomTest(dataset, "control", "stress")
names(de_results)[1] <- "gene"

# which() drops rows with an NA adjusted p-value along with the
# non-significant ones; the survivors are sorted by ascending padj.
de_results <- de_results[which(de_results$padj < 0.05), ]
de_results <- de_results[order(de_results$padj), ]

write.table(
  de_results,
  file = "deseq.tsv",
  sep = "\t",
  quote = FALSE,
  row.names = FALSE
)




------------------  edgeR  -------------------


library(edgeR)

# edgeR workflow: exact test of "control" against "stress"; genes with an
# FDR below 0.05 are written to edger.tsv.

# The column named "gene" supplies the row names of the count table.
counts <- read.delim("case_vs_control.txt", row.names = "gene")

# Three "control" labels followed by three "stress" labels.
# Assumes the count columns come in that order -- TODO confirm.
design <- factor(rep(c("control", "stress"), each = 3))

# Normalization factors, then common and tagwise dispersion estimates.
y <- DGEList(counts = counts, group = design)
y <- calcNormFactors(y)
y <- estimateCommonDisp(y)
y <- estimateTagwiseDisp(y)

# n = Inf requests every tested gene, not only the top few.
de_results <- exactTest(y, pair = c("control", "stress"))
de_results <- topTags(de_results, n = Inf)$table

# which() drops rows with an NA FDR along with the non-significant ones;
# the survivors are sorted by ascending FDR.
de_results <- de_results[which(de_results$FDR < 0.05), ]
de_results <- de_results[order(de_results$FDR), ]

# Prepend the gene identifier and the fold change computed as 2^logFC.
de_results <- cbind(
  data.frame(
    Geneid = rownames(de_results),
    Fold_change = 2^de_results$logFC
  ),
  de_results
)

write.table(
  de_results,
  file = "edger.tsv",
  sep = "\t",
  quote = FALSE,
  row.names = FALSE
)





----------------  Venn diagrams  -----------------



# loading the library
library(VennDiagram)

# Reading results of DE analysis
# Load the significant-gene tables written by the three analyses.
deseq2 <- read.delim("deseq2.tsv", sep = "\t", header = TRUE)
deseq <- read.delim("deseq.tsv", sep = "\t", header = TRUE)
edger <- read.delim("edger.tsv", sep = "\t", header = TRUE)

# Three-set Venn diagram of the gene identifiers reported by each method.
# filename = NULL makes venn.diagram() return the drawing instead of writing
# a file, so it is drawn onto the open PDF device with grid.draw().
pdf("VennDiagram.pdf")

venn.plot <- venn.diagram(
  list(
    DESeq2 = deseq2$GeneID,
    DESeq = deseq$gene,
    edgeR = edger$Geneid
  ),
  fill = c("red", "green", "blue"),
  alpha = c(0.5, 0.5, 0.5),
  height = 2000,
  width = 2000,
  filename = NULL,
  euler.d = FALSE,
  scaled = FALSE
)

grid.draw(venn.plot)

dev.off()



