# -------------------  RNA-Seq --------------------


# Downloading RNA-Seq data (already done)
# wget ftp://ftp.sra.ebi.ac.uk/vol1/fastq/ERR990/ERR990413/ERR990413_1.fastq.gz
# wget ftp://ftp.sra.ebi.ac.uk/vol1/fastq/ERR990/ERR990413/ERR990413_2.fastq.gz


# Decompressing the data and keeping only the first 2,500,000 records, i.e. 10,000,000 lines at 4 lines per FASTQ record (already done)
# gunzip -c ERR990413_1.fastq.gz | head -10000000 > ERR990413_1.fastq
# gunzip -c ERR990413_2.fastq.gz | head -10000000 > ERR990413_2.fastq

# The ERR990413_1.fastq and ERR990413_2.fastq files are provided gzip-compressed (as *.fastq.gz), so we have to decompress them first:
# Unpack both mate files of run ERR990413 in place (each .gz is replaced by its .fastq).
for mate in 1 2; do
  gzip -d "ERR990413_${mate}.fastq.gz"
done


# quality control: before filtering
# Run FastQC on both raw mate files; the reports go to FASTQC_out/ERR990413_raw.
# mkdir -p creates the parent directory too and succeeds when the directories
# already exist, so this step can be re-run without errors.
mkdir -p FASTQC_out/ERR990413_raw
fastqc ERR990413_1.fastq ERR990413_2.fastq \
  --quiet --noextract --nogroup \
  --outdir FASTQC_out/ERR990413_raw


# data filtering: Trimmomatic
# Paired-end (PE) trimming of the raw mates with Phred+33 qualities.
# Output order for PE mode is: paired R1, unpaired R1, paired R2, unpaired R2;
# the two unpaired outputs are sent to /dev/null, i.e. discarded.
# Steps applied: adapter clipping (TruSeq3-PE), leading/trailing bases below Q20,
# 5-base sliding window at Q20, and reads shorter than 50 nt are dropped.
# Everything Trimmomatic prints (stdout and stderr) is captured in the STATUS log.
# mkdir -p keeps the step re-runnable when TRIMMED/ and STATUS/ already exist.
mkdir -p TRIMMED STATUS
java -Xms4g -Xmx4g -jar trimmomatic/trimmomatic.jar PE -threads 1 -phred33 \
  ERR990413_1.fastq ERR990413_2.fastq \
  TRIMMED/ERR990413_trimmomatic_R1_trimmed.fastq /dev/null \
  TRIMMED/ERR990413_trimmomatic_R2_trimmed.fastq /dev/null \
  ILLUMINACLIP:trimmomatic/adapters/TruSeq3-PE.fa:2:30:10 \
  LEADING:20 TRAILING:20 SLIDINGWINDOW:5:20 MINLEN:50 \
  &> STATUS/ERR990413_trimmomatic.log


# data filtering: BBDUK2
# Alternative filtering of the same raw mates with BBDuk2: quality trimming at Q20,
# adapter trimming against bbmap/resources/adapters.fa, minimum length 50, and a
# pair is removed if either mate fails (removeifeitherbad=t).
# Per-reference statistics go to the stats= file; the tool's stderr report is
# saved to the *_trimming.txt file.
# The output directories are created here as well, so this step also works when
# the Trimmomatic step (which otherwise creates them) has not been run; without
# STATUS/ the stderr redirection fails and bbduk2 is never started.
mkdir -p TRIMMED STATUS
bbmap/bbduk2.sh -Xmx2g threads=2 \
  in=ERR990413_1.fastq in2=ERR990413_2.fastq \
  out=TRIMMED/ERR990413_bbduk2_R1.fastq out2=TRIMMED/ERR990413_bbduk2_R2.fastq \
  qtrim=w trimq=20 maq=10 \
  rref=bbmap/resources/adapters.fa k=23 mink=11 hdist=1 tbo tpe \
  minlength=50 removeifeitherbad=t overwrite=t \
  stats=STATUS/ERR990413.bbduk2_stats.txt \
  2> STATUS/ERR990413.bbduk2_trimming.txt


# quality control: after filtering
# FastQC is run separately on the output of each filtering tool so the two
# reports can be compared with the raw-data report.
# mkdir -p creates FASTQC_out itself when it is missing and does not fail when
# the report directories already exist.

# Trimmomatic
mkdir -p FASTQC_out/ERR990413_filtered_trimmomatic
fastqc TRIMMED/ERR990413_trimmomatic_R1_trimmed.fastq TRIMMED/ERR990413_trimmomatic_R2_trimmed.fastq \
  --quiet --noextract --nogroup \
  --outdir FASTQC_out/ERR990413_filtered_trimmomatic

# bbduk2
mkdir -p FASTQC_out/ERR990413_filtered_bbduk2
fastqc TRIMMED/ERR990413_bbduk2_R1.fastq TRIMMED/ERR990413_bbduk2_R2.fastq \
  --quiet --noextract --nogroup \
  --outdir FASTQC_out/ERR990413_filtered_bbduk2




# -------------------  smallRNA-Seq --------------------



# data download
# Fetch the gzipped single-end run SRR1586016 from the EBI FTP server,
# then unpack it in place to SRR1586016.fastq.
wget ftp://ftp.sra.ebi.ac.uk/vol1/fastq/SRR158/006/SRR1586016/SRR1586016.fastq.gz
gzip -d SRR1586016.fastq.gz


# quality control: before filtering
# FastQC report for the raw small RNA reads, written to FASTQC_out/SRR1586016_raw.
# mkdir -p also creates FASTQC_out, so this section does not depend on the
# RNA-Seq section having been run first, and it can be re-run safely.
mkdir -p FASTQC_out/SRR1586016_raw
fastqc SRR1586016.fastq \
  --quiet --noextract --nogroup \
  --outdir FASTQC_out/SRR1586016_raw


# adapter clipping
# Remove the adapter sequence from the reads (Phred+33 input, -Q33).
#   -c     keep only reads in which the adapter was found and clipped
#   -l 17  drop reads shorter than 17 nt after clipping
#   -v     print a summary of processed/discarded reads
fastx_clipper -Q33 -v -c \
  -a TGGAATTCTCGGGTGCCAAGG \
  -l 17 \
  -i SRR1586016.fastq \
  -o SRR1586016_clipped.fastq


# quality filtering
# Keep only clipped reads in which at least 95% of the bases (-p 95)
# have a quality score of 20 or higher (-q 20).
fastq_quality_filter -Q33 -v \
  -p 95 -q 20 \
  -i SRR1586016_clipped.fastq \
  -o SRR1586016_QC.fastq


# QC pipeline
# The same clipping and quality filtering as the two separate steps, chained
# through a pipe: the clipper writes to stdout (no -o) and the filter reads
# from stdin (no -i), so no intermediate clipped file is written.
# The result is written to SRR1586016_QC.fastq.
fastx_clipper -Q33 -v -c -a TGGAATTCTCGGGTGCCAAGG -l 17 -i SRR1586016.fastq \
  | fastq_quality_filter -Q33 -v -p 95 -q 20 -o SRR1586016_QC.fastq


# FASTA conversion
# Convert the FASTQ reads to FASTA (quality lines are dropped).
# -Q33 declares Phred+33 quality encoding, as in the clipping and filtering
# steps run on this same file; without it the FASTX tools assume Phred+64 and
# abort with an "invalid quality score value" error on Phred+33 data.
fastq_to_fasta -Q33 -v -i SRR1586016.fastq -o SRR1586016.fasta


# read trimming
# Keep bases 5 through 25 of every read (-f = first base to keep,
# -l = last base to keep).
# -Q33 declares Phred+33 quality encoding, as in the clipping and filtering
# steps run on this same file; without it the FASTX tools assume Phred+64 and
# abort with an "invalid quality score value" error on Phred+33 data.
fastx_trimmer -Q33 -v -f 5 -l 25 -i SRR1586016.fastq -o SRR1586016.trimmer.fastq


# sequence collapsing
# Collapse identical reads into single FASTA records that carry the read count.
# -Q33 declares Phred+33 quality encoding, as in the clipping and filtering
# steps run on this same file; without it the FASTX tools assume Phred+64 and
# abort with an "invalid quality score value" error on Phred+33 FASTQ input.
fastx_collapser -Q33 -v -i SRR1586016.fastq -o SRR1586016_collapsed.fasta





