# Removing rRNA-mapping reads
# Build a bowtie2 index from the rRNA reference, align the trimmed read pairs
# against it, and keep only the pairs that did not align concordantly
# (--un-conc). The alignments themselves are discarded (> /dev/null).
mkdir -p index   # -p: no error when the directory is already there (re-runs)
bowtie2-build human_rRNA.fasta index/human_rRNA
bowtie2 -t -p 4 -X 1000 --fast \
  -x index/human_rRNA \
  -1 TRIMMED/ERR990413_bbduk2_R1.fastq \
  -2 TRIMMED/ERR990413_bbduk2_R2.fastq \
  --un-conc ERR990413.fastq \
  > /dev/null

# --un-conc splits the output into ERR990413.1.fastq / ERR990413.2.fastq;
# move both mates next to the other trimmed reads under their final names.
mv ERR990413.1.fastq TRIMMED/ERR990413_clean_R1.fastq
mv ERR990413.2.fastq TRIMMED/ERR990413_clean_R2.fastq


# Reference sequence: fetch the chr 22 FASTA from Ensembl and decompress it
wget \
  ftp://ftp.ensembl.org/pub/release-88/fasta/homo_sapiens/dna/Homo_sapiens.GRCh38.dna.chromosome.22.fa.gz
gzip -d Homo_sapiens.GRCh38.dna.chromosome.22.fa.gz


# HISAT index for chr 22, written under the prefix index/chr22
hisat/hisat-build \
  Homo_sapiens.GRCh38.dna.chromosome.22.fa \
  index/chr22


# Known splice sites: download the Ensembl GTF annotation, decompress it and
# convert it with the helper script shipped with HISAT
wget \
  ftp://ftp.ensembl.org/pub/release-88/gtf/homo_sapiens/Homo_sapiens.GRCh38.88.gtf.gz
gzip -d Homo_sapiens.GRCh38.88.gtf.gz
python hisat/extract_splice_sites.py \
  Homo_sapiens.GRCh38.88.gtf \
  > human_splice_sites.txt


# Mapping the rRNA-depleted read pairs to chr 22 with HISAT
#   input : TRIMMED/ERR990413_clean_R{1,2}.fastq, index/chr22,
#           human_splice_sites.txt
#   output: ERR990413.sam, ERR990413.met_file, novel.splice_sites.txt,
#           STATUS/ERR990413_hisat.txt (run log)
# The log directory has to exist before the shell can open the redirect target.
mkdir -p STATUS
# HISAT reports its alignment summary and the --time information on stderr,
# so stderr is captured together with stdout (2>&1); redirecting stdout alone
# leaves the log without the summary.
hisat/hisat -q -p 4 -X 1000 --time --phred33 --rna-strandness RF \
  --met-file ERR990413.met_file \
  --known-splicesite-infile human_splice_sites.txt \
  --novel-splicesite-outfile novel.splice_sites.txt \
  -x index/chr22 \
  -1 TRIMMED/ERR990413_clean_R1.fastq \
  -2 TRIMMED/ERR990413_clean_R2.fastq \
  -S ERR990413.sam \
  > STATUS/ERR990413_hisat.txt 2>&1


# Converting SAM to BAM
# The SAM file is removed only if the conversion succeeded (&&), so a failed
# samtools run cannot destroy the only copy of the alignments.
samtools view -bS ERR990413.sam > ERR990413.bam \
  && rm ERR990413.sam


# Fixing the mate information of paired reads (optional)
# samtools fixmate fills in mate coordinates, insert size and mate-related
# flags. It needs name-sorted or name-collated input; the BAM used here is the
# aligner output in its original order (assumed to keep mates together).
# As above, the input BAM is deleted only after fixmate succeeded.
samtools fixmate -O bam ERR990413.bam ERR990413_fixmate.bam \
  && rm ERR990413.bam


# Sort the fixmate output into ERR990413.sorted.bam, then index the sorted
# file so that region queries are possible
samtools sort -o ERR990413.sorted.bam ERR990413_fixmate.bam
samtools index ERR990413.sorted.bam


# Print every alignment located on chr 22
samtools view ERR990413.sorted.bam 22


# Print the alignments that fall into one region of chr 22
samtools view ERR990413.sorted.bam '22:16614517-17614517'


# Same region, but only report the number of alignments (option -c)
samtools view -c ERR990413.sorted.bam '22:16614517-17614517'


# Count the chr 22 alignments restricted to the intervals listed in a BED file
# (option -L FILE)
samtools view -L RefSeq_chr22.bed -c ERR990413.sorted.bam 22


# Visualizing the mappings: cut the region of interest out into its own BAM
# file and index it
samtools view -bh ERR990413.sorted.bam 22:16614517-17614517 \
  > ERR990413.fragment.bam
samtools index ERR990413.fragment.bam  # produces ERR990413.fragment.bam.bai


# Quality control for the BAM file
# mkdir -p creates QUALIMAP and the per-sample directory in one call and does
# not complain when they already exist, so the step can be re-run.
mkdir -p QUALIMAP/ERR990413
qualimap bamqc -bam ERR990413.sorted.bam -nt 2 --java-mem-size=2G -outdir QUALIMAP/ERR990413




# -----------------  Mapping smallRNA-Seq data  -----------------




# Bowtie index of chr 22, written under the prefix index/chr22
bowtie-build \
  Homo_sapiens.GRCh38.dna.chromosome.22.fa \
  index/chr22


# Map the small-RNA reads against that index; the alignments are written as
# SAM (-S) to SRR1586016.sam
bowtie -t -p 2 -v 2 -a -S -q \
  index/chr22 \
  SRR1586016_QC.fastq \
  SRR1586016.sam


# Converting SAM to BAM
# The SAM file is removed only if the conversion succeeded (&&), so a failed
# samtools run cannot destroy the only copy of the alignments.
samtools view -bS SRR1586016.sam > SRR1586016.bam \
  && rm SRR1586016.sam


# Sorting the BAM file
samtools sort SRR1586016.bam -o SRR1586016.sorted.bam


# Quality control for the BAM file
# mkdir -p also creates the QUALIMAP parent when it is missing and does not
# complain when the directory already exists, so the step can be re-run.
mkdir -p QUALIMAP/SRR1586016/
qualimap bamqc -bam SRR1586016.sorted.bam -nt 2 --java-mem-size=2G -outdir QUALIMAP/SRR1586016/




