################################################################
## Read mapping and quality control
##
## Replication origins from nascent strands.
## Dataset from Besnard et al.

include ${RSAT}/makefiles/util.mk
## Path of this makefile, relative to the directory make is run from.
## NOTE(review): presumably read by util.mk so that recursive ${MAKE}
## calls reload this file -- confirm against util.mk.
MAKEFILE=scripts/makefiles/01_read_mapping.mk

## GEO Sample identifier
## NOTE(review): not referenced by any target in the visible part of
## this file.
GSM=GSM927236

## Data location
##   SRX_ID   identifier used to name the data sub-directory
##   SRA_DIR  directory holding the .sra files and the files derived from them
##   SRA_ID   run processed by default; iterate_reads overrides it per run
##   SRA_IDS  all runs, in the order iterate_reads processes them
SRX_ID=SRX145992
SRA_DIR=data/from_GEO/${SRX_ID}
SRA_ID=SRR494093
SRA_IDS=SRR494093 SRR494094 SRR494095

################################################################
## List IDs of the SRA files
##
## Prints the default run (SRA_ID) and the full list of runs (SRA_IDS),
## one "name<TAB>value" line each.
##
## Declared phony: this target produces no file, and must still run if
## a file named "list_ids" happens to exist.
.PHONY: list_ids
list_ids:
	@echo
	@echo "SRA_ID	${SRA_ID}"
	@echo "SRA_IDS	${SRA_IDS}"

################################################################
## Iterate a task over read files
##
## Runs the target named by TASK (default: fastq_dump) once for each
## entry of SRA_IDS, overriding SRA_ID on the sub-make command line.
##
## Example: run the quality control on all runs
##   make -f scripts/makefiles/01_read_mapping.mk iterate_reads TASK=fastqc
##
## Every run is attempted even if a previous one failed, but the exit
## status of the last failing sub-make is propagated, so that a failure
## is no longer silently reported as success.
TASK=fastq_dump
.PHONY: iterate_reads
iterate_reads:
	@echo "Iterating over SRA IDs	${SRA_IDS}"
	@status=0; \
	for r in ${SRA_IDS}; do \
		${MAKE} ${TASK} SRA_ID=$${r} || status=$$? ; \
	done; \
	exit $${status}

################################################################
## Convert sra to fastq
##
##   READS           path prefix (without extension) of the files of one run
##   FASTQ_DUMP      name of the fastq-dump executable
##   FASTQ_DUMP_CMD  conversion command, run through the my_command target
##
## Input:  ${READS}.sra
## Output: ${READS}.fastq (file name reported by the recipe)
##
## NOTE(review): ${DATE} and my_command are not defined in this file;
## presumably they come from the included util.mk.
READS=${SRA_DIR}/${SRA_ID}
FASTQ_DUMP=fastq-dump.2.1.12
FASTQ_DUMP_CMD=${FASTQ_DUMP} -A ${SRA_ID} --outdir ${SRA_DIR} ${READS}.sra
.PHONY: fastq_dump
fastq_dump:
	@echo
	@echo "${DATE}	Converting SRA to FASTQ with ${FASTQ_DUMP}"
	@echo "	${READS}.sra"
	@${MAKE} my_command MY_COMMAND='${FASTQ_DUMP_CMD}'
	@echo
	@echo "${DATE}	Converted SRA to FASTQ"
	@echo "	${READS}.fastq"

################################################################
## Control read quality
##
##   FASTQC_DIR  result directory, created by the recipe if missing
##   FASTQC_CMD  fastqc command, run through the my_command target
##
## Input:  ${READS}.fastq
## Output: ${FASTQC_DIR}
##
## Declared phony: without it, a file or directory named "fastqc" in
## the working directory would make this target look up to date.
FASTQC_DIR=${SRA_DIR}/${SRA_ID}_fastqc
FASTQC_CMD=fastqc --outdir ${FASTQC_DIR} --format fastq ${READS}.fastq
.PHONY: fastqc
fastqc:
	@echo
	@echo "${DATE}	Estimating read quality with fastqc"
	@mkdir -p ${FASTQC_DIR}
	@${MAKE} my_command MY_COMMAND='${FASTQC_CMD}'
	@echo "${DATE}	fastqc results directory"
	@echo "	${FASTQC_DIR}"

################################################################
## Run bowtie2 to map the read onto the reference genome
##
## The mapping is followed, in the same command, by the SAM to BAM
## conversion, the sorting of the BAM file and its conversion to bed.
## The steps are chained with &&, so that a failing step stops the
## chain instead of feeding a missing or truncated file to the next one.
##
##   GENOME               genome assembly, also used as suffix of the result files
##   ORGANISM             organism directory under the genome repository
##   BOWTIE_INDEX_PREFIX  prefix of the bowtie2 index files
##   ALIGNED_READS        path prefix (without extension) of the mapped reads
##   BOWTIE_THREADS       number of alignment threads (bowtie2 -p), default 1
##   BOWTIE2_CMD          complete command, run through the my_command target
##
## Input:  ${READS}.fastq
## Output: ${ALIGNED_READS}.sam, ${ALIGNED_READS}.bam,
##         ${SORTED_READS}.bam, ${SORTED_BED}
#GENOME=hg18
GENOME=hg19
ORGANISM=Homo_sapiens
## Alternative index locations are kept below, commented out.
##BOWTIE_INDEX_PREFIX=/jolidisk/genomes/${ORGANISM}/UCSC/${GENOME}/Sequence/Bowtie2Index/${GENOME}
BOWTIE_INDEX_PREFIX=/jolidisk/genomes/${ORGANISM}/UCSC/${GENOME}/Sequence/Bowtie2Index/genome
##BOWTIE_INDEX_PREFIX=/jolidisk/genomes/${ORGANISM}/UCSC/${GENOME}/Sequence/WholeGenomeFasta/genome.fa
ALIGNED_READS=${READS}_${GENOME}
BOWTIE_THREADS=1
BOWTIE2_CMD=bowtie2 --end-to-end --sensitive -p ${BOWTIE_THREADS} -t -x ${BOWTIE_INDEX_PREFIX} -q -U ${READS}.fastq -S ${ALIGNED_READS}.sam \
	 && ${SAM2BAM_CMD} \
	 && ${SORT_CMD} \
	 && ${BAM2BED_CMD}
.PHONY: map_reads
map_reads:
	@echo
	@echo "${DATE}	Read mapping with bowtie2"
	@echo
	@echo "Raw reads"
	@echo "	${READS}.fastq"
	@# The last step of BOWTIE2_CMD redirects its output into this directory
	@mkdir -p ${SORTED_BED_DIR}
	@${MAKE} my_command MY_COMMAND='${BOWTIE2_CMD}'
	@echo "${DATE}	Mapped reads"
	@echo "	${ALIGNED_READS}.sam"
	@echo "	${ALIGNED_READS}.bam"
	@echo "	${SORTED_READS}.bam"
	@echo "	${SORTED_BED}"

## Convert the mapped reads from SAM to BAM
##
##   SAM2BAM_CMD  samtools command, also reused as a step of BOWTIE2_CMD
##
## Input:  ${ALIGNED_READS}.sam
## Output: ${ALIGNED_READS}.bam
##
## The command is run through the my_command target, like the other
## processing steps of this file, rather than directly in the recipe.
SAM2BAM_CMD=samtools view -b -S ${ALIGNED_READS}.sam > ${ALIGNED_READS}.bam
.PHONY: sam_to_bam
sam_to_bam:
	@echo
	@echo "${DATE}	Converting SAM to BAM"
	@echo "	${ALIGNED_READS}.sam"
	@${MAKE} my_command MY_COMMAND='${SAM2BAM_CMD}'
	@echo "${DATE}	Converted SAM to BAM"
	@echo "	${ALIGNED_READS}.bam"

################################################################
## We need sorted reads for further analysis (e.g. bedgraph
## conversion, peak calling with SWEMBL)
##
##   SORTED_READS  path prefix (without extension) of the sorted BAM file
##   SORT_CMD      samtools command, also reused as a step of BOWTIE2_CMD
##
## Input:  ${ALIGNED_READS}.bam
## Output: ${SORTED_READS}.bam
##
## NOTE(review): "samtools sort <in.bam> <out.prefix>" is the calling
## convention of older samtools releases; newer ones expect
## "-o <out.bam>". Confirm against the installed version.
SORTED_READS=${READS}_${GENOME}_sorted
SORT_CMD=samtools sort ${ALIGNED_READS}.bam ${SORTED_READS}
.PHONY: sort_reads
sort_reads:
	@echo
	@echo "${DATE}	Sorting reads"
	@echo "	${ALIGNED_READS}.bam"
	@${MAKE} my_command MY_COMMAND='${SORT_CMD}'
	@echo "${DATE}	Sorted reads"
	@echo "	${SORTED_READS}.bam"

################################################################
## Convert sorted reads to bed files
##
##   SORTED_BED_DIR  directory of the bed files, created by the recipe if missing
##   SORTED_BED      bed file of the current run
##   BAM2BED_CMD     bamToBed command, also reused as a step of BOWTIE2_CMD
##
## Input:  ${SORTED_READS}.bam
## Output: ${SORTED_BED}
SORTED_BED_DIR=data/reads
SORTED_BED=${SORTED_BED_DIR}/Besnard_${SRA_ID}.bed
BAM2BED_CMD=bamToBed -i ${SORTED_READS}.bam > ${SORTED_BED}
.PHONY: sorted_bam2bed
sorted_bam2bed:
	@echo
	@echo "${DATE}	Converting sorted reads (bam to bed)"
	@echo "	${SORTED_READS}.bam"
	@# The shell redirection in BAM2BED_CMD fails if the directory is missing
	@mkdir -p ${SORTED_BED_DIR}
	@${MAKE} my_command MY_COMMAND='${BAM2BED_CMD}'
	@echo "${DATE}	Converted sorted reads (bam to bed)"
	@echo "	${SORTED_BED}"


################################################################
## Create BedGraph coverage file
##
##   SAMPLE       path prefix of the input; the file read is ${SAMPLE}_sorted.bam.
##                Defaults to ${ALIGNED_READS}, so that the input is the
##                sorted BAM written by sort_reads (${SORTED_READS}.bam).
##                It previously had no default in this file, and the input
##                expanded to "_sorted.bam" unless SAMPLE was supplied.
##   CHROM_SIZES  chromosome sizes file passed to -g (default: chromsizes.txt)
##   BEDGRAPH     output file (default: sample.bedgraph)
##
## The three variables use ?= so that values coming from the command
## line, the environment or an included makefile are left untouched.
SAMPLE?=${ALIGNED_READS}
CHROM_SIZES?=chromsizes.txt
BEDGRAPH?=sample.bedgraph
.PHONY: bam2bedgraph
bam2bedgraph:
	genomeCoverageBed -bg -ibam ${SAMPLE}_sorted.bam -g ${CHROM_SIZES} > ${BEDGRAPH}
