#!/usr/bin/perl -w
use strict;
use lib "/jolidisk/software/src/ensembl/v65/ensembl/modules";
use lib "/jolidisk/software/src/ensembl/v65/ensembl-compara/modules";
use lib "/jolidisk/software/src/ensembl/bioperl-1.2.3/";
use Bio::EnsEMBL::Registry;
use POSIX;
#use Data::Dumper;

my $bedcoord = $ARGV[0];
my $reads    = $ARGV[1];
my $bin      = $ARGV[2];
my $kb       = $ARGV[3];
my $input    = $ARGV[4];

my $verbose; # = 1 ;
#---------------------------
# Connect to DBs
#--------------------------
my $registry   = 'Bio::EnsEMBL::Registry';
$registry->load_registry_from_db(
    -host    => 'ensembldb.ensembl.org',
    -user    => 'anonymous',
    );

print "api V:",$registry->software_version,"\n" if ($verbose);

#-- Get the Compara Adaptors
my $gdb_a      = $registry->get_adaptor("Compara", "compara", "GenomeDB");
my $dnafrag_a  = $registry->get_adaptor("Compara", "compara", "Dnafrag");
my $gdb        = $gdb_a->fetch_by_dbID("57");



#-- Get DnaFrog - Chro - Length
my $h_dnafrags; 
my $dnafrags = $dnafrag_a->fetch_all_by_GenomeDB_region($gdb, 'chromosome');
foreach my $dnafrag(@$dnafrags){
    my $e    = $dnafrag->length;
    my $chr  = $dnafrag->name;
    $h_dnafrags->{$chr} = $e;
}



#-- load all BED / ORI
open(FILE, $bedcoord) or die("Unable to open $bedcoord");

my $hash_peaks;
my $hash_nbs; 
my @desc;
while (my $line = <FILE>) {
    chomp $line;
    my ($chro, $start, $end) = split(" ", $line);
    #push(@{ $hash_peaks->{$chro} }, $line); 
    push (@desc, $line);
    $hash_nbs->{$chro}++;
}


#my @sorted_desc = reverse sort { $hash_nbs->{$a} cmp $hash_nbs->{$b} } keys %$hash_nbs; 



#---------------------------
#-- Read/Bins BED files by 100bp bins
#-- Chip - Input for each bin 
#-- Each window was divided into 100 bins of 100 bp in size. An enrichment value was assigned to each bin by counting the number of sequencing reads in that bin and subtracting the number of reads in the same bin of an input library.
#---------------------------
my $hoh_reads = &binBed($reads, $bin, $h_dnafrags);
my $hoh_input = &binBed($input, $bin, $h_dnafrags) if ($input) ;


#-- get their RFs, nbs of RFs 
#-- Foreach description group get the Bed data

foreach my $line (@desc){
#foreach my $chro (@sorted_desc){
    #print join(" ",$chro,$hash_nbs->{$chro})."\n";
    
#    my $lines = $hash_peaks->{$chro};
#    foreach my $line (@$lines){
	my ($chro2, $start, $end, $nb) = split(" ", $line);
	my $center = floor($start + ($end - $start)/2) ;
	my $center_floor = floor($center / $bin) * $bin +1 ;
	##    print "->$center $center_floor\n";
	
	#if ($chro eq "M"){ $chro = "MT"}
	my $range_s = $center_floor - $kb ;
	my $range_e = $center_floor + $kb ;
	
	my @line_array_es;

	my $size_array = 0;
	for (my $coord = $range_s; $coord <= $range_e ; $coord+=$bin) {
	    my $start = $coord;
	    my $end   = $coord + $bin -1;
	    #my $chro = $module->seq_region_name;
	    #if ($chro eq "M"){ $chro = "MT"};

	    #-- get reads counts for each bins
	    my $tmpreads_es = $hoh_reads->{$chro2}->{join(":",$chro2,$start,$end)} ;
	  
	    my $reads_input = 0;
	    $reads_input = $hoh_input->{$chro2}->{join(":",$chro2,$start,$end)} if ($hoh_input->{$chro2}->{join(":",$chro2,$start,$end)} && ($input));
	    
	    my $reads_es;
	    if (!$tmpreads_es){
		$reads_es = 0 ;
	    }else{
		if ($reads_input > 0){
		    $reads_es = $tmpreads_es - $reads_input ;
		}else{
		    $reads_es = $tmpreads_es;
		}
	    }
	    push @line_array_es, $reads_es;

	  
	    $size_array++ ;
	}#<-- end bins
#	$i++;
##??
	if (scalar @line_array_es == $size_array ){
	    my $line_es ;
	      
	    foreach my $val (@line_array_es){
		$line_es .= $val." ";
	    }
	    
	    
	    chop $line_es;
	    

	    print   join("\t",$chro2, $start, $end, $nb)." ".$line_es."\n"; 
	    
	    ##??
	}
	
    	
#    }#<- end module
#    my $fake_line;
#    for (0..$bin){
#	$fake_line .= "999 ";
#    }
#    chop $fake_line;
#    print OUT_ES $fake_line."\n";
#    print OUT_NP $fake_line."\n";
#    print OUT_19H $fake_line."\n";

}#<- end description module



#---------------------------
#-- Read/Bins BED files by 100bp bins
#-- Chip - Input for each bin 
#-- Each window was divided into 100 bins of 100 bp in size. An enrichment value was assigned to each bin by counting the number of sequencing reads in that bin and subtracting the number of reads in the same bin of an input library.
#---------------------------





#----------------------------------------------
#--              SUBROUTINES 
#----------------------------------------------
sub binBed{
##    print "Enter binBed\n";
    my ($file, $bin, $h_dnafrags) = @_;
    #-----------------------------------
    #-- Create the hash of bins
    #-----------------------------------
    my $hoh;
#    my $dnafrags = $dnafrag_a->fetch_all_by_GenomeDB_region($gdb, 'chromosome');
#    foreach my $dnafrag(@$dnafrags){
    while ( my ($chr, $e) = each(%$h_dnafrags) ) {
	my $h;
	my $s    = 1;
	#my $e    = $dnafrag->length;
	#my $chr  = $dnafrag->name;
	#    print $chr, "\n";
	for (my $coord = $s; $coord <= $e ; $coord+=$bin) {
	    my $end = $coord + $bin -1;
	    my $tag = join(":", "chr".$chr,$coord,$end);
	    # print "$tag\n" ;
	    #1:1:500
	    #1:501:1000
	    #1:1001:1500
	    #1:1501:2000
	    
	    $h->{$tag} = 0;
	}
	$hoh->{$chr} = $h;
	
    }
    #--------------------
    #-- read file 
    #--------------------
    warn "open $file\n";
    open(FILE, $file) or die("Unable to open ". $file);


    #-----------------------------------
    #-- populate the hash of bins with nb of reads
    #-----------------------------------
    my $i = 0 ;
    while (my $line = <FILE>) {
	chop $line;
	#chr1    12      53      HWUSI-EAS582_186:4:29:334:1442/1        0       +
	my ($chro, $rs, $re, $tmp, $score, $strand) = split(" ",$line);
	#$chro =~ s/chr//;
	#if ($chro eq "M"){ $chro = "MT"}
	
	#-- this round the start coordinate of the read into a multiple of 
	#-- the bin size eg 152,632 for a bin of 100 => 152,600 
	my $range_s = floor($rs / $bin) * $bin +1 ;
	my $range_e = $range_s + $bin -1;
	my $key  = join(":", $chro,$range_s,$range_e);
	
	$hoh->{$chro}->{$key}++;
	my $nb = $hoh->{$chro}->{$key};
	
	#   print " => $line -- $key $nb\n";
	
	$i++;
    }
    return $hoh;
}

__END__



./hm_generic.pl  concat_MACS_merged300bp.bed  /jolidisk/users/ballester/mechali/data/geo/GSE28254_henk/GSM821394_mES_V6.5_H3K27m3_ChIP.reads.bed


#--------------------------------------------
#-- Heatmap mark histo
#--------------------------------------------

cd  /jolidisk/users/ballester/mechali/data/geo/GSE36114_cell2012/

 ./hm_generic.pl  ~/jmechali/analysis/heatmap/concat_MACS_merged300bp.bed  GSM881348_E14_H2AZ.bed     > GSM881348_E14_H2AZ.hm
 ./hm_generic.pl  ~/jmechali/analysis/heatmap/concat_MACS_merged300bp.bed  GSM881349_E14_H3K27ac.bed  > GSM881349_E14_H3K27ac.hm
 ./hm_generic.pl  ~/jmechali/analysis/heatmap/concat_MACS_merged300bp.bed  GSM881350_E14_H3K27me3.bed > GSM881350_E14_H3K27me3.hm
 ./hm_generic.pl  ~/jmechali/analysis/heatmap/concat_MACS_merged300bp.bed  GSM881351_E14_H3K36me3.bed > GSM881351_E14_H3K36me3.hm
 ./hm_generic.pl  ~/jmechali/analysis/heatmap/concat_MACS_merged300bp.bed  GSM881352_E14_H3K4me1.bed  > GSM881352_E14_H3K4me1.hm
 ./hm_generic.pl  ~/jmechali/analysis/heatmap/concat_MACS_merged300bp.bed  GSM881353_E14_H3K4me2.bed  > GSM881353_E14_H3K4me2.hm
 ./hm_generic.pl  ~/jmechali/analysis/heatmap/concat_MACS_merged300bp.bed  GSM881354_E14_H3K4me3.bed  > GSM881354_E14_H3K4me3.hm




























