#!/usr/bin/perl -w
use strict;
use lib "/jolidisk/software/src/ensembl/v65/ensembl/modules";
use lib "/jolidisk/software/src/ensembl/v65/ensembl-compara/modules";
use lib "/jolidisk/software/src/ensembl/bioperl-1.2.3/";
use Bio::EnsEMBL::Registry;
use POSIX;
#use Data::Dumper;

my $tag  = $ARGV[0]; #-- concat_MACS_merged300bp
my $bin  = $ARGV[1]; # 100;
my $kb   = $ARGV[2]; # 5000;
my $file = $ARGV[3]; #"/jolidisk/users/ballester/mechali/analysis/heatmap/concat_MACS_merged300bp.bed";


my %fileh = (
    "ES"    => '/jolidisk/users/ballester/mechali/data/reads_filtered/ES_indiff.bed2',
    "NP"    => '/jolidisk/users/ballester/mechali/data/reads_filtered/s_3_NP.bed2',
    "19H"   => '/jolidisk/users/ballester/mechali/data/reads_filtered/s_1_ES_19H.bed2',
    "input" => '/jolidisk/users/ballester/mechali/data/reads_filtered/input.bed2',
    );


my $bedes = $fileh{"ES"};
my $bed19h = $fileh{"19H"};
my $bednp = $fileh{"NP"};
my $bedinput  = $fileh{"input"};

#-- Output
my $out_es  = join("_", $tag, "ES",  $bin, $kb) . ".bins2";
my $out_19h = join("_", $tag, "19H", $bin, $kb) . ".bins2";
my $out_np  = join("_", $tag, "NP",  $bin, $kb) . ".bins2";

open OUT_ES,  ">", $out_es  or die $!;
open OUT_19H, ">", $out_19h or die $!;
open OUT_NP,  ">", $out_np  or die $!;


my $verbose; # = 1 ;
#---------------------------
# Connect to DBs
#--------------------------
my $registry   = 'Bio::EnsEMBL::Registry';
$registry->load_registry_from_db(
    -host    => 'ensembldb.ensembl.org',
    -user    => 'anonymous',
    );

print "api V:",$registry->software_version,"\n" if ($verbose);

#-- Get the Compara Adaptors
my $gdb_a      = $registry->get_adaptor("Compara", "compara", "GenomeDB");
my $dnafrag_a  = $registry->get_adaptor("Compara", "compara", "Dnafrag");
my $gdb        = $gdb_a->fetch_by_dbID("57");



#-- Get DnaFrog - Chro - Length
my $h_dnafrags; 
my $dnafrags = $dnafrag_a->fetch_all_by_GenomeDB_region($gdb, 'chromosome');
foreach my $dnafrag(@$dnafrags){
    my $e    = $dnafrag->length;
    my $chr  = $dnafrag->name;
    $h_dnafrags->{$chr} = $e;
}


#-- load all BED / ORI
###my $file = "/jolidisk/users/ballester/mechali/analysis/heatmap/concat_MACS_merged300bp.bed";
open(FILE, $file) or die("Unable to open file");

my $hash_peaks;
my $hash_nbs; 
while (my $line = <FILE>) {
    chomp $line;
    my ($chro, $start, $end, $nb ) = split(" ", $line);
    push(@{ $hash_peaks->{$chro} }, $line); 
    $hash_nbs->{$chro}++;
}


my @sorted_desc = reverse sort { $hash_nbs->{$a} cmp $hash_nbs->{$b} } keys %$hash_nbs; 



#---------------------------
#-- Read/Bins BED files by 100bp bins
#-- Chip - Input for each bin 
#-- Each window was divided into 100 bins of 100 bp in size. An enrichment value was assigned to each bin by counting the number of sequencing reads in that bin and subtracting the number of reads in the same bin of an input library.
#---------------------------
my $hoh_es    = &binBed($bedes, $bin, $h_dnafrags);
my $hoh_np    = &binBed($bednp, $bin, $h_dnafrags);
my $hoh_19h   = &binBed($bed19h, $bin, $h_dnafrags);
my $hoh_input = &binBed($bedinput, $bin, $h_dnafrags);


#-- get their RFs, nbs of RFs 
#-- Foreach description group get the Bed data
foreach my $chro (@sorted_desc){
    #print join(" ",$chro,$hash_nbs->{$chro})."\n";
    
    my $lines = $hash_peaks->{$chro};
    foreach my $line (@$lines){
	my ($chro2, $start, $end, $nb ) = split(" ", $line);
	my $center = floor($start + ($end - $start)/2) ;
	my $center_floor = floor($center / $bin) * $bin +1 ;
	##    print "->$center $center_floor\n";
	
	#if ($chro eq "M"){ $chro = "MT"}
	my $range_s = $center_floor - $kb ;
	my $range_e = $center_floor + $kb ;
	
	my @line_array_es;
	my @line_array_np;
	my @line_array_19h;
	#my @line_array_hnf6;

	my $size_array = 0;
	for (my $coord = $range_s; $coord <= $range_e ; $coord+=$bin) {
	    my $start = $coord;
	    my $end   = $coord + $bin -1;
	    #my $chro = $module->seq_region_name;
	    #if ($chro eq "M"){ $chro = "MT"};

	    #-- get reads counts for each bins
	    my $tmpreads_es = $hoh_es->{$chro}->{join(":",$chro,$start,$end)} ;
	    my $tmpreads_np = $hoh_np->{$chro}->{join(":",$chro,$start,$end)} ;
	    my $tmpreads_19h = $hoh_19h->{$chro}->{join(":",$chro,$start,$end)} ;
	    my $reads_input = 0;
	    $reads_input = $hoh_input->{$chro}->{join(":",$chro,$start,$end)} if ($hoh_input->{$chro}->{join(":",$chro,$start,$end)});
	    
	    my $reads_es;
	    if (!$tmpreads_es){
		$reads_es = 0 ;
	    }else{
		if ($reads_input > 0){
		    $reads_es = $tmpreads_es - $reads_input ;
		}else{
		    $reads_es = $tmpreads_es;
		}
	    }
	    push @line_array_es, $reads_es;

	    my $reads_np;
	    if (!$tmpreads_np){
		$reads_np = 0 ;
	    }else{
		if ($reads_input > 0){
		    $reads_np = $tmpreads_np - $reads_input ;
		}else{
		    $reads_np = $tmpreads_np;
		}
	    }
	    push @line_array_np, $reads_np;

	    
	    my $reads_19h;
	    if (!$tmpreads_19h){
		$reads_19h = 0 ;
	    }else{
		if ($reads_input > 0){
		    $reads_19h = $tmpreads_19h - $reads_input ;
		}else{
		    $reads_19h = $tmpreads_19h;
		}
	    }
	    push @line_array_19h, $reads_19h;

	    $size_array++ ;
	}#<-- end bins
#	$i++;
##??
	if (scalar @line_array_es == $size_array ){
	    my $line_es ;
	    my $line_np ;
	    my $line_19h ;   
	    foreach my $val (@line_array_es){
		$line_es .= $val." ";
	    }
	    foreach my $val (@line_array_19h){
		$line_19h .= $val." ";
	    }
	    foreach my $val (@line_array_np){
		$line_np .= $val." ";
	    }
	    
	    chop $line_es;
	    chop $line_19h;
	    chop $line_np;

	    print OUT_ES  join("\t",$chro2, $start, $end, $nb)." ".$line_es."\n"; 
	    print OUT_19H join("\t",$chro2, $start, $end, $nb)." ".$line_19h."\n";
	    print OUT_NP  join("\t",$chro2, $start, $end, $nb)." ".$line_np."\n";
	    ##??
	}
	
    	
    }#<- end module
#    my $fake_line;
#    for (0..$bin){
#	$fake_line .= "999 ";
#    }
#    chop $fake_line;
#    print OUT_ES $fake_line."\n";
#    print OUT_NP $fake_line."\n";
#    print OUT_19H $fake_line."\n";

}#<- end description module

close(OUT_ES);
close(OUT_19H);
close(OUT_NP);

#---------------------------
#-- Read/Bins BED files by 100bp bins
#-- Chip - Input for each bin 
#-- Each window was divided into 100 bins of 100 bp in size. An enrichment value was assigned to each bin by counting the number of sequencing reads in that bin and subtracting the number of reads in the same bin of an input library.
#---------------------------





#----------------------------------------------
#--              SUBROUTINES 
#----------------------------------------------
sub binBed{
##    print "Enter binBed\n";
    my ($file, $bin, $h_dnafrags) = @_;
    #-----------------------------------
    #-- Create the hash of bins
    #-----------------------------------
    my $hoh;
#    my $dnafrags = $dnafrag_a->fetch_all_by_GenomeDB_region($gdb, 'chromosome');
#    foreach my $dnafrag(@$dnafrags){
    while ( my ($chr, $e) = each(%$h_dnafrags) ) {
	my $h;
	my $s    = 1;
	#my $e    = $dnafrag->length;
	#my $chr  = $dnafrag->name;
	#    print $chr, "\n";
	for (my $coord = $s; $coord <= $e ; $coord+=$bin) {
	    my $end = $coord + $bin -1;
	    my $tag = join(":", "chr".$chr,$coord,$end);
	    # print "$tag\n" ;
	    #1:1:500
	    #1:501:1000
	    #1:1001:1500
	    #1:1501:2000
	    
	    $h->{$tag} = 0;
	}
	$hoh->{$chr} = $h;
	
    }
    #--------------------
    #-- read file 
    #--------------------
#    print "open $file\n";
    open(FILE, $file) or die("Unable to open file");
##    print "open file\n";

    #-----------------------------------
    #-- populate the hash of bins with nb of reads
    #-----------------------------------
    my $i = 0 ;
    while (my $line = <FILE>) {
	chop $line;
	#chr1    12      53      HWUSI-EAS582_186:4:29:334:1442/1        0       +
	my ($chro, $rs, $re, $tmp, $score, $strand) = split(" ",$line);
	#$chro =~ s/chr//;
	#if ($chro eq "M"){ $chro = "MT"}
	
	#-- this round the start coordinate of the read into a multiple of 
	#-- the bin size eg 152,632 for a bin of 100 => 152,600 
	my $range_s = floor($rs / $bin) * $bin +1 ;
	my $range_e = $range_s + $bin -1;
	my $key  = join(":", $chro,$range_s,$range_e);
	
	$hoh->{$chro}->{$key}++;
	my $nb = $hoh->{$chro}->{$key};
	
	#   print " => $line -- $key $nb\n";
	
	$i++;
    }
    return $hoh;
}

__END__



./heatmap.pl   


