#!/usr/bin/perl -w
use strict;
use lib "/jolidisk/software/src/ensembl/v65/ensembl/modules";
use lib "/jolidisk/software/src/ensembl/v65/ensembl-compara/modules";
use lib "/jolidisk/software/src/ensembl/bioperl-1.2.3/";
use Bio::EnsEMBL::Registry;
use POSIX;
#use Data::Dumper;


my %fileh = (
    "ES"    => '/jolidisk/users/ballester/mechali/data/reads_filtered/ES_indiff.bed2',
    "NP"    => '/jolidisk/users/ballester/mechali/data/reads_filtered/s_3_NP.bed2',
    "19H"   => '/jolidisk/users/ballester/mechali/data/reads_filtered/s_1_ES_19H.bed2',
    "input" => '/jolidisk/users/ballester/mechali/data/reads_filtered/input.bed2',
    );


my $bedes = $fileh{"ES"};
my $bed19h = $fileh{"19H"};
my $bednp = $fileh{"NP"};
my $bedinput  = $fileh{"input"};

#-- Output
open OUT_ES, ">", "out_es_bins2.txt" or die $!;
open OUT_19H, ">", "out_19h_bins2.txt" or die $!;
open OUT_NP, ">", "out_np_bins2.txt" or die $!;


my $bin = 100;
my $kb = 5000;
my $verbose; # = 1 ;
#---------------------------
# Connect to DBs
#--------------------------
my $registry   = 'Bio::EnsEMBL::Registry';
$registry->load_registry_from_db(
    -host    => 'ensembldb.ensembl.org',
    -user    => 'anonymous',
    );

print "api V:",$registry->software_version,"\n" if ($verbose);

#-- Get the Compara Adaptors
my $gdb_a      = $registry->get_adaptor("Compara", "compara", "GenomeDB");
my $dnafrag_a  = $registry->get_adaptor("Compara", "compara", "Dnafrag");
my $gdb        = $gdb_a->fetch_by_dbID("57");



#-- Get DnaFrog - Chro - Length
my $h_dnafrags; 
my $dnafrags = $dnafrag_a->fetch_all_by_GenomeDB_region($gdb, 'chromosome');
foreach my $dnafrag(@$dnafrags){
    my $e    = $dnafrag->length;
    my $chr  = $dnafrag->name;
    $h_dnafrags->{$chr} = $e;
}


#-- load all BED / ORI
my $file = "/jolidisk/users/ballester/mechali/analysis/heatmap/concat_MACS_merged300bp.bed";
open(FILE, $file) or die("Unable to open file");

my $hash_peaks;
my $hash_nbs; 
while (my $line = <FILE>) {
    chomp $line;
    my ($chro, $start, $end, $nb ) = split(" ", $line);
    push(@{ $hash_peaks->{$chro} }, $line); 
    $hash_nbs->{$chro}++;
}


my @sorted_desc = reverse sort { $hash_nbs->{$a} cmp $hash_nbs->{$b} } keys %$hash_nbs; 



#---------------------------
#-- Read/Bins BED files by 100bp bins
#-- Chip - Input for each bin 
#-- Each window was divided into 100 bins of 100 bp in size. An enrichment value was assigned to each bin by counting the number of sequencing reads in that bin and subtracting the number of reads in the same bin of an input library.
#---------------------------
my $hoh_es    = &binBed($bedes, $bin, $h_dnafrags);
my $hoh_np    = &binBed($bednp, $bin, $h_dnafrags);
my $hoh_19h   = &binBed($bed19h, $bin, $h_dnafrags);
my $hoh_input = &binBed($bedinput, $bin, $h_dnafrags);


#-- get their RFs, nbs of RFs 
#-- Foreach description group get the Bed data
foreach my $chro (@sorted_desc){
    #print join(" ",$chro,$hash_nbs->{$chro})."\n";
    
    my $lines = $hash_peaks->{$chro};
    foreach my $line (@$lines){
	my ($chro2, $start, $end, $nb ) = split(" ", $line);
	my $center = floor($start + ($end - $start)/2) ;
	my $center_floor = floor($center / $bin) * $bin +1 ;
	##    print "->$center $center_floor\n";
	
	#if ($chro eq "M"){ $chro = "MT"}
	my $range_s = $center_floor - $kb ;
	my $range_e = $center_floor + $kb ;
	
	my @line_array_es;
	my @line_array_np;
	my @line_array_19h;
	#my @line_array_hnf6;

	my $size_array = 0;
	for (my $coord = $range_s; $coord <= $range_e ; $coord+=$bin) {
	    my $start = $coord;
	    my $end   = $coord + $bin -1;
	    #my $chro = $module->seq_region_name;
	    #if ($chro eq "M"){ $chro = "MT"};

	    #-- get reads counts for each bins
	    my $tmpreads_es = $hoh_es->{$chro}->{join(":",$chro,$start,$end)} ;
	    my $tmpreads_np = $hoh_np->{$chro}->{join(":",$chro,$start,$end)} ;
	    my $tmpreads_19h = $hoh_19h->{$chro}->{join(":",$chro,$start,$end)} ;
	    my $reads_input = 0;
	    $reads_input = $hoh_input->{$chro}->{join(":",$chro,$start,$end)} if ($hoh_input->{$chro}->{join(":",$chro,$start,$end)});
	    
	    my $reads_es;
	    if (!$tmpreads_es){
		$reads_es = 0 ;
	    }else{
		if ($reads_input > 0){
		    $reads_es = $tmpreads_es - $reads_input ;
		}else{
		    $reads_es = $tmpreads_es;
		}
	    }
	    push @line_array_es, $reads_es;

	    my $reads_np;
	    if (!$tmpreads_np){
		$reads_np = 0 ;
	    }else{
		if ($reads_input > 0){
		    $reads_np = $tmpreads_np - $reads_input ;
		}else{
		    $reads_np = $tmpreads_np;
		}
	    }
	    push @line_array_np, $reads_np;

	    
	    my $reads_19h;
	    if (!$tmpreads_19h){
		$reads_19h = 0 ;
	    }else{
		if ($reads_input > 0){
		    $reads_19h = $tmpreads_19h - $reads_input ;
		}else{
		    $reads_19h = $tmpreads_19h;
		}
	    }
	    push @line_array_19h, $reads_19h;

	    $size_array++ ;
	}#<-- end bins
#	$i++;
##??
	if (scalar @line_array_es == $size_array ){
	    my $line_es ;
	    my $line_np ;
	    my $line_19h ;   
	    foreach my $val (@line_array_es){
		$line_es .= $val." ";
	    }
	    foreach my $val (@line_array_19h){
		$line_19h .= $val." ";
	    }
	    foreach my $val (@line_array_np){
		$line_np .= $val." ";
	    }
	    
	    chop $line_es;
	    chop $line_19h;
	    chop $line_np;

	    print OUT_ES  join("\t",$chro2, $start, $end, $nb)." ".$line_es."\n"; 
	    print OUT_19H join("\t",$chro2, $start, $end, $nb)." ".$line_19h."\n";
	    print OUT_NP  join("\t",$chro2, $start, $end, $nb)." ".$line_np."\n";
	    ##??
	}
	
    	
    }#<- end module
#    my $fake_line;
#    for (0..$bin){
#	$fake_line .= "999 ";
#    }
#    chop $fake_line;
#    print OUT_ES $fake_line."\n";
#    print OUT_NP $fake_line."\n";
#    print OUT_19H $fake_line."\n";

}#<- end description module

close(OUT_ES);
close(OUT_19H);
close(OUT_NP);

#---------------------------
#-- Read/Bins BED files by 100bp bins
#-- Chip - Input for each bin 
#-- Each window was divided into 100 bins of 100 bp in size. An enrichment value was assigned to each bin by counting the number of sequencing reads in that bin and subtracting the number of reads in the same bin of an input library.
#---------------------------





#----------------------------------------------
#--              SUBROUTINES 
#----------------------------------------------
sub binBed{
##    print "Enter binBed\n";
    my ($file, $bin, $h_dnafrags) = @_;
    #-----------------------------------
    #-- Create the hash of bins
    #-----------------------------------
    my $hoh;
#    my $dnafrags = $dnafrag_a->fetch_all_by_GenomeDB_region($gdb, 'chromosome');
#    foreach my $dnafrag(@$dnafrags){
    while ( my ($chr, $e) = each(%$h_dnafrags) ) {
	my $h;
	my $s    = 1;
	#my $e    = $dnafrag->length;
	#my $chr  = $dnafrag->name;
	#    print $chr, "\n";
	for (my $coord = $s; $coord <= $e ; $coord+=$bin) {
	    my $end = $coord + $bin -1;
	    my $tag = join(":", "chr".$chr,$coord,$end);
	    # print "$tag\n" ;
	    #1:1:500
	    #1:501:1000
	    #1:1001:1500
	    #1:1501:2000
	    
	    $h->{$tag} = 0;
	}
	$hoh->{$chr} = $h;
	
    }
    #--------------------
    #-- read file 
    #--------------------
#    print "open $file\n";
    open(FILE, $file) or die("Unable to open file");
##    print "open file\n";

    #-----------------------------------
    #-- populate the hash of bins with nb of reads
    #-----------------------------------
    my $i = 0 ;
    while (my $line = <FILE>) {
	chop $line;
	#chr1    12      53      HWUSI-EAS582_186:4:29:334:1442/1        0       +
	my ($chro, $rs, $re, $tmp, $score, $strand) = split(" ",$line);
	#$chro =~ s/chr//;
	#if ($chro eq "M"){ $chro = "MT"}
	
	#-- this round the start coordinate of the read into a multiple of 
	#-- the bin size eg 152,632 for a bin of 100 => 152,600 
	my $range_s = floor($rs / $bin) * $bin +1 ;
	my $range_e = $range_s + $bin -1;
	my $key  = join(":", $chro,$range_s,$range_e);
	
	$hoh->{$chro}->{$key}++;
	my $nb = $hoh->{$chro}->{$key};
	
	#   print " => $line -- $key $nb\n";
	
	$i++;
    }
    return $hoh;
}

__END__



./heatmap.pl   


