#!/usr/bin/perl -w
############################################################
#
# $Id: matrix-clustering,v 1.5 2013/02/19 05:35:16 jvanheld Exp $
#
############################################################

## use strict;

=pod

=head1 NAME

cluster-matrices

=head1 VERSION

$program_version

=head1 DESCRIPTION

Taking as input a set of position-specific scoring matrices, identify
clusters of similar matrices and build consensus motifs by merging the
matrices that belong to the same cluster.

=head1 DEPENDENCIES

The clustering step relies on I<MCL>, the graph-based clustering
algorithm developed by Stijn Van Dongen. MCL must be installed and its
path indicated in the RSAT configuration file
($RSAT/RSAT_config.props). The installation of MCL can be done with a
RSAT makefile:

  cd $RSAT
  make -f makefiles/install_software.mk install_mcl

=head1 AUTHORS

=head2 Implementation

=over

=item Jacques.van-Helden@univ-amu.fr

=item Jaime Castro <jcastro@lcg.unam.mx>

=back

=head2 Conception

=over

=item Jacques van Helden

The following collaborator contributed to the definition of
requirements for this program.

=item Carl Herrmann

=item Denis Thieffry

=item Morgane Thomas-Chollier

=back

=head1 CATEGORY

util

=head1 USAGE

cluster-matrices [-i inputfile] [-o outputfile] [-v ] [...]


=head1 OUTPUT FORMAT

=head1 SEE ALSO

=over

=item I<compare-matrices>

The program I<compare-matrices> is used by I<cluster-matrices> to
measure pairwise similarities and define the best alignment (offset,
strand) between each pair of matrices.

=back

=head1 WISH LIST

=cut

BEGIN {
  if ($0 =~ /([^(\/)]+)$/) {
    push (@INC, "$`lib/");
  }
}
require "RSA.lib";
require "RSA2.cgi.lib";
use RSAT::util;
use RSAT::matrix;
use RSAT::MatrixReader;

################################################################
## Main package
package main;
{

  ################################################################
  ## Initialise parameters
  local $start_time = &RSAT::util::StartScript();
  $program_version = do { my @r = (q$Revision: 1.5 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r };

  ## Input / output files
  %main::infile = ();
  %main::outfile = ();
  %main::dir = ();
  @dirs = ();

  $main::verbose = 0;
  $main::out = STDOUT;

  local @tab_to_convert = (); ## Tables to convert to html

  ## Input formats: only accept formats supporting multiple matrices
  local @supported_matrix_formats = qw(transfac tf tab clusterbuster cb infogibbs meme stamp uniprobe);
  local %supported_matrix_format = ();
  foreach my $format (@supported_matrix_formats) {
    $supported_matrix_format{$format} = 1;
  }
  local $supported_matrix_formats = join ",", @supported_matrix_formats;

  ## Output formats for hclust
  local @supported_tree_formats = qw (json newick);
  local %supported_tree_formats = ();
  foreach my $format (@supported_tree_formats) {
    $supported_tree_formats{$format} = 1;
  }
  local $supported_tree_formats = join ",", @supported_tree_formats;
  local $export_tree_format = "";

  ## Select d3 base
  ## Note : option link requires internet connection
  local @d3_base_formats = qw (file link);
  local %d3_base_formats = ();
  foreach my $format (@d3_base_formats) {
    $d3_base_formats{$format} = 1;
  }
  local $d3_base_formats = join ",", @d3_base_formats;
  local $d3_base_format = "";
  local $d3_base_url=$ENV{RSAT}."/public_html/lib/d3/";

  ## Select phylo and newick d3 bases
  local $d3_phylogram_base = $ENV{RSAT}."/public_html/lib/d3/d3.phylogram.js";
  local $d3_newick_base = $ENV{RSAT}."/public_html/lib/d3/newick.js";


  ## Supported label fields
  local @supported_label_fields = qw (id name consensus);
  local %supported_label_fields = ();
  foreach my $field (@supported_label_fields) {
    $supported_label_fields{$field} = 1;
  }
  local $supported_label_fields = join ",", @supported_label_fields;
#    local $label_fields_to_return = "";
  local %label_fields_to_return = ();
  local @label_fields_to_return = ();

  ## Supported hclust methods
  local @supported_hclust_methods = qw (average complete single);
  local %supported_hclust_methods = ();
  foreach my $method (@supported_hclust_methods) {
    $supported_hclust_methods{$method} = 1;
  }
  local $supported_hclust_methods = join ",", @supported_hclust_methods;
  local $hclust_method = "complete";


  ## Threshold parameters
  local %lth = ();		# lower threshold values
  local %uth = ();		# upper threshold values
  local @supported_thresholds = qw(
				     cor
                                     Ncor
				   );
  local $supported_thresholds = join ",", @supported_thresholds;
  local %supported_threshold = ();
  foreach my $thr (@supported_thresholds) {
    $supported_threshold{lc($thr)} = 1;
  }


  ## Metric parameters
  local @supported_metrics = qw(
			         cor
                                 Ncor
			     );
  local $supported_metrics = join ",", @supported_metrics;
  local %supported_metrics = ();
  foreach my $met (@supported_metrics) {
    $supported_metrics{lc($met)} = 1;
  }


  ## Consensuses alignment labels
  local @supported_consensuses_alignment_labels = qw(
			         consensus
                                 id
                                 strand
                                 number
			     );
  local $supported_consensuses_alignment_labels = join(",", @supported_consensuses_alignment_labels);
  local %supported_consensuses_alignment_labels = ();
  foreach my $lab (@supported_consensuses_alignment_labels) {
    $supported_consensuses_alignment_labels{lc($lab)} = 1;
  }
  local @cons_labels = @supported_consensuses_alignment_labels;


  ## Matrices
  local @matrices = ();
#  local @temp = ();
  local $show_consensus = 0;


  ## Unrecognized arguments are passed to compare-matrices
  local @args_to_pass = ();
  local $args_to_pass = "";
  
  ## Hash with the path to tha aligned logos
  local %aligned_logos_path = ();
  local %alignment_info = ();

  ## Hash with the cluster information
  ## required to the HTML tree
  local %clusters_info = ();
  local %cluster_nodes = (); 
  local %clusters_to_HTML = (); 

  ## Lower and upper threshold on matrix comparison scores
  local %lth = ();
  local %uth = ();

  ## Merged consensuses logos
  local %merged_consensuses_files = ();
  local @levels_JSON = ();

  ################################################################
  ## Options used for motif comparison
  local %param = ();
  $param{matrix_compa_min_w} = 5; ## min number of aligned columns
  $param{matrix_compa_min_Wr} = 0.3; ## min relative width
  #$param{matrix_compa_min_cor} = 0.7; ## min correlation
  #$param{matrix_compa_min_Ncor} = 0.4; ## min normalizd correlation
  $param{matrix_compa_sort_field} = "Ncor"; ## sorting field
  #$param{matrix_compa_metrics} = "Ncor";
  $param{matrix_compa_metrics} = "cor,Ncor,logoDP,NIcor,NSW,match_rank";
  $param{matrix_compa_score} = "Ncor"; ## The score must be a similarity score for MCL

  ################################################################
  ## Read argument values
  &ReadArguments();

  ################################################################
  ## Check argument values

  &RSAT::message::TimeWarn("Checking parameter values") if ($main::verbose >= 2);

  ## Non-recognized parameters are passed to compare-matrices
  if (scalar(@args_to_pass)) {
    $args_to_pass = join (" ", @args_to_pass);
    &RSAT::message::Info("Unrecognized arguments passed to compare-matrices", $args_to_pass) if ($main::verbose >= 2);
  }

  ## Check that the  input file has been specified
  unless ($infile{matrices}) {
    &RSAT::error::FatalError("You must define the input file (option -i).");
  }

  ## Check that the  input format has been specified
  unless ($matrix_format) {
    &RSAT::error::FatalError("You must define the input matrix format (option -format).");
  }


  ## Check that the output prefix has been specified
  unless ($outfile{prefix}) {
    &RSAT::error::FatalError("You must define the output prefix (option -o).");
  }

  ## Check that at least one field is selected for the option
  ## "-label".
  if (scalar(@label_fields_to_return) == 0) {
    @label_fields_to_return = qw(id name);
    for my $field (@label_fields_to_return) {
      $label_fields_to_return{$field} = 1;
    }
  }
  $label_fields_to_return = join ",", @label_fields_to_return;
  &RSAT::message::Info("Label fields", $label_fields_to_return) if ($main::verbose >= 5);

  ## Create output dir if required
  my $basename;
  ($dir{output}, $basename) = &RSAT::util::SplitFileName($main::outfile{prefix});
  $dir{output} = "." if ($dir{output} eq "");
  &RSAT::util::CheckOutDir($dir{output});
  push @dirs, "output";

  ## Directory containing the matrix logos (to be displayed in the HTML tree)
  $dir{logos} = $main::outfile{prefix}."_pairwise_compa_logos"; push @dirs, "logos";
  &RSAT::util::CheckOutDir($dir{logos});

  ## Directory to create the aligned logos
  $dir{aligned_logos} = $main::outfile{prefix}."_aligned_logos";
  &RSAT::util::CheckOutDir($dir{aligned_logos});
  
  ################################################################
  ## Specify output file names and open output stream
  $main::outfile{log} = $main::outfile{prefix}."_log.txt"; push @outfiles, "log";
  $main::outfile{Rlog} = $main::outfile{prefix}."_Rlog.txt"; push @outfiles, "Rlog";
  $main::out = &OpenOutputFile($main::outfile{log});


  ## Pairwise comparisons between matrices
  $main::outfile{pairwise_compa} = $main::outfile{prefix}."_pairwise_compa.tab"; push @outfiles, "pairwise_compa"; 
  $main::outfile{pairwise_compa_html} = $main::outfile{prefix}."_pairwise_compa.html";  push @outfiles, "pairwise_compa_html";

  ## Individual matrix descriptions
  $main::outfile{matrix_descriptions} = $main::outfile{prefix}."_pairwise_compa_matrix_descriptions.tab"; push @outfiles, "matrix_descriptions"; &AddTabToConvert("matrix_descriptions");

  ## Simliarity network between matrices
  if ($main::return_field{network}) {
    $main::outfile{matrix_network_gml} = $main::outfile{prefix}."_matrix_network.gml"; push @outfiles, "matrix_network_gml";
    $main::outfile{matrix_network_png} = $main::outfile{prefix}."_matrix_network.png"; push @outfiles, "matrix_network_png";
  }

  ## MCL clustering (by segmentation of the matrix network)
  if ($main::return_field{mcl_clusters}) {
    $main::outfile{clusters_mcl} = $main::outfile{prefix}."_clusters.mcl"; push @outfiles, "clusters_mcl";
    $main::outfile{clusters_tab} = $main::outfile{prefix}."_clusters.tab"; push @outfiles, "clusters_tab"; &AddTabToConvert("clusters_tab");
    $main::outfile{clusters_subgraph} = $main::outfile{prefix}."_clusters_subgraph.tab"; push @outfiles, "clusters_subgraph";
    $main::outfile{clusters_subgraph_gml} = $main::outfile{prefix}."_clusters_subgraph.gml"; push @outfiles, "clusters_subgraph_gml";
    $main::outfile{clusters_subgraph_png} = $main::outfile{prefix}."_clusters_subgraph.png"; push @outfiles, "clusters_subgraph_png";
    $main::outfile{intra_cluster_degree} = $main::outfile{prefix}."_intra_clusters_degree.tab"; push @outfiles, "intra_cluster_degree";
  }

  ## Output files for the hierarchical clustering trees
  $main::outfile{distance_table} = $main::outfile{prefix}."_distance_table.tab"; push @outfiles, "distance_table"; &AddTabToConvert("distance_table");
  $main::outfile{alignment_table} = $main::outfile{prefix}."_alignment_table.tab"; push @outfiles, "alignment_table"; &AddTabToConvert("alignment_table");
  $main::outfile{internal_nodes_attributes_table} = $main::outfile{prefix}."_internal_nodes_attributes.tab"; push @outfiles, "internal_nodes_attributes_table"; &AddTabToConvert("internal_nodes_attributes_table");

  $main::outfile{consensus_cladogram_json} = $main::outfile{prefix}."_trees/tree.json"; push @outfiles, "consensus_cladogram_json";
  $main::outfile{logo_cladogram_html} = $main::outfile{prefix}."_logo_tree.html"; push @outfiles, "logo_cladogram_html";
  $main::outfile{consensus_phylogram_png} = $main::outfile{prefix}."_consensus_tree.png"; push @outfiles, "consensus_phylogram_png";
  $main::outfile{consensus_phylogram_pdf} = $main::outfile{prefix}."_consensus_tree.pdf"; push @outfiles, "consensus_phylogram_pdf";
  if ($export_tree_format eq "newick"){
    $main::outfile{consensus_phylogram_newick} = $main::outfile{prefix}."_trees/tree.newick"; push @outfiles, "consensus_phylogram_newick";
    $main::outfile{phylogram_html} = $main::outfile{prefix}."_phylogram.html"; push @outfiles, "phylogram_html";    
  }

  ## Some temporary files
  $main::outfile{temp} = $main::outfile{prefix}."_temporary.html";
  $main::outfile{temp_2} = $main::outfile{prefix}."_temporary_2.html";

  ## Open the HTML index file
  $main::outfile{html_index} = $main::outfile{prefix}."_index.html";
  push @outfiles, "html_index";
  $main::html_index = &OpenOutputFile($main::outfile{html_index});
  my $header = &PrintHtmlResultHeader(program=>"matrix-clustering", refresh_time=>120);
  #      &RSAT::message::Debug("header", $header) if ($main::verbose >= 10);
  print $main::html_index $header;
  
  ## Report command
  print $html_index "<p><tt><b>Command:</b> matrix-clustering ";
  &PrintArguments($main::html_index, 1);
  print $html_index "</tt></p>\n";


  ################################################################
  ## Read input matrices
  if ($infile{matrices}){
    @matrices = &RSAT::MatrixReader::readFromFile($infile{matrices}, $matrix_format);
    &RSAT::message::TimeWarn(scalar(@matrices), "Matrices loaded from file", $infile{matrices})
	if ($main::verbose >= 2);
  }
  
  ## The metric for alignment always must be the same as the metric 
  ## for the threshold
#  if ($main::param{matrix_compa_score} ne $main::thr_field){
#    &RSAT::error::FatalError("The metric for alignment always must be the same as the metric for the threshold. Metric for alignment: $main::param{matrix_compa_score}#; Metric for threshold: $thr_field");
#  }

  ################################################################
  ## Cluster the matrices
  &CompareMatrices();


  ################################################################
  ## Cluster the matrices
  &Hclustering();

  ################################################################
  ## Read the motif to clustr assignation, which will be required for
  ## several functions below.
  &ReadClusterComposition();

  ################################################################
  ## Create the merged matrices and consensuses
  &Merge_matrices();

  ################################################################
  ## Parse the JSON file
  &Add_attributes_to_JSON();

  ################################################################
  ## Creates the body of html to display
  ## each cluster separately
  &Add_div_to_HTML();

  ################################################################
  ## Creates the html template to display the tree
  &Create_html_tree_file();

  ################################################################
  ## Creates the html template to display the pylogram
  &Create_phylogram();

  ################################################################
  ## Convert tab files to HTML files
  &ConvertTabToHTML(@tab_to_convert);

  ################################################################
  ## Print verbose
  &Verbose() if ($main::verbose);

  ################################################################
  ## Generate the HMTL index of input/output files
  &IndexFiles() if ($main::outfile{html_index});

  ################################################################
  ## Close output stream
  my $exec_time = &RSAT::util::ReportExecutionTime($start_time); print $main::out $exec_time if ($main::verbose >= 1);
  close $main::out if ($main::outfile{prefix});

  ## Close the HTML index file
  if ($main::outfile{html_index}) {
    print $main::html_index "<hr>";
    print $main::html_index "</body>";
    print $main::html_index "</html>";
    close $main::html_index;
  }

  exit(0);
}

################################################################
################### SUBROUTINE DEFINITION ######################
################################################################


################################################################
## Display full help message 
sub PrintHelp {
  system "pod2text -c $0";
  exit()
}

################################################################
## Display short help message
sub PrintOptions {
  &PrintHelp();
}

################################################################
## Read arguments 
sub ReadArguments {
  my $arg;
  my @arguments = @ARGV; ## create a copy to shift, because we need ARGV to report command line in &Verbose()
  while (scalar(@arguments) >= 1) {
    $arg = shift (@arguments);

=pod

=head1 OPTIONS

=over 4

=item B<-v #>

Level of verbosity (detail in the warning messages during execution)

=cut
    if ($arg eq "-v") {
      if (&IsNatural($arguments[0])) {
	$main::verbose = shift(@arguments);
      } else {
	$main::verbose = 1;
      }

=pod

=item B<-h>

Display full help message

=cut
    } elsif ($arg eq "-h") {
      &PrintHelp();


=pod

=item B<-help>

Same as -h

=cut
    } elsif ($arg eq "-help") {
      &PrintOptions();

=pod

=item B<-i input matrix file>

The input file contains a set of position-specific scoring
matrices.

=cut
    } elsif ($arg eq "-i") {
      $main::infile{matrices} = shift(@arguments);
      
=pod

=item B<-format matrix_format>

Specify the input matrix format.


B<Supported matrix formats>

Since the program takes several matrices as input, it only accepts
matrices in formats supporting several matrices per file (transfac,
tf, tab, clusterbuster, cb, infogibbs, meme, stamp, uniprobe).

For a description of these formats, see the help of I<convert-matrix>.

=cut
    } elsif ($arg eq "-format") {
      $main::matrix_format = shift(@arguments);
      unless ($supported_matrix_format{$matrix_format}) {
	&RSAT::error::FatalError($matrix_format, "Invalid format for input matrices\tSupported: ".$main::supported_matrix_formats);
      }

=pod

=item	B<-o output_prefix>

Prefix for the output files.

Mandatory option: since the program I<cluster-matrices> returns a
list of output files (pariwise matrix comparisons, matrix clusters).

=cut
    } elsif ($arg eq "-o") {
      $main::outfile{prefix} = shift(@arguments);

=pod

=item B<-cons>

Display consensus of merged matrices on the internal branches of the
tree.

=cut
    } elsif ($arg eq "-cons"){
      $show_consensus = 1;
   
=pod

=item B<-export format>

Specify format for the output tree.

The JSON format is always producted, since it is required to display
the logo tree with the d3 library. Additional formats are proposed in
option to enable visualization with classical phylogeny analysis
tools.

B<Supported trees formats>

=over

=item I<json> (default)

JSON (JavaScript Object Notation). Javascrpt format supported by the
d3 library.

=item I<newick> (optional)

Widely used textual format to describe phylogenetic trees.

=back

=cut
      
    } elsif ($arg eq "-export") {
      $export_tree_format = shift(@arguments);
      unless($supported_tree_formats{$export_tree_format}) {
	&RSAT::error::FatalError($export_tree_format, "Invalid format for input matrices\tSupported: ".$main::supported_tree_formats);
      }

=pod

=item	B<-d3_base>

Option to specify any type of URL (file://, http://, ...) as base for the d3 library.
    
B<Supported d3 bases>
    
 (Default: file, optional: link )

=cut
    } elsif ($arg eq "-d3_base") {
      $d3_base_format = shift(@arguments);
      unless ($d3_base_formats{$d3_base_format}) {
	&RSAT::error::FatalError($d3_base_format, "Invalid format for d3 base selectiob\tSupported options: ".$main::d3_base_formats);
      }

=pod

=item	B<-label>

Option to select the matrix label fields displayed in the html tree
    
B<Supported labels>
    
 (name, consensus, id)
 
=cut
    } elsif ($arg eq "-label") {
      my $label_fields_to_return = shift(@arguments);
      my @new_label_fields_to_return = split (",", $label_fields_to_return);
      foreach $field (@new_label_fields_to_return) {
	if ($supported_label_fields{$field}) {
	  $label_fields_to_return{$field} = 1;
	  push @label_fields_to_return, $field;
	} else {
	  &RSAT::error::FatalError(join("\t", $field, "Invalid return field. Supported:", $supported_label_fields));
	}
      }
      
=pod

=item	B<-hclust_method>

Option to select the agglomeration rule for hierarchical clustering.


Supported agglomeration rules:
 
=over

=item I<complete> (default)

Compute inter-cluster distances based on the two most distant nodes.

=item I<average>

Compute inter-cluster distances as the average distance between nodes
belonging to the relative clusters.

=item I<single>

Compute inter-cluster distances based on the closest nodes.

=back

=cut

    } elsif ($arg eq "-hclust_method") {
      $hclust_method = shift(@arguments);
      unless(exists($supported_hclust_methods{$hclust_method})){
	&RSAT::error::FatalError($hclust_method, "Invalid hclust method. Supported:", $supported_hclust_methods);
      }


=pod

=item	B<-consensus_labels>

Option to select the labels displayed in the consensus
alignment picture

Default: consensus, id, strand

B<Supported labels>
    
 (consensus, id, strand, number)

=cut

    } elsif ($arg eq "-consensus_labels") {

	$cons_labels = shift(@arguments);
	my @new_cons_labels = split (",", $cons_labels);
	foreach $field (@new_cons_labels) {
	    if ($supported_consensuses_alignment_labels{$field}) {
		$supported_consensuses_alignment_labels{$field} = 1;
	    } else {
		&RSAT::error::FatalError(join("\t", $field, "Invalid consensus alignment label. Supported:", $supported_consensuses_alignment_labels));
	    }
	}
	@cons_labels =  @new_cons_labels;	

	
=pod

=item	B<-lth param lower_threshold>

=item	B<-uth param upper_threshold>

Threshold on some parameter (-lth: lower, -uth: upper threshold).

Threshold parameters are passed to compare-classes. 

In addition, if a threshold is defined in the (unique) metrics used as
clustering score (option I<-score>), this threshold will be used to
decide whether motifs should be aligned or not. If two motifs have a
similarity score lower (or distance score higher) than the selected
threshold, their aligment will be skipped. The status of each motif
 (Aligned or Non-aligned) is reported in the file
prefix_matrix_alignment_table.tab

Suggested thresholds:

    cor >= 0.7

    Ncor >= 0.4

=cut

    } elsif ($arg eq "-lth") {
      my $thr_field = shift(@arguments);
      my $thr_value =  lc(shift(@arguments));
      unless ($supported_threshold{lc($thr_field)}) {
	&RSAT::error::FatalError("Invalid threshold field $thr_field. Supported: $supported_thresholds");
      }
      &RSAT::error::FatalError($thr_value, "Invalid value for a lower threshold. Should be a real number. ")
	  unless (&RSAT::util::IsReal($thr_value));
      $lth{$thr_field} = $thr_value;

      
      ### Upper threshold
    } elsif ($arg eq "-uth") {
      my $thr_field = shift(@arguments);
      my $thr_value = shift(@arguments);
      unless ($supported_threshold{lc($thr_field)}) {
	&RSAT::error::FatalError("Invalid threshold field $thr_field. Supported: $supported_thresholds");
      }
      &RSAT::error::FatalError($thr_value, "Invalid value for an upper threshold. Should be a real number. ")
	  unless (&RSAT::util::IsReal($thr_value));
      $uth{$thr_field} = $thr_value;

      ## TEMPORARILY INACTIVATE PASSING THRESHOLDS TO compare-matrices
#      push @args_to_pass, join(" ", "-uth", $thr_field, $thr_value);

=pod

=item	B<-score metric>

Select the metric which will be used to cluster the motifs.

Supported metrics : cor, Ncor

Default: Ncor 

=cut

    }elsif ($arg eq "-score") {
      my $metric = shift(@arguments);
      unless ($supported_metrics{lc($metric)}) {
	&RSAT::error::FatalError("Invalid metric field $metric. Supported: $supported_metrics");	
      }
      $main::param{matrix_compa_score} = $metric;
      $main::param{matrix_compa_sort_field} = $metric;

    ## Additional arguments are passed to compare-matrices
    }else {
        if ($arg =~ /\s/) {
	    push @args_to_pass, "'".$arg."'";
        } else {
	    push @args_to_pass, $arg;
        }
    }
  }
=pod

=back

=cut

}

################################################################
## Verbose message
sub Verbose {
  print $main::out "; cluster-matrices ";
  &PrintArguments($main::out);
  printf $main::out "; %-22s\t%s\n", "Program version", $program_version;
  if (%main::infile) {
    print $main::out "; Input files\n";
    while (my ($key,$value) = each %main::infile) {
      printf $main::out ";\t%-28s\t%s\n", $key, $value;
    }
  }
  printf $main::out  "; %-28s\t%s\n", "Number of matrices", scalar(@matrices);

  if (%main::outfile) {
    print $main::out "; Output files\n";
    while (my ($key,$value) = each %main::outfile) {
      printf $main::out ";\t%-28s\t%s\n", $key, $value;
    }
  }
}

################################################################
## Compare each discovered motifs to each other.
sub CompareMatrices {
  ## Pariwise comparisons between discovered matrices.  We don't
  ## the option "distinct" in order to ensure that all the motifs are
  ## in the output graph even if they are not related to any other
  ## motif.
  $matrix_compa_verbose = &RSAT::stats::max(($main::verbose-1), 1);

  &RSAT::message::TimeWarn("Pairwise matrix comparison.") if ($main::verbose >= 2);
  my $cmd = $SCRIPTS."/compare-matrices -v ".$matrix_compa_verbose; 
  $cmd .= " -mode scores";
  $cmd .= " -format ".$main::matrix_format;
  $cmd .=  " -file ".$main::infile{matrices};
  $cmd .= " -DR";
  $cmd .= " -sort ".$main::param{matrix_compa_sort_field};
  $cmd .= " -return matrix_id,matrix_label,strand,offset,".$main::param{matrix_compa_metrics}.",consensus,matrix_desc";
  $cmd .= " -labels ".$label_fields_to_return;
  $cmd .= " ".$args_to_pass;
  $cmd .= " -o ".$main::outfile{pairwise_compa};
  $cmd .= "; ".$SCRIPTS."/text-to-html -i ".$main::outfile{pairwise_compa};
  $cmd .= " -o ".$main::outfile{pairwise_compa_html};
  &RSAT::util::one_command($cmd, 1,"");

  &RSAT::message::TimeWarn("Matrix comparison table", $main::outfile{pairwise_compa}) if ($main::verbose >= 2);

  ## Parse compare-matrix result file to read score columns
#  open $compa, $main::outfile{pairwise_compa};
  my ($compa) = &OpenInputFile($main::outfile{pairwise_compa});
  $main::param{score_column} = 0;
  while (<$compa>) {
    if (/;\t(\d+)\t$param{matrix_compa_score}/) {
      $main::param{score_column} = $1;
      &RSAT::message::Info("Score", $param{matrix_compa_score}, 
			   "column", $main::param{score_column}, 
			   "file", $main::outfile{pairwise_compa}) 
	  if ($main::verbose >= 2);
      last;
    }
  }
  close $compa;

  if ($main::param{score_column} == 0) {
    &RSAT::error::FatalError("Cannot identify score column (".$param{matrix_compa_score}.") in matrix comparison file (".$main::outfile{pairwise_compa}.").");
  }

  ## Create temporary files with information about single matrices
  %alignment_info = &Create_single_matrix_files();

  # ## JvH: for Debug
  # foreach my $key (keys %alignment_info) {
  #   my $value = $alignment_info{$key};
  #   my %hash = %{$value};
  #   print join ("\t", "alignment_info", $key , $value, "\n");
  #   foreach my $key2 (keys %hash) {
  #     my $attr = $hash{$key2};
  #     print join ("\t", "field", $key , $value, $key2, $attr, "\n");
  #   }
  # }

  return();
}


###############################################################
## Run the R script "cluster_motifs.R" 
##
## This script takes as input a matrix comparison file
## (generated by compare-matrices), performs hierarchical
## clustering.
##
## It returns the resulting tree in json format
## 
sub Hclustering {
    
    &RSAT::message::TimeWarn("Clustering matrices on the basis of the selected parameter.") if ($main::verbose >= 2);


    ##################################
    ### This lines are required?
    #my $r_path = &RSAT::server::GetProgramPath("R", 0);
    #RSAT::message::Info("R path", $r_path) if ($main::verbose >= 3);

    ####################################
    ### Modify the path
    chomp(my $r_path = `which R`);


    my $cluster_motifs_script  = $ENV{RSAT}."/R-scripts/cluster_motifs.R";
    &RSAT::error::FatalError("Cannot read cluster motifs script", $cluster_motifs_script) unless (-r $cluster_motifs_script);
    
    ## Create the string with the labels
    ## It will be passed to R as command-line argument
    my @temp_consensus_fields = @cons_labels;
    unshift(@temp_consensus_fields, "consensus");
    foreach (@temp_consensus_fields){
	$_ = "'".$_."'";
    }
    my $arg_labels = "c(";
    my $join_labels = join(",", @temp_consensus_fields);
    $arg_labels .= $join_labels." )"; 

    $r_verbosity = &RSAT::stats::max(($main::verbose-1), 0);
       
#  my $pos_drawing_offset = -($pos_interval-1)/2;
    my $pos_drawing_offset = 0;
    my  $cluster_motifs_cmd = "cat ".$cluster_motifs_script;
    $cluster_motifs_cmd .= " | ".$r_path;
    $cluster_motifs_cmd .= " --slave --no-save --no-restore --no-environ";
    $cluster_motifs_cmd .= " --args \"";
    $cluster_motifs_cmd .= " infile = '".$main::outfile{pairwise_compa}."'";
    $cluster_motifs_cmd .= "; hclust.method = '".$hclust_method."'";
    $cluster_motifs_cmd .= "; description.file = '".$main::outfile{matrix_descriptions}."'";
    $cluster_motifs_cmd .= "; distance.table = '".$main::outfile{distance_table}."'";
    $cluster_motifs_cmd .= "; alignment.file = '".$main::outfile{alignment_table}."'";
    $cluster_motifs_cmd .= "; labels = ".$arg_labels;
    $cluster_motifs_cmd .= "; verbosity = ".$r_verbosity;
    $cluster_motifs_cmd .= "; score = '".$param{matrix_compa_score}."'";
    ## Pass the thresholds on the score to R for clustering
    if (defined($lth{$param{matrix_compa_score}})) {
	my $lthsp = "";
	foreach my $par (keys %lth){
	    $lthsp.= $par."-".$lth{$par}."-";
	}
	$cluster_motifs_cmd .= "; lthsp = '".$lthsp."'";
    }
    if (defined($uth{$param{matrix_compa_score}})) {
      $cluster_motifs_cmd .= "; uth = '".$uth{$param{matrix_compa_score}}."'";
    }
    $cluster_motifs_cmd .= "; out.prefix = '".$main::outfile{prefix}."'";
    if ($export_tree_format eq "newick"){
      $cluster_motifs_cmd .= "; export = '".$export_tree_format."'";
      $cluster_motifs_cmd .= "; export_newick = '".$main::outfile{consensus_phylogram_newick}."'";
    }
    #$cluster_motifs_cmd .= ";score='Ncor'";
    $cluster_motifs_cmd .= "; \"";
    #$cluster_motifs_cmd .= " >& ".$main::outfile{Rlog};
    if ($r_path) {
        &doit($cluster_motifs_cmd, 0, $die_on_error, $verbose);
    } else {
        &RSAT::message::Warning("Could not run motif clustering because the program R is not available") if ($main::verbose >= 1);
    }


    return();
}


################################################################
## Read the cluster composition, which will be required for several
## methods
##
## Results are stored in global variables of the main memory space.
sub ReadClusterComposition {

  ## Read cluster composition, and store it in a hash table where 
  ##   keys = cluster names
  ##   values = lists of nodes per cluster
  &RSAT::message::TimeWarn("Reading cluster sizes from alignment table", $outfile{alignment_table}) if ($main::verbose >= 2);
  my ($cluster_handle) = &RSAT::util::OpenInputFile($outfile{alignment_table});
  while (<$cluster_handle>) {
    next if (/^#/); ## Skip header line
    next if (/^;/); ## Skip comment lines
    next unless (/\S/); ## Skip empty lines
    my ($node, $cluster) = split ("\t");
    push @{$cluster_nodes{$cluster}}, $node;
  }
  @clusters_to_HTML = sort keys %cluster_nodes;

  &RSAT::message::Debug("Cluster names", join ";", @clusters_to_HTML) if ($main::verbose >= 3);
  
  ## Index the number of nodes per cluster
  my %clusters_info = (); 
  foreach my $cluster (@clusters_to_HTML){
    my @nodes = @{$cluster_nodes{$cluster}};
    $clusters_info{$cluster} = scalar(@nodes);
    &RSAT::message::Debug("Cluster:", $cluster, 'nb of nodes:', $clusters_info{$cluster}) if ($main::verbose >= 4);
  }
  close $cluster_handle;
}

################################################################
## Identify clusters of similar motifs using MCL ("Markov clustering",
## an algorithm partitioning a network into clusters), and build
## consensus motifs.
sub ClusterMotifs {

  ## Generate a GML graph with the matrix comparison result (can be opened with CytoScape or Yed)
  $cmd = $SCRIPTS."/convert-graph -i ".$main::outfile{pairwise_compa};
  $cmd .= " -ewidth -ecolors fire";
  $cmd .= " -layout spring_new";
  $cmd .= " -from tab -to gml -scol 1 -tcol 2 -wcol ".$main::param{score_column};
  $cmd .= " -o ".$main::outfile{matrix_network_gml};
  &RSAT::util::one_command($cmd, 1);

  ## Generate a figure of the motif comparison graph
  $cmd = $SCRIPTS."/display-graph";
  $cmd .= " -in_format gml -i ".$main::outfile{matrix_network_gml};
  $cmd .= " -ewidth";
  $cmd .= " -layout none";
  $cmd .= " -out_format png -o ".$main::outfile{compa_png};
  &RSAT::util::one_command($cmd, 1);

  ## Use MCL to partition the motif graph into clusters
  &RSAT::message::TimeWarn("Matrix clustering by running MCL on th matrix-to-matrix network.") if ($main::verbose >= 2);
  my $mcl = &RSAT::server::GetProgramPath("mcl");  
  # my $mcl_dir = $ENV{mcl_dir};
  # unless ($mcl_dir) {
  #   &RSAT::error::FatalError("Motif comparison requires to install MCL and indicate its path in the file $ENV{RSAT}/RSAT_config.props");
  # }
  # my $mcl = $mcl_dir."/mcl";
  $cmd = "grep -v '^;' ".$main::outfile{pairwise_compa}.">".$main::outfile{pairwise_compa}.".mcl";
  $cmd .= "; ".$mcl."/mcl ".$main::outfile{pairwise_compa}.".mcl";
  $cmd .= " -I 1.8 --abc -V all ";
  $cmd .= " -o ".$main::outfile{clusters_mcl};
  $cmd .= " >& /dev/null";
  $cmd .= " ; ${SCRIPTS}/convert-classes -i ".$main::outfile{clusters_mcl};
  $cmd .= " -from mcl -to tab ";
  $cmd .= " -o ".$main::outfile{clusters_tab};
  &RSAT::util::one_command($cmd, 1);

  ## Split the motif graph into clusters as defined by MCL, and
  ## compute the intra-cluster degree (k) and weighted degree (wk) of
  ## each node
  $cmd = $SCRIPTS."/graph-get-clusters -i ".$main::outfile{pairwise_compa};
  $cmd .= " -in_format tab -scol 1 -tcol 2 -wcol ".$main::param{score_column};
  $cmd .= " -return clusters ";
  $cmd .= " -clusters ".$main::outfile{clusters_tab};
  $cmd .= " -out_format tab -o ".$main::outfile{clusters_subgraph};
  &RSAT::util::one_command($cmd, 1);


  ## Identify graph components and count the intra-component degree of
  ## each node. Most connected nodes will serve as seeds for motif
  ## clustering.
  $cmd = $SCRIPTS."/graph-connex-components -v 1";
  $cmd .= " -i ".$main::outfile{clusters_subgraph};
  $cmd .= " -wcol ".$main::param{score_column};
  $cmd .= " -o ".$main::outfile{intra_cluster_degree};
  &RSAT::util::one_command($cmd, 1);

  ## Generate a GML graph with the matrix comparison result (can be opened with CytoScape or Yed)
  $cmd = $SCRIPTS."/convert-graph -i ".$main::outfile{clusters_subgraph};
  $cmd .= " -ewidth -ecolors fire";
  $cmd .= " -layout spring_new";
  $cmd .= " -from tab -to gml -scol 1 -tcol 2 -wcol 3";
  $cmd .= " -o ".$main::outfile{clusters_subgraph_gml};
  &RSAT::util::one_command($cmd, 1);

  ## Generate a figure of the motif comparison graph
  $cmd = $SCRIPTS."/display-graph";
  $cmd .= " -in_format gml -i ".$main::outfile{clusters_subgraph_gml};
  $cmd .= " -ewidth";
  $cmd .= " -layout none";
  $cmd .= " -out_format png -o ".$main::outfile{clusters_subgraph_png};
  &RSAT::util::one_command($cmd, 1);


  return();
}


################################################################
## Add attributes to JSON file, like links to pictures and other
## attributes
sub Add_attributes_to_JSON(){
  my (%matrix_info) = ();

  

  ## Create the logos with the empty columns, which will be displayed
  ## in the HTML5 file
  &Add_empty_columns_to_logos();

  #############################################
  ## Create a hash with the attribute data:
  ##	key :  name
  ##	values : attribute (link, etc)

  ## Open the file with the picture's data
  open(DATA, $main::outfile{matrix_descriptions}) || &RSAT::error::FatalError( "Cannot open DATA file", $main::outfile{matrix_descriptions});
  while(<DATA>){
      next unless (/\S/); ## Skip empty rows
      next if (/^;/); ## Skip comment rows
      next if (/^#/); ## Skip header rows
      chomp();
      @split_line = split(/\s+/, $_);
      my $matrix_label = $split_line[6];
      $matrix_info{$matrix_label}{id} = $split_line[1];
      $matrix_info{$matrix_label}{name} = $split_line[2];
      $matrix_info{$matrix_label}{width} = $alignment_info{$matrix_info{$matrix_label}{id}}{width};
      $matrix_info{$matrix_label}{consensus} = $split_line[4];
      $matrix_info{$matrix_label}{consensus_rc} = $split_line[5];
      $matrix_info{$matrix_label}{label} = $split_line[6];
      $matrix_info{$matrix_label}{logo_file} = $split_line[7];
      $matrix_info{$matrix_label}{logo_file_rc} = $split_line[8];

      ## The path of the aligned logo (in D or R) that will be displayed 
      if($aligned_logos_path{$split_line[1]}{strand} eq "D"){
	  $matrix_info{$matrix_label}{logo_aligned_file} = $aligned_logos_path{$split_line[1]}{logo};
	  $matrix_info{$matrix_label}{logo_aligned_file_rc} = $aligned_logos_path{$split_line[1]}{logo_rc};
      }else{
	  $matrix_info{$matrix_label}{logo_aligned_file} = $aligned_logos_path{$split_line[1]}{logo}; 
	  $matrix_info{$matrix_label}{logo_aligned_file_rc} = $aligned_logos_path{$split_line[1]}{logo_rc};
      }
  }
  close(DATA);


  ############################################################
  ## Read the JSON file and add the link data to each node
  &RSAT::message::TimeWarn("Linking data to nodes in JSON files") if ($main::verbose >= 2);
  foreach my $cluster(@clusters_to_HTML){
      my $line = "";
      my $children = 0;
      my ($M1, $M2, $Add_this, $Flag);
      my (@Split_line, @Parsed_JSON) = ();
      my $file = $main::outfile{prefix}."_clusters_information/".$cluster."/levels_JSON_".$cluster."_table.tab";
      chomp(@levels_JSON = `more $file | grep -v ';' | cut -f3`);
      my @merged_cons_folders = keys(%merged_consensuses_files);

      ## Open the JSON file produced by R   
      my $JSON = $main::outfile{prefix}."_trees/tree_".$cluster.".json";
      &RSAT::message::TimeWarn("JSON file", $JSON, $cluster ) if ($main::verbose >= 2);
      open(JSON, $JSON) || &RSAT::error::FatalError($JSON, "Cannot open the JSON file");
      while(<JSON>){
	  
	  chomp;
	  $Flag = 0;
	  $line = $_;
	  
	  # # If the user select the option -cons 1
	  # Add the consensus to the json file to be displayed in the tree
	  #if ($show_consensus){
	  	  if ($line =~ /(\s*\"children\":\s*\[)/){
	  
	  	      $children++;
	  	      $Add_this = "";
	  	      if ($children >= 2){
	  		  my $folder = $levels_JSON[$children-2];
	  		  if($folder ~~ @merged_cons_folders){
	  		      my $consensus_link = &RSAT::util::RelativePath($JSON, $merged_consensuses_files{$folder}{$cluster}{logo});
	  		      $consensus_link =~ s/^\.\.\///g;
	  		      $Add_this .= " \"name\" : \"$merged_consensuses_files{$folder}{$cluster}{consensus}\", ";
	  		      $Add_this .= " \"url\" : \"$consensus_link\", ";
	  		      push(@Parsed_JSON, $Add_this."\n");
	  		  }
	  	      }
	  	  }
	  # #}

	  ### Search the pattern separating the matrices names
	  if ($line =~ /\s*\"label\":\s*\"(.+)\",/){
	      $Flag = 1;
	      $Add_this = "";
	      $M1 = $1;
	      
	      ## Define te URL of the logo file, relative to the location of the json file
	      my $aligned_logo_link = &RSAT::util::RelativePath($JSON, $matrix_info{$M1}{logo_aligned_file});
	      $aligned_logo_link =~ s/^\.\.\///g;
	      my $aligned_logo_url = $aligned_logo_link.".png";

	      ## Define te URL of the logo file, relative to the location of the json file
	      my $aligned_rc_logo_link = &RSAT::util::RelativePath($JSON, $matrix_info{$M1}{logo_aligned_file_rc});
	      $aligned_rc_logo_link =~ s/^\.\.\///g;
	      my $aligned_rc_logo_url = $aligned_logo_link.".png";
	      
	      ### Create the line that will be added to JSON file
	      $Add_this .= "\n \"image\" : \"${aligned_logo_link}\"";
	      $Add_this .= ",\n \"image_rc\" : \"${aligned_rc_logo_link}\"";
	      $Add_this .= ",\n \"url\" : \"${aligned_logo_link}\"";
	      
	      foreach my $field (@supported_label_fields) {
		  if ($label_fields_to_return{$field}) {
		      $Add_this .= ",\n \"".$field."\" : \"".$matrix_info{$M1}{$field}."\"";
		  }
	      }
	      $Add_this .= ",\n \"size\" : ".$matrix_info{$M1}{width};
	      $Add_this .= ",\n \"consensus_rc\" : \"".$matrix_info{$M1}{consensus_rc}."\"";
	  }
	  push(@Parsed_JSON, $line."\n");
	  
	  ### Add the new line
	  if ($Flag) {
	      push(@Parsed_JSON, $Add_this."\n");
	  }
      }
      close(JSON);
  
      ### Create the JSON parsed FILE
      open(PARSED_JSON, ">".$main::outfile{prefix}."_trees/parsed_tree_".$cluster.".json") || &RSAT::error::FatalError("Cannot create the PARSED JSON file", $main::outfile{prefix}."_trees/parsed_tree_".$cluster.".json");
      print PARSED_JSON @Parsed_JSON;
      close(PARSED_JSON);
      unlink($JSON);
  }

#  system("rm -r ".$main::outfile{prefix}."_pairwise_compa_logos");
  my $cmd = "rm -r ".$main::outfile{prefix}."_pairwise_compa_logos";
  &doit($cmd, 0, $die_on_error, $verbose);

  return();
}


###################################
## Create the html file to 
## display the tree
sub Create_html_tree_file(){
    
    my $labels_num = 0;
    my $cluster_nb = 0;
    my $main_label = 	        
	"\tnode.append(\"a\")
	\t\t.attr(\"xlink:href\", function(d) { return d.url; })
	\t\t.append(\"text\")
	\t\t.text(function(d) { return d.children ? \"\" : d.; })
	\t\t.attr(\"dx\", function(d) { return d.children ? 0 : 20; })
	\t\t.attr(\"dy\", function(d) { return d.children ? 0 : 3; })
	\t\t.attr(\"text-anchor\", function(d) { return d.children ? \"end\" : \"start\"; });\n";

    my $new_label = 
	"\tnode.append(\"text\")
	\t\t.text(function(d){ return d.children ? \"\" : d.; })
	\t\t.attr(\"x\", function(d){ return d.children ? 0 : 20; })
	\t\t.attr(\"dy\", function(d){ return d.children ? 0 : #; })
	\t\t.attr(\"text-anchor\", function(d){ return d.children ? \"end\" : \"start\"; });";

    open(TREE, $main::outfile{logo_cladogram_html}) || &RSAT::error::FatalError($main::outfile{logo_cladogram_html}, "Cannot open HTML file");
    open(TEMP, ">".$main::outfile{temp}) || &RSAT::error::FatalError($main::outfile{temp}, "Cannot create temporary file");

    ## Select the D3 base
    if ($d3_base_format eq "link"){
	$d3_base_url = "http://d3js.org";	
    }

    while(<TREE>){
	chomp;	

	if(/\(function\(\)\{/){
	    $cluster_nb++;
	}

	if ((defined($ENV{RSA_OUTPUT_CONTEXT})) &&
	    (($ENV{RSA_OUTPUT_CONTEXT}eq "cgi") || ($ENV{RSA_OUTPUT_CONTEXT} eq "RSATWS"))) {
#	if ((defined($ENV{RSA_OUTPUT_CONTEXT})) &&
#	    ($ENV{RSA_OUTPUT_CONTEXT} eq "cgi")) {
	  $_ =~ s|$ENV{RSAT}/public_html/|$ENV{rsat_www}|g;
	}
	
	### Fix the d3 library path
	$_ =~ s|src=\"\"|src=\"${d3_base_url}/d3.v3.js\"|;

	### Set the height 
	if(/\s+height =,/){
	    my $clust = "cluster_".$cluster_nb;
	    my $mat_number = $clusters_info{$clust} + 2;
	    $_ =~ s/\s+height =,/\t\theight = 100 + 85 * $mat_number,/;
	}

	### Add the JSON file name
	if(/^\s+d3.json\(/){
	    $json_file_name = $main::outfile{prefix}."_trees/parsed_tree_cluster_".$cluster_nb.".json";
	    my @name = split(/\//, $json_file_name);
	    my $n1 = pop(@name);
	    my $n2 = pop(@name);
	    $json_file_name = $n2."/".$n1;
	    $_ =~ s/^\s+d3.json\(/d3.json\(\"$json_file_name\"/;
	}

	if(/"#"/){
	    my $clust = "cluster_".$cluster_nb;
	    $_ =~ s/\"#\"/\"#$clust\"/;
	}

	if ($_ =~ /Insert labels/){
	    my $add_this = "";
	    $labels_num = 0;
	    ################################################################
	    ## Insert in the html script the labels selected by the user
	    foreach my $field (@label_fields_to_return) {
		if ($label_fields_to_return{$field}) {
		    $labels_num++;
		    
		    ### Add the first label with the the link to the logo
		    if($labels_num == 1){
			$add_this = $main_label;
			$up = uc($field);
			#$add_this =~ s/d\.;/ \"$up: \" \+ d\.$field;/;
			$add_this =~ s/d\.;/ d\.$field;/;
		    }
		    
		    ### Add the new labels, set the distances among the labels in the tree
		    if($labels_num > 1){
			my $copy = $new_label;
			$up = uc($field);
			#$copy =~ s/d\.;/ \"$up: \" \+ d\.$field;/;
			$copy =~ s/d\.;/ d\.$field;/;
			my $y_displacement = 3 + (($labels_num -1) * 20);
			$copy =~ s/#/$y_displacement/;
			$add_this .= "\n\n".$copy."\n";
		    }
		}
	    }
	    print TEMP $add_this."\n";
	}

	# if ($show_consensus){
	#     if ($_ =~ /Insert consensus/){
	# 	print TEMP $consensus_branch_label."\n";
	#     }
	# }
	print TEMP $_."\n";
    }
    close(TEMP);
    unlink($main::outfile{logo_cladogram_html});
    my $cmd = "mv -f ".$main::outfile{temp}." ".$main::outfile{logo_cladogram_html};
    &doit($cmd, 0, $die_on_error, $verbose);
    return();
}

################################################################
## Index a table to convert to HTML
sub AddTabToConvert {
  my ($key) = @_;
  my $tab = $main::outfile{$key};
  my $html_key = $key."_html";
  my $html = $tab."_html";
  $html =~ s/\.tab.html$/\.html/;
  $html =~ s/\.txt.html$/\.html/;
  $main::outfile{$html_key} = $html; push @outfiles, $html_key; 
  push @tab_to_convert, $key; ## Add the key to the list to be converted
}

################################################################
## Convert tab-delimited files in HTML
sub ConvertTabToHTML {
  my @tab_to_convert = @_;
  &RSAT::message::TimeWarn("Converting tab-delimited to HTML files") if ($main::verbose >= 2);
  for my $key (@tab_to_convert) {
    my $tab = $main::outfile{$key};
    my $html_key = $key."_html";
    my $html = $main::outfile{$html_key};
    my $cmd = $SCRIPTS."/text-to-html ";
    $cmd .= " -i ".$tab;
    $cmd .= " -o ".$html;
#    die join "\t",  "HELLO", $key, $html_key, $html, $cmd, "\n";
    &doit($cmd, 0, $die_on_error, $verbose);
  }
}

################################################################
## Create the phylogram
sub Create_phylogram(){

  ### Save the newick tree in the variable
  chomp(my $newick_tree = `more $main::outfile{consensus_phylogram_newick}`);

#  system("cp ".$ENV{RSAT}."/public_html/lib/d3/display_d3_phylogram.html ".$main::outfile{phylogram_html});
  my $cmd = "cp ".$ENV{RSAT}."/public_html/lib/d3/display_d3_phylogram.html ".$main::outfile{phylogram_html};
  &doit($cmd, 0, $die_on_error, $verbose);
  open(TEMP, ">".$main::outfile{temp_2}) || &RSAT::error::FatalError($main::outfile{temp_2}, "Cannot create phylogram temporary file");
  open(PHYLO,$main::outfile{phylogram_html}) || &RSAT::error::FatalError($main::outfile{phylogram_html}, "Cannot found the HTML tree file");


  ## Select the D3 base
  if ($d3_base_format eq "link"){
    $d3_base_url = "http://d3js.org";	
  }

  while(<PHYLO>){
    chomp;	

#	if ((defined($ENV{RSA_OUTPUT_CONTEXT})) &&
#	    ($ENV{RSA_OUTPUT_CONTEXT} eq "cgi")) {
#	    $_ =~ s|$ENV{RSAT}/public_html/|$ENV{rsat_www}|g;
#	}

    ### Fix the d3 libraries path
    $_ =~ s|src=\"d3\"|src=\"${d3_base_url}/d3.v3.js\"|;
    $_ =~ s|src=\"newick\"|src=\"$d3_newick_base\"|;
    $_ =~ s|src=\"phylog\"|src=\"$d3_phylogram_base\"|;

    ### Paste the newick tree in the HTML code
    $_ =~ s/Paste_newick_here/$newick_tree/;

    print TEMP $_."\n";
  }
  close(TEMP);
  close(PHYLO);
  unlink($main::outfile{phylogram_html});
#  system("mv ".$main::outfile{temp_2}." ".$main::outfile{phylogram_html});
  $cmd = "mv ".$main::outfile{temp_2}." ".$main::outfile{phylogram_html};
  &doit($cmd, 0, $die_on_error, $verbose);
  return();
}


################################################################
## Add the empty columns to the logos that will be displayed, in order
## to get multiple alignment on the logo cladogram.
sub Add_empty_columns_to_logos(){

  ## Open the alignment_table daata
  open(ALIGNMENT_TABLE, $main::outfile{alignment_table}) || &RSAT::error::FatalError( "Cannot open ALIGNMENT_TABLE file", $main::outfile{alignment_table});

  while(<ALIGNMENT_TABLE>){
    next unless (/\S/); ## Skip empty rows
    next if (/^;/); ## Skip comment rows
    next if (/^#/); ## Skip header rows
    chomp();

    
    my @split_line = split(/\s+/, $_);
    $split_line[0] =~ s/\s+//g;

    ## Store the ID, strand and offset of each aligned motif
    $alignment_info{$split_line[0]}{strand} =  $split_line[2];
    $alignment_info{$split_line[0]}{offset_left} =  $split_line[3];
    $alignment_info{$split_line[0]}{offset_right} =  $split_line[4];
    $alignment_info{$split_line[0]}{width} = $split_line[5];

  }
  close(ALIGNMENT_TABLE);

  ## Call the program 'convert-matrix' to add the empty columns
  ## to the aligned logos and retrieve the logos in RC
  foreach my $id (keys %alignment_info){

    ## Reset the variables
    my $offset_left = $alignment_info{$id}{offset_left};
    my $offset_right = $alignment_info{$id}{offset_right};
    my $file_name = $alignment_info{$id}{file_name};
    my $strand = $alignment_info{$id}{strand};

    ## Run the convert-matrix command to add the empty columns to the logos
    if($strand eq "D"){

      ## Add the empty columns
#      system("convert-matrix -i ".$file_name." -from tf -to tf -logo_format png -return counts,consensus,parameters,logo -insert_col_left ".$offset_left." -insert_col_right ".$offset_right." -o ".$main::outfile{prefix}."_aligned_logos/aligned_logos");
      my $cmd = "convert-matrix -i ".$file_name;
      $cmd .= " -from tf -to tf -logo_format png -return counts,consensus,parameters,logo";
      $cmd .= " -insert_col_left ".$offset_left;
      $cmd .= " -insert_col_right ".$offset_right;
      $cmd .= " -o ".$main::outfile{prefix}."_aligned_logos/aligned_logos";
      &doit($cmd, 0, $die_on_error, $verbose);

    }else{

      ## First convert the matrix to reverse complement
#      system("convert-matrix -i ".$file_name." -from tf -to tf -rc -return counts,consensus -o ".$main::outfile{prefix}."_aligned_logos/temp.tf"); 
      my $temp_mat = $main::outfile{prefix}."_aligned_logos/temp.tf";
      $cmd = "convert-matrix -i ".$file_name;
      $cmd .= " -from tf -to tf -rc -return counts,consensus";
      $cmd .= " -o ".$temp_mat;
      &doit($cmd, 0, $die_on_error, $verbose);
      
      ## Then add the gaps
#      system("convert-matrix -i ".$temp_mat." -from tf -to tf -logo_format png -return counts,consensus,parameters,logo -insert_col_left ".$offset_left." -insert_col_right ".$offset_right." -o ".$main::outfile{prefix}."_aligned_logos/aligned_logos");
      $cmd = "convert-matrix -i ".$temp_mat;
      $cmd .= " -from tf -to tf -logo_format png -return counts,consensus,parameters,logo";
      $cmd .= " -insert_col_left ".$offset_left;
      $cmd .= " -insert_col_right ".$offset_right;
      $cmd .= " -o ".$main::outfile{prefix}."_aligned_logos/aligned_logos";
      &doit($cmd, 0, $die_on_error, $verbose);

      unlink($temp_mat);
    }
    unlink($file_name);

    ## Store the path of the logos
    $aligned_logos_path{$id}{strand} = $strand;
    $aligned_logos_path{$id}{logo} = $main::outfile{prefix}."_aligned_logos/".$id."_logo.png";
    $aligned_logos_path{$id}{logo_rc} = $main::outfile{prefix}."_aligned_logos/".$id."_logo_rc.png";
  }    
}


#################################################
## Print the matrices in separated files. They 
## will be used to add empty columns with 
## convert-matrices and to create the merged
## consensus
##
## JvH: we should come back to this procedure, and revise it to avoid
## system calls, if possible. Matrix IDs could be collected at the
## beginning of the script by properly reading all matrices, and
## storing them in a vector.
sub Create_single_matrix_files(){
  &RSAT::message::TimeWarn("Creating single matrix files") if ($main::verbose >= 2);
  my @ids = ();
  my %matrix_files = ();

  ## Split the matrix file into single matrices file, 
  ## in order to process individually each matrix
#  system("convert-matrix -i ".$infile{matrices}." -from ".$main::matrix_format." -to tf -split -o ".$main::outfile{prefix}."_single_matrices-");
  my $cmd = "convert-matrix -i ".$infile{matrices}." -from ".$main::matrix_format." -to tf -split -o ".$main::outfile{prefix}."_single_matrices";
  &doit($cmd, 0, $die_on_error, $verbose);
  
  ## Get the ids of the matrices
  chomp(@ids = `grep -v '#' $main::outfile{matrix_descriptions} | cut -f2`);

  ## Get the file name of each matrix and stores it in the hash
  foreach my $id (@ids){
    chomp($matrix_files{$id}{file_name} = `ls $main::outfile{prefix}* | grep $id | grep '.tf'`);
  }

  return(%matrix_files);
}


################################################################
## Merge the matrices at each level of the hclust tree, and produce a
## branch-wise matrix + logo + consensus.
sub Merge_matrices(){

  &RSAT::message::TimeWarn("Merging matrices for", scalar(@clusters_to_HTML), "clusters") if ($main::verbose >= 2);
  ## Get the cluster sizes

  ## JvH: I suppress this becaus ewe already read this list earlier (I mde it a global variabl)
#  chomp(my @clusters_to_HTML = `more $main::outfile{alignment_table} | grep -v '#' | cut -f2 | uniq`);
  foreach my $cluster (@clusters_to_HTML){
    my @nodes = @{$cluster_nodes{$cluster}};
    &RSAT::message::Debug("Merging matrices for cluster", $cluster, "Nodes", join (";", @nodes)) if ($main::verbose >= 3);

    $clusters_info{$cluster} = scalar(@nodes); 
    $clusters_info{$cluster} =~ s/\s+//g;
    
    
    ## Folders with the aligned matrices
    my @folder_merged_matrices = ();
    chomp(@folder_merged_matrices = `ls $main::outfile{prefix}_clusters_information/$cluster/merged_consensuses`);
    
    foreach my $folder (@folder_merged_matrices){
      
      ## Get the names of the aligned matrices files
      chomp(my @file_names = `ls $main::outfile{prefix}_clusters_information/$cluster/merged_consensuses/$folder`);
      foreach my $file(@file_names){
	$file = $main::outfile{prefix}."_clusters_information/".$cluster."/merged_consensuses/".$folder."/".$file
      }
      my $files = join(" ", @file_names);
      
      ## Merge the matrices and create a single matrix
      my $cat_file = $main::outfile{prefix}."_clusters_information/".$cluster."/merged_consensuses/".$folder."/".$folder."_cat.tf";
      my $merged_mat_file = $main::outfile{prefix}."_clusters_information/".$cluster."/merged_consensuses/".$folder."/".$folder."_matrices.tf";

#      system("cat ".$files." >".$cat_file);
#      system("merge-matrices -i ".$cat_file." -in_format tf -out_format tf -o ".$merged_mat_file);
      my $cmd = "cat ".$files." >".$cat_file;
      $cmd .= "; merge-matrices -i ".$cat_file;
      $cmd .= " -in_format tf -out_format tf";
      $cmd .= " -o ".$merged_mat_file;
      &doit($cmd, 0, $die_on_error, $verbose);
      
      ## Delete files
      unlink($cat_file);
      foreach my $file(@file_names){
	unlink($file);
      }
      
     ## Create branch-wise logos
#      system("convert-matrix -i ".$merged_mat_file." -from tf -to tf -logo_format png -return counts,consensus,parameters,logo -o ".$main::outfile{prefix}."_clusters_information/".$cluster."/merged_consensuses/".$folder."/".$folder.".png");
      $cmd = "convert-matrix -i ".$merged_mat_file;
      $cmd .= " -from tf -to tf -logo_format png -return counts,consensus,parameters,logo";
      ## JvH: Jaime, why do you include a .png exension in the output prefix of convert-matrix ? Since it is in rransfac format, the extension should be .tf, no ?
      $cmd .= " -o ".$main::outfile{prefix}."_clusters_information/".$cluster."/merged_consensuses/".$folder."/".$folder.".png"; 
      &doit($cmd, 0, $die_on_error, $verbose);
      

      $merged_consensuses_files{$folder}{$cluster}{logo} = $main::outfile{prefix}."_clusters_information/".$cluster."/merged_consensuses/".$folder."/merged_logo.png";
      $merged_consensuses_files{$folder}{$cluster}{logo_RC} = $main::outfile{prefix}."_clusters_information/".$cluster."/merged_consensuses/".$folder."/merged_logo_rc.png";
      
      $merged_consensuses_files{$folder}{$cluster}{consensus} = $main::outfile{prefix}."_clusters_information/".$cluster."/merged_consensuses/".$folder."/merged_logo_rc.png";
      
      my $mat_file = $main::outfile{prefix}."_clusters_information/".$cluster."/merged_consensuses/".$folder."/".$folder."_matrices.tf";
      my $cons = `more $mat_file | grep '^DE'`;
      chomp($cons);
      $cons =~ s/^DE\s+//;
      $merged_consensuses_files{$folder}{$cluster}{consensus} = $cons;
    }
  }

}


###################################
## This is the body of d3 script,
## each time a new cluster is added 
## to the file the next code is paste 
## and modified
## 	        .separation(function(a, b) { return (a.parent == b.parent ? 1: 1); });
sub paste_new_cluster_to_HTML(){
    my $d3_tree_base = "(function(){
                var width = 3500,
		height =,
		m = [40, 240, 40, 240],
		w = width ,//-m[0] -m[0],
		h = height //-m[0] -m[2];
	;


	var cluster = d3.layout.cluster()
		.size([height, width - 2500])
	        .separation(function(a, b) { return (a.parent == b.parent ? 1: 1); });


	var svg = d3.select(\"#\").append(\"svg\")
		.attr(\"width\", width)
		.attr(\"height\", height)
		.append(\"g\")
		.attr(\"class\",\"drawarea\")
		.attr(\"transform\", \"translate(40,0)\");
		

	d3.json(, function(json) {
				
	var nodes = cluster.nodes(json);

	var link = svg.selectAll(\"path.link\")
		.data(cluster.links(nodes))
		.enter().append(\"path\")
		.attr(\"class\", \"link\")
		.attr(\"d\", elbow);


	var node = svg.selectAll(\"g.node\")
		.data(nodes)
		.enter().append(\"g\")
		.attr(\"class\", \"node\")
		.attr(\"transform\", function(d) { return \"translate(\" + d.y + \",\" + d.x + \")\"; });

	
	node.append(\"circle\")
		.attr(\"r\", 6.5);

	/*Insert labels*/
	
	
	/*Insert consensus*/
		node.append(\"a\")
		.attr(\"xlink:href\", function(d) { return d.children ? d.url : \"\"; })
		.append(\"text\")
		.text(function(d) { return d.children ? d.name : \"\"; })
		.attr(\"dx\", function(d) { return d.children ? -20 : 0; })
		.attr(\"dy\", function(d) { return d.children ? -6 : 0; })
		.attr(\"fill\", function(d) { return d.children ? \"blue\" : \"\"; })
		.attr(\"text-anchor\", function(d) { return d.children ? \"end\" : \"start\"; });

		
	node.append(\"svg:image\")
		.attr(\"xlink:href\", function(d) { return d.image; })  
		.attr(\"width\", function(d) { return (d.size + 2)  * 24; })
		.attr(\"height\", 80)
		.attr(\"x\", 325)    
		.attr(\"y\", -33) 
		.attr(\"preserveAspectRatio\", \"none\");	
		
	node.append(\"svg:image\")
		.attr(\"xlink:href\", function(d) { return d.image_rc; })  
		.attr(\"width\", function(d) { return (d.size + 2)  * 24; })
		.attr(\"height\", 80)
  		.attr(\"x\", function(d) { return (d.size + 2)  * 24 + 375; })  
		.attr(\"y\", -33) 
		.attr(\"preserveAspectRatio\", \"none\");
/*
	d3.select(\"svg\")
        .call(d3.behavior.zoom()
        .scaleExtent([0.5, 5])
        .on(\"zoom\", zoom));
		.attr(\"x\", function(d) { (d.size + 2) * 24 + 450;})  
*/
			
	});
	

	function elbow(d, i) {
		
		return \"M\" + d.source.y + \",\" + d.source.x
		+ \"V\" + d.target.x + \"H\" + d.target.y;
	}
})();";
    return($d3_tree_base);
}


########################################
## Before modify the width and heigth
## of the alignments, creates the <div>
## in HTML script to visualize the clusters
sub Add_div_to_HTML(){
    my $line = "";
    my $print = "";
    my $temp = paste_new_cluster_to_HTML();

#    system("cp ".$ENV{RSAT}."/public_html/lib/d3/display_d3_tree.html ".$main::outfile{logo_cladogram_html});
    my $cmd = "cp ".$ENV{RSAT}."/public_html/lib/d3/display_d3_tree.html ".$main::outfile{logo_cladogram_html};
    &doit($cmd, 0, $die_on_error, $verbose);

    open(TEMP, ">".$main::outfile{temp}) || &RSAT::error::FatalError($main::outfile{temp}, "Cannot create temporary file");
    open(TREE,$main::outfile{logo_cladogram_html}) || &RSAT::error::FatalError($main::outfile{logo_cladogram_html}, "Cannot found the HTML tree file");

    while(<TREE>){
	chomp;
	$line = $_;
	## Add one <div> for each cluster
	if(/^\s*\/\/Divisions/){
	    $line = "";
	    foreach my $cluster (sort keys %clusters_info){
		$line .= "<div id=\"".$cluster."\"><h2>".$cluster."</h2><\/div>\n";
	    }
	}

	## 
	if(/^\s*\/\/Body/){
	    $line = "";
	    foreach my $cluster (sort keys %clusters_info){
		$line .= $temp."\n";
	    }
	}

	$print .= $line."\n";
    }
    print TEMP $print;
    close(TREE);
    close(TEMP);
    unlink($main::outfile{logo_cladogram_html});
#    system("mv ".$main::outfile{temp}." ".$main::outfile{logo_cladogram_html});
    $cmd = "mv ".$main::outfile{temp}." ".$main::outfile{logo_cladogram_html};
    &doit($cmd, 0, $die_on_error, $verbose);    
}

__END__
