#!/usr/bin/perl -w
############################################################
#
# $Id: template,v 1.48 2013/10/03 17:24:24 jvanheld Exp $
#
############################################################

## use strict;

=pod

=head1 NAME

matrix-enrichment

=head1 VERSION

$program_version

=head1 DESCRIPTION

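Evaluate the enrichment of a collection of motifs (position-specific scoring matrices) in one or several sequence sets. For each motif and each sequence set, the program compares the empirical and theoretical score distributions (NWD curves) and computes binomial occurrence significance curves.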

=head1 AUTHORS

Jaime Castro <castro\@univ-amu.fr>

Samuel Collombet <samuel.collombet\@ens.fr>

Alejandra Medina-Rivera <amedina\@liigh.unam.mx>

Morgane Thomas-Chollier <mthomas\@biologie.ens.fr>

Jacques van Helden <Jacques.van-Helden\@univ-amu.fr>

=head1 CATEGORY

=over

=item motif analysis

=back

=head1 USAGE

matrix-enrichment [-i inputfile] [-o outputfile] [-v #] [...]

=head1 INPUT FORMAT

=over

=item motifs

See I<convert-matrix> for input format descriptions.

=item Sequences

See I<convert-seq> for input format descriptions.

=item Background model

The background model can be given as an input or can be calculated from input sequences.

For background model file formats see I<convert-background-model>

=back

=head1 OUTPUTS

=over

=item maxNWD heatmap

Maximal values from the NWD curves. An NWD curve represents the difference between an empirical and the theoretical score distribution.

=item Binomial occurrence significance curves

A binomial occurrence significance curve represents the over-representation of predicted binding sites over all possible p-values, for a given motif and sequence set.

=back

=head1 SEE ALSO

matrix-quality

matrix-distrib

matrix-scan

=head1 WISH LIST

=over

=item B<-bg_input>

Calculate background model from input sequence sets.

All given sequence sets will be merged to calculate this background model to keep distributions comparable.

=item B<wish 2>

=back

=cut

## Register the "lib/" directory located next to this script in @INC, so
## that the subsequent require/use statements can find the RSAT libraries.
BEGIN {
  ## Split $0 into a leading directory part (captured) and the script
  ## name, i.e. the trailing run of characters other than "/", "(" and ")".
  ## Nothing is registered when $0 has no such trailing run.
  my ($script_dir) = ($0 =~ m{\A(.*?)[^()/]+$}s);
  push(@INC, $script_dir."lib/") if (defined($script_dir));
}
require "RSA.lib";
require "quality_enrichment.lib.pl";
use RSAT::MatrixReader;
use Data::Dumper;
use List::MoreUtils qw(uniq);
require "RSA2.cgi.lib";
use RSAT::util;
use RSAT::matrix;
use RSAT::SeqUtil;
use Data::Dumper;
use File::Basename;
use File::Path;
require "RSA.disco.lib";
require "footprint.lib.pl";


################################################################
## Main package
package main;
{
  ################################################################
  ## Initialise parameters
  our $start_time = &RSAT::util::StartScript();
  our $program_version = do { my @r = (q$Revision: 1.48 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r };
  #    $program_version = "0.00";

  our %infile = ();
  our %outfile = ();

  our $verbose = 0;
  our $in = STDIN;
  our $out = STDOUT;

  ## 
  %main::supported_matrix_formats=&RSAT::MatrixReader::ListMatrixFormats();
  $main::supported_matrix_formats=join (",", keys(%supported_matrix_formats));

  ## Formats
  $seq_format = "fasta";
  $matrix_format="transfac";

  ## R plots
  my $r_plots=1;
  $rplot_option="";
  if ($r_plots==1) {
      $main::rplot_option=" -r_plot -clean_r_legend ";
  }
  ## bginput
  $main::bg_input="";
  #my @bg_input_options= ("by_seq","all");
  my @bg_input_options= ("all");

  ## Default heatmap color palette and number of color classes
  local $heatmap_color_palette = "YlOrRd";
  local $heatmap_color_classes = 9;

  ## Supported heatmap palettes (taken from http://colorbrewer2.org/),
  ## kept in three forms: ordered list, lookup hash (palette name => 1)
  ## and comma-separated string.
  local @supported_heatmap_color_palette = qw(
      YlOrRd YlOrBr YlGnBu YlGn PuRd PuBuGn PuBu OrRd GnBu
      BuPu BuGn Reds Purples Oranges Greys Greens Blues
  );
  local %supported_heatmap_color_palette = map { $_ => 1 } @supported_heatmap_color_palette;
  local $supported_heatmap_color_palette = join(",", @supported_heatmap_color_palette);

  $main::infile{bg_file}="";
  my %bg_files= (); ## Store processed bg_files

  ## Markov order
  $main::markov = 1;
  $bg_model = new RSAT::MarkovModel();

  ## Tasks
  local @supported_tasks = ("all", ## Run all other tasks
			    "convert_matrix", ## Export the matrix and sites in various formats (tab, info, logos)
			    "export_matrix", ## Export matrix
			    "permute", ## Scan sequences with permuted matrices
			    "theor", ## Calculate the theoretical distribution
			    "scan", ## Scan sequences with matrix-scan
			    "compare", ## Compare distributions between the various input files
			    "graphs", ## Draw the graphs with distrib comparisons, pass to R
			    "synthesis", ## Generate a HTML file with a synthetic report + links to all result files
			    "clean", ## Clean temporary files
      );
  $supported_tasks = join (",", @supported_tasks);
  local %supported_task = ();
  foreach my $task (@supported_tasks) {
      $supported_task{$task} = 1;
  }

  $noicon=1;
  %task = (); ## Difine hash to contain user given tasks


  ################################################################
  ## The C command matrix-scan-quick is MUCH faster than
  ## matrix-scan. If it is supported on this machine, use it !
  local $quick_scan_cmd = &RSAT::server::GetProgramPath("matrix-scan-quick");
  &RSAT::message::Info("matrix-scan-quick command", $quick_scan_cmd) if ($main::verbose >= 3);
  local $quick = 0;
  if ($quick_scan_cmd) {
      $quick = 1;
  } else {
      &RSAT::message::Warning("Cannot find the command matrix-scan-quick");
  }
  
  ## Format for the graphs
  @main::image_formats = ("png");
  $main::image_formats = "";
  
  ## Format of the sequence logos
  @logo_formats = ("png");
  $logo_formats = "";

  %to_delete=();

  $main::pseudo_counts=1;
  $decimals = 1;
  $class_interval = 1/(10**$decimals);

  $distrib_score_col = 5; ## Column containing the dCDF (decreasing cumulative density function) in the output of the command matrix-distrib-quick -distrib

  $main::nwd_seqs=0;

  ## All matrix and sequence names
  local @all_matrix_names = ();
  local @all_sequence_names = ();
  
  ################################################################
  ## Read argument values
  &ReadArguments();
  
  
  ################################################################
  ## Check argument values

  ##
  @main::image_formats = uniq(@main::image_formats);
  ## If no tasks has been specified, execute them all
  if (($task{all}) || (scalar(keys(%task))==0)) {
      %task = %supported_task;
      $task{all} = 0;
  }
  
  # foreach (sort keys %raks) {
  #   print "$_ : $myhash{$_}\n";
  # }

  # die "BOOM";
  ## Matrix file is mandatory
  &RSAT::error::FatalError("You must define a matrix file, with the option -matrix")
      unless ($main::infile{motifs});
  
  ## Output prefix is mandatory
  &RSAT::error::FatalError("You must define a prefix for the output files with the option -o")
      unless ($main::prefix{main});
  
  
  ## Output prefix cannot end with a "/" (must be a file prefix, not a directory)
  &RSAT::error::FatalError("Output prefix cannot end with a '/' (must be a file name, not a directory)") if  ($main::prefix{main} =~ /\/$/);
  
  ################################################################
  ## Open main log 
  $outfile{log} = $main::prefix{main}."_log.txt";

  
  ## Create main output directory if required
  ($dir{output}, $short_prefix) = &RSAT::util::SplitFileName($main::prefix{main});
  &RSAT::message::TimeWarn("Checking main output directory", $dir{output}) if ($main::verbose >= 2);
  &RSAT::util::CheckOutDir($dir{output});

  ################################################################
  ## Ensure access to the javascript libraries, which are required to
  ## display the results (heatmap, dynamic tables).
  ##
  ## If $include_js_lib is set to 1, the javascript libraries are
  ## included in the output directory, in order to avoid problems with
  ## the links. This solution enables to move the result
  ## (e.g. download a self-contained archive), but costs 500kb of disk
  ## space for each result, we should evaluate alternative solutions.
  ## 
  ## the alternative is to point to the libraries on the RSAT server,
  ## but we faced problems on some servers, due to the path
  ## public_html/lib, which includes a soft link (lib ->
  ## ../perl-scripts/lib).

  local $include_js_lib = 1; 
  
  ## Define the base directories for the javascripts, which are
  ## required to display logo trees, animations and dynamic tables
  if ($include_js_lib) {
      
      &RSAT::message::Debug("Copying javascript libraries to output directory") if ($main::verbose >= 3);
      my $js_sync_cmd = "rsync -ruptl";
      $js_sync_cmd .= " ".$ENV{RSAT}."/perl-scripts/lib/js";
      $js_sync_cmd .= " ".$dir{output};
      &doit($js_sync_cmd, 0, 1, $verbose, 0, "", $main::out, $main::err);
      $js_base = $dir{output}."/js";
      &RSAT::message::Info("Javascript libraries copied to output directory", $js_base) if ($main::verbose >= 3);
      
  }  elsif (defined($ENV{RSA_OUTPUT_CONTEXT}) && ($ENV{RSA_OUTPUT_CONTEXT} eq "cgi")) {
      $js_base = $ENV{rsat_www}."/lib/js";
  } else {
      
      ## NOTE from JvH to Jaime: This solution does not allow to see the
      ## result on another computer than the RSAT server on which
      ## matrix-clustering ran.
      $js_base = $ENV{RSAT}."/perl-scripts/lib/js";
  }
  $d3_base = &RSAT::util::RelativePath($dir{output}, $js_base."/d3.v3.min.js");
  $d3_array_base = &RSAT::util::RelativePath($dir{output}, $js_base."/d3-array.v0.6.min.js");
  $c3_base = &RSAT::util::RelativePath($dir{output}, $js_base."/c3-0.4.10/c3.js");
  $c3_css_base = &RSAT::util::RelativePath($dir{output}, $js_base."/c3-0.4.10/c3.css");
  $jquery_base = &RSAT::util::RelativePath($dir{output}, $js_base."/DataTables-1.10.4/media/js/jquery.js");
  $datatable_base = &RSAT::util::RelativePath($dir{output}, $js_base."/DataTables-1.10.4/media/js/jquery.dataTables.min.js");
  $datatable_css_base = &RSAT::util::RelativePath($dir{output}, $js_base."/DataTables-1.10.4/media/css/jquery.dataTables.min.css");


  ## ################
  ## Process sequence file if there was a top or skip selection
  ## store it instead of the original seq_type file respectively 

  if ($top_sequences_flag || $skip_sequences_flag) {
      
      foreach $seq_t ( @main::seq_types ) {
	  
	 $main::seqfile{$seq_t} = &ProcessTopSeq( $seq_t, $main::seqfile{$seq_t} ) ;
	  
      }
  }
  
  ## Logo formats : png is default
  unless (scalar(@logo_formats)>0) {
      push (@logo_formats,"png");
  }
  $logo_formats = join ",", @logo_formats; ## For the logo
    &RSAT::message::Info("Image formats for logos: ".join (",", sort(@logo_formats))) if ($main::verbose >= 2);
  
  ## ##############
  ## Process background model options

  &RSAT::error::FatalError("Specify one Background model source") unless ($main::infile{bg_file} || $main::bg_input );
  &RSAT::error::FatalError("Please select only one background model source") if ($main::infile{bg_file} && $main::bg_input );


  ## Create/convert the specified background models
  
  ## Create directory to store processed background model files
  my $bg_dir = $dir{output}."/"."bg_files";
  &RSAT::util::CheckOutDir($bg_dir);

  
  ################
  ## Note Ale to Jaime: I realized this option is not a consistent one. You need to have the same background model to make things
  ## comparable, so only the "all" option will be available.
  ## background by sequence
  # if ($main::bg_input eq "by_seq") {
  #     &RSAT::message::TimeWarn("Calculating background model by sequence set", $bg_dir) if ($main::verbose >= 3);

  #     foreach $seq (@main::seq_types){

  # 	  my $new_bg_file=$bg_dir."/" . $seq . "_bg_" . $main::markov . "_2str.inclusive";
	  
  # 	  my $bg_command=" $SCRIPTS/oligo-analysis -v 1 -quick -2str " ;
  # 	  $bg_command.=" -i ".$main::seqfile{$seq}." -l ".$main::markov." -noov ";
  # 	  $bg_command.=" | convert-background-model -from oligos -to inclusive ";
  # 	  $bg_command.=" -o ".$new_bg_file;

  # 	  $bg_files{$seq} = $new_bg_file ;
	  
  # 	  &doit($bg_command, $dry, $die_on_error, $verbose, 0, $job_prefix);
  # 	  &RSAT::message::TimeWarn("Background for sequence set", $seq, $new_bg_file) if ($main::verbose >= 3);

  # 	  $outfile{"bg_file_directory"}=$bg_dir;
	  
  #     }
      
  # } els
      
      ## Define the background model file shared by all sequence sets:
      ## either estimated from the merged input sequences
      ## ($main::bg_input eq "all"), or converted from the background file
      ## given in $main::infile{bg_file}. In both cases the resulting
      ## file is stored in $outfile{bg_file_inclusive} and assigned to
      ## every sequence type in %bg_files.
      if ($main::bg_input eq "all") {

          ## Create one bg model from all input sequences
          my $all_sequences_file = $bg_dir."/all_sequences.fas";
          my $new_bg_file = $bg_dir."/all_sequences_bg_".$main::markov."_2str.inclusive";

          my @seq_files = values(%main::seqfile);

          ## Concatenate all sequence files, count oligonucleotides and
          ## convert the counts to an "inclusive" background model file.
          ## NOTE(review): the oligo length (-l) is set to the Markov
          ## order itself; a Markov model of order m is usually estimated
          ## from (m+1)-mers -- confirm this is intended.
          my $bg_command = " cat ".join(" ", @seq_files)." > ".$all_sequences_file." ; ";
          $bg_command .= "$SCRIPTS/oligo-analysis -v 1 -quick -2str -i  ".$all_sequences_file;
          $bg_command .= " -l ".$main::markov." -noov ";
          $bg_command .= " | convert-background-model -from oligos -to inclusive ";
          $bg_command .= " -o ".$new_bg_file;
          $main::outfile{"bg_file_inclusive"} = $new_bg_file;

          ## Debug message: only printed at high verbosity (it was
          ## previously printed unconditionally, with verbose >= 0)
          &RSAT::message::Debug("Calculating background model using all sequences"."\n".$bg_command,  "bg_file_inclusive".$new_bg_file) if ($main::verbose >= 3);

          &doit($bg_command, $dry, $die_on_error, $verbose, 0, $job_prefix);

          push @files_to_index, "bg_file_inclusive";
          &RSAT::message::Debug("Adding file to index ",  "bg_file_inclusive",   $outfile{bg_file_inclusive}) if ($main::verbose >= 3);

          ## Define same bg file for all sequences
          foreach my $seq (@main::seq_types) {
              $bg_files{$seq} = $new_bg_file;
          }

      } elsif ($main::infile{bg_file}) {

          ## Define name of the converted bg file: short name of the input
          ## file, without its extension, in the bg_files directory.
          ## The whole extension is removed (\w+); the previous pattern
          ## (\.\w$) only removed one-character extensions.
          $outfile{bg_file_inclusive} = $bg_dir."/";
          $outfile{bg_file_inclusive} .= &ShortFileName($infile{bg_file});
          $outfile{bg_file_inclusive} =~ s|\.\w+$||;
          $outfile{bg_file_inclusive} .= "_inclusive.tab";

          ## Convert BG file in inclusive format for matrix-scan-quick
          my $bg_convert_cmd = $SCRIPTS."/convert-background-model";
          $bg_convert_cmd .= " -i ".$infile{bg_file};
          $bg_convert_cmd .= " -from ".$bg_format;
          $bg_convert_cmd .= " -to inclusive";
          $bg_convert_cmd .= " -o ".$outfile{bg_file_inclusive};
          &doit($bg_convert_cmd, $dry, $die_on_error, $verbose, $batch, $job_prefix);
          &RSAT::message::TimeWarn("Converted background model to inclusive format", $outfile{bg_file_inclusive}) if ($main::verbose >= 3);

          ## Define same bg file for all sequences
          foreach my $seq (@main::seq_types) {
              $bg_files{$seq} = $outfile{bg_file_inclusive};
          }

          push @files_to_index, "bg_file_inclusive";
          &RSAT::message::Debug("Adding file to index ",  "bg_file_inclusive",   $outfile{bg_file_inclusive}) if ($main::verbose >= 3);

      }


  
  ## Read background model to use for theoretical distribution
  if ($main::outfile{bg_file_inclusive}){
      $bg_model->load_from_file($main::outfile{bg_file_inclusive},"inclusive");
  }
  if (defined($main::bg_pseudo)) {
      $bg_model->force_attribute("bg_pseudo" => $bg_pseudo);
  }
  

  ## Export matrix-related files and matrix information into one table
  
  ## ##############################################################
  ## Evaluate the enrichment of each matrix of the input file
  my $m = 0; ## Matrix counter
  my %matrix_index = (); ## %matrix_index indexes matrix numbers (value) as a function of matrix names (keys)

  
  ## Task convert_matrix: make sure the motifs are available in TRANSFAC
  ## format. After this block, $main::outfile{motifs_transfac} points to
  ## a TRANSFAC file (the input itself, or a converted copy).
  if ($task{convert_matrix}) {

      if (($matrix_format eq "transfac") || ($matrix_format eq "tf")) {
          ## The input file is already in TRANSFAC format: use it as is
          $main::outfile{motifs_transfac} = $main::infile{motifs};

      } else {
          ## Convert the input file with convert-matrix
          $main::outfile{motifs_transfac} = $dir{output}."/".$short_prefix."_transfac_motifs.tf";

          &RSAT::message::TimeWarn( "Converting input matrices to Tansfac format, outputfile: ",  $main::outfile{motifs_transfac} ) if ($main::verbose >= 3);
          $convert_matrix_cmd = $SCRIPTS."/convert-matrix ";
          $convert_matrix_cmd .= " -i ".$main::infile{motifs};
          $convert_matrix_cmd .= " -from ".$matrix_format;
          $convert_matrix_cmd .= " -to transfac ";
          $convert_matrix_cmd .= " -o ".$main::outfile{motifs_transfac};

          &doit($convert_matrix_cmd, 0, 1, $verbose, 0, "", $main::out, $main::err);

          ## The converted file is temporary
          $to_delete{'motifs_transfac'} = $main::outfile{motifs_transfac};

          ## From now on the motifs are read from the converted file, so
          ## the format must be "transfac" (it was misspelled "trasnfac",
          ## which is not a format name used anywhere else in this script)
          $matrix_format = "transfac";
          $main::infile{motifs} = $main::outfile{motifs_transfac};
      }
  }
  ####
  ## Select top motifs or skip motifs

  ## Restrict the analysis to a subset of the motifs (top and/or skip
  ## options). The selected motifs are exported in TRANSFAC format to a
  ## separate file, which becomes the motif input file.
  if ($top_matrix_flag || $skip_matrix_flag) {

      &RSAT::message::TimeWarn("Selecting top and skippped motifs Tansfac format") if ($main::verbose >= 3);
      $main::outfile{motifs_transfac_top_skip} = $dir{output}."/".$short_prefix."_transfac_motifs_topskip_selected_motifs.tf";

      ## Source of the selection: the TRANSFAC file defined by the
      ## convert_matrix task when available. If that task was not run,
      ## $main::outfile{motifs_transfac} is undefined, so fall back to the
      ## original input file and its declared format.
      my $selection_source = $main::outfile{motifs_transfac};
      my $selection_format = "transfac";
      unless ($selection_source) {
          $selection_source = $main::infile{motifs};
          $selection_format = $matrix_format;
      }

      $convert_matrix_cmd = $SCRIPTS."/convert-matrix ";
      $convert_matrix_cmd .= " -i ".$selection_source;
      $convert_matrix_cmd .= " -from ".$selection_format." ";
      $convert_matrix_cmd .= " -to transfac ";

      $convert_matrix_cmd .= " -top ".$top_matrices     if ($top_matrix_flag == 1);
      $convert_matrix_cmd .= " -skip ".$skip_matrices   if ($skip_matrix_flag == 1);

      $convert_matrix_cmd .= " -o ".$main::outfile{motifs_transfac_top_skip};

      $main::infile{motifs} = $main::outfile{motifs_transfac_top_skip};

      &doit($convert_matrix_cmd, 0, 1, $verbose, 0, "", $main::out, $main::err);

      ## The selected motifs are always written in TRANSFAC format, so the
      ## file read afterwards must be parsed as such
      $matrix_format = "transfac";

      ## NOTE(review): this reuses the key 'motifs_transfac', replacing any
      ## entry registered for the converted TRANSFAC file -- confirm
      ## whether both temporary files should be registered for deletion.
      $to_delete{'motifs_transfac'} = $main::outfile{motifs_transfac_top_skip};

  }
  
  ## Read matrix file
  local $matrix_file = $main::infile{motifs};

  &RSAT::message::TimeWarn("Reading matrix", $matrix_file) if ($main::verbose >= 2);

  my @matrices = &RSAT::MatrixReader::readFromFile($matrix_file, $matrix_format);

  ## Open the ID to Motif name reference table
  my ($ID_to_Motif_name_file) = &RSAT::util::OpenOutputFile($main::outfile{motif_ID_to_name});

  ## Analyse one matrix at a time
  foreach my $matrix (@matrices) {
      $m++;
      
     
      ################
      #Initialize storing arrays
      local @files_to_index=();
      local @partial_matrix_files=();
      local @th_distrib_files=();
      local @perm_distrib_files=();
      local @distrib_files=(); ## check something named  @main::distrib_files
      local @temporary_distrib_files=();
      local @local_seq_types= @main::seq_types; # seq_types are all the sequences where the proceadure is performed, for each matrix the matrix_sites are added.
      ## Redefine the matrix name (in case it would have been modified above)
      ## Attach the analysis parameters to the matrix object
      $matrix->set_attribute("pseudo", $pseudo_counts);
      $matrix->set_attribute("decimals", $decimals);
      $matrix->set_attribute("file", $matrix_file);
      $matrix->force_attribute("matrix.nb", $m);

      ## Attach the background model when a background file was defined.
      ## The file name is reported through the RSAT message system at high
      ## verbosity; it was previously printed unconditionally on STDOUT
      ## for every matrix, even when undefined.
      if ($main::outfile{bg_file_inclusive}) {
          &RSAT::message::Debug("Background model file", $main::outfile{bg_file_inclusive}) if ($main::verbose >= 3);
          $matrix->setMarkovModel($bg_model);
      }
      
      my $m_width = $matrix->get_attribute("ncol");

      ################################################################
      ## Define matrix name.
      ##
      ## We need a name that is
      ## - unambiguous (two matrices cannot have the same name)
      ## - without system-problematic characters (/, $).
      ##

      ## Get the matrix identifier and name, and make them usable as file
      ## and folder names.
      local $matrix_id = $matrix->get_attribute("id");
      local $matrix_name = $matrix->get_attribute("name");

      foreach my $label ($matrix_name, $matrix_id) {
          ## Default label for matrices without name / identifier
          $label = "matrix_".$m unless (defined($label) && ($label =~ /\S/));

          ## Replace ALL occurrences of the problematic characters (the
          ## substitutions were previously applied to the first occurrence
          ## only, leaving e.g. the second slash of "a/b/c" in place)
          $label =~ s/\+/plus/g;
          $label =~ s/\//_/g; ## Avoid slashes because they would be interpreted as subfolders
          $label =~ s/\$/_/g; ## Avoid $ because the following word would be interpreted as a variable in Unix system
      }

      ## Check if another matrix with the same name has already been
      ## indexed; if so, make the name unique with the matrix number
      if (defined($matrix_index{$matrix_name})) {
          &RSAT::message::Warning("Matrix file contains sevral matrices with name",
                                  $matrix_name, ". Adding suffix _m".$m);
          $matrix_name .= "_m".$m;
      }

      ## Index the final name, including names made unique above (these
      ## were previously left out of the index)
      $matrix_index{$matrix_name} = $m;

      push(@all_matrix_names, $matrix_name);


      $matrix->force_attribute("name", $matrix_name);
      &RSAT::message::TimeWarn("Analyzing matrix", $m, $matrix_name) if ($main::verbose >= 2);
      
      ## Print the ID -> Name in the File
      &RSAT::message::Debug("Adding matrix information to registry table ", $matrix_name , $matrix_id ) if ($main::verbose >= 0);


      ################################################################
      ## Compute min and max weight values for score distributions
      local ($Wmin, $Wmax)  = $matrix->weight_range();

      
      &RSAT::message::Info($matrix_name, "Matrix weight range", $Wmin, $Wmax) if ($main::verbose >= 2);
     
      ################################################################
      ## Define matrix-specific output directory (subfolder)
      $dir{matrix_output} = $dir{output}."/".$matrix_name;
      
      ## Create matrix-specific output directory if required
      &RSAT::message::TimeWarn("Checking matrix output directory", $dir{matrix_output}) if ($main::verbose >= 2);
      &RSAT::util::CheckOutDir($dir{matrix_output});
      
      ## Define matrix-specific prefix
      $matrix_prefix{$matrix_name} = $dir{matrix_output}."/".$short_prefix;
      
      $matrix_prefix{$matrix_name} .= "_".$matrix_name  ;
      
      ## Open matrix-specific log file
      $outfile{matrix_log} = $main::matrix_prefix{$matrix_name}."_log.txt"; push @files_to_index, "matrix_log";
      $main::out = &OpenOutputFile($outfile{matrix_log});
      &RSAT::message::Debug("Adding file to index ",   "matrix_log", $main::matrix_prefix{$matrix_name}."_log.txt" ) if ($main::verbose >= 10);

      ################################################################
      ## Export matrix in various formats
      
      ## Define file names here because we need them for the index, even
      ## if we don't run the export task
      $outfile{matrix_info} = $matrix_prefix{$matrix_name}."_matrix_info.txt";  push @files_to_index, "matrix_info" ;
      &RSAT::message::Debug("Adding file to index ",  "matrix_info",  $matrix_prefix{$matrix_name}."_matrix_info.txt" ) if ($main::verbose >= 10);
      
      ## Transfac outfile
      $outfile{matrix_transfac}=  $matrix_prefix{$matrix_name}."_matrix_transfac.tf";  push @files_to_index, "matrix_transfac";

      ## Tab outfile
      $outfile{matrix_tab}=  $matrix_prefix{$matrix_name}."_matrix_tab.txt";  push @files_to_index, "matrix_tab";

      ## logo file
      $outfile{matrix_logo}= $matrix_prefix{$matrix_name}."_logo" ;
      $matrix_info_general_index{$matrix_name}{logo}=$outfile{matrix_logo};
      $matrix_info_general_index{$matrix_name}{synthesis}=$outfile{matrix_synthesis};
      
      foreach my $logo_format (@logo_formats) {
	  $outfile{"matrix_logo_".$logo_format} = $outfile{matrix_logo}."_m1.".$logo_format; push @files_to_index, "matrix_logo_".$logo_format;
	  &RSAT::message::Debug("Adding file to index ",   "matrix_logo_".$logo_format ,  $outfile{matrix_logo}."_m1.".$logo_format ) if ($main::verbose >= 10);
	  $outfile{"matrix_logo_rc_".$logo_format} = $outfile{matrix_logo}."_m1_rc.".$logo_format; push @files_to_index, "matrix_logo_rc_".$logo_format;
	  &RSAT::message::Debug("Adding file to index ",  "matrix_logo_rc_".$logo_format, $outfile{matrix_logo}."_m1_rc.".$logo_format ) if ($main::verbose >= 10);
      }

      ## ##############################################################
      ## Export matrix in various formats
      if ($task{export_matrix}) {
	  ## Export the matrix in tab-delimited format
	  &ExportTabMatrix($matrix);
	  
	  ## Export the matrix in TRANSFAC format
	  &ExportTransfacMatrix($matrix);
	  
	  ## Export the matrix in tab-delimited format with additional information + the logos
	  &ExportMatrixInfo($matrix);
      }

      ## Shuffle the columns of the matrix (permutation test)
      &PermuteMatrixColumns() if  $task{permute} ;

      
      ################################################################
      ## Calculate theoretical distribution of probabilities
      $outfile{'matrix_theoretical_distrib'} = $main::matrix_prefix{$matrix_name}."_theor_score_distrib.tab"; push @files_to_index, "matrix_theoretical_distrib";
      &RSAT::message::Debug("Adding file to index ",  "matrix_theoretical_distrib", $outfile{'matrix_theoretical_distrib'}  ) if ($main::verbose >= 10);

      &CalcTheorScoreDistribution( $outfile{matrix_tab}, $outfile{'matrix_theoretical_distrib'} ) if ($task{theor});
      
      ################################################################
      ## Compute empirical distribution in the input sequence files
      ## For each sequence set: compute the score distribution with the
      ## matrix and, when permutations were requested for this sequence
      ## set, with each column-permuted matrix; then merge the permuted
      ## distributions into a single distribution file.
      foreach my $seq_type (@local_seq_types) {
          &RSAT::message::TimeWarn("Analyzing sequence type", $seq_type, $seqfile{$seq_type}) if ($main::verbose >= 2);
          &CalcSequenceDistrib($seqfile{$seq_type}, $outfile{matrix_tab}, 'tab', $seq_type, 1,  @matrix_scan_options);

          ## Score sequences with the permuted matrices
          if (($seqfile{$seq_type}) &&
              (defined($perm_nb{$seq_type})) &&
              ($perm_nb{$seq_type} > 0)) {

              ## Calculate the separate distributions for each permuted matrix
              ## (this highlights the variability but the graph is noisy)
              for my $i (1..$perm_nb{$seq_type}) {
                  ## Package variable, as in the original code (the lexical
                  ## $perm_suffix is only declared further down)
                  $perm_suffix = $seq_type."_perm_col_".$i;
                  if (defined($scanopt{$seq_type})) {
                      $scanopt{$perm_suffix} = $scanopt{$seq_type};
                  }
                  ## The condition was written "task{scan}" (missing sigil),
                  ## which Perl does not parse as a lookup in %task
                  push @perm_distrib_files, &CalcSequenceDistrib($seqfile{$seq_type}, $outfile{'matrix_perm_col_'.$i}, "tab", $perm_suffix, $perm_separate_distrib,  @matrix_scan_options) if ($task{scan});
              }

              ## Compute the distribution for all the permutation tests

              ## Define the output file for the regrouped permutation tests
              my $perm_suffix = $seq_type.'_'.$perm_nb{$seq_type}.'perm';
              $outfile{$perm_suffix} =  $matrix_prefix{$matrix_name}."_scan_".$perm_suffix."_score_distrib.tab"; push @files_to_index, $perm_suffix;
              &RSAT::message::Debug("Adding file to index ",  $perm_suffix, $outfile{$perm_suffix}  ) if ($main::verbose >= 10);
              push @distrib_files, $outfile{$perm_suffix}; $file_nb{$perm_suffix} = scalar(@distrib_files);

              ## Run compare-scores to compute the dCDF of the merged permutation test
              ## NOTE(review): @perm_distrib_files is not reset between
              ## sequence types, so the merge for a later sequence type also
              ## includes the files of the previous ones -- confirm intent.
              my $merge_cmd = $SCRIPTS."/compare-scores -v 2 ";
              $merge_cmd .= " -ic 1 -numeric -sc 2";
              $merge_cmd .= " -files ";
              $merge_cmd .= join " ", @perm_distrib_files;
              my $last_col = scalar(@perm_distrib_files) + 1;
              $merge_cmd .= " | ".$SCRIPTS."/row-stats -before -col 2-".$last_col;
              &RSAT::message::Debug("Merging permuted distributions", $merge_cmd) if ($main::verbose >= 3);

              ## Compute the cumulative and decreasing cumulative
              ## distributions
              if ($task{scan}) {
                  my @weights = ();
                  my @occ = ();
                  my @cum_occ = ();
                  my $cum_occ = 0;

                  ## Read the merged table from the command pipeline
                  ## (lexical handle, explicit pipe mode, checked open)
                  open(my $merge_fh, "-|", $merge_cmd)
                      or &RSAT::error::FatalError("Cannot run the command merging permuted distributions: ".$merge_cmd." (".$!.")");
                  while (my $line = <$merge_fh>) {
                      chomp($line);
                      next if ($line =~ /^;/);    ## Skip comment lines
                      next if ($line =~ /^#/);    ## Skip header lines
                      next unless ($line =~ /\S/); ## Skip empty lines
                      my @fields = split(/\t/, $line);
                      my $weight = $fields[4];
                      my $occ = $fields[2];
                      $cum_occ += $occ;
                      push @weights, $weight;
                      push @occ, $occ;
                      push @cum_occ, $cum_occ;
                  }
                  close($merge_fh)
                      or &RSAT::message::Warning("Command merging permuted distributions did not terminate properly", $merge_cmd);

                  ## Total number of occurrences (0 if nothing was read)
                  my $total_occ = scalar(@cum_occ) ? $cum_occ[-1] : 0;

                  ## Print the merged distribution
                  my $merged_distrib = &OpenOutputFile($outfile{$perm_suffix});
                  print $merged_distrib join ("\t", "#weight", "occ", "cum", "dcum", "dCDF"), "\n";
                  for my $i (0..$#weights) {
                      my $dcum_occ = $total_occ - $cum_occ[$i] + $occ[$i];
                      ## Avoid a division by zero when all occurrences are null
                      my $dcdf = $total_occ ? ($dcum_occ / $total_occ) : 0;
                      print $merged_distrib join ("\t",
                                                  $weights[$i],
                                                  $occ[$i],
                                                  $cum_occ[$i],
                                                  $dcum_occ,
                                                  sprintf("%7g", $dcdf)
                          ), "\n";
                  }
                  close $merged_distrib;
                  &RSAT::message::TimeWarn("Exported merged distribution", $outfile{$perm_suffix}) if ($main::verbose >= 2);
              }
          }
      }
      
      ## Compare the (single) theoretical and (multiple) empirical distributions
      &CompareDistrib($distrib_score_col, @distrib_files); # the column of interest is rel_ic (inv_cum_freq)
      

      ################################################################
      ##Calculate NWD adn OCC
      &RSAT::message::TimeWarn("Calculating NWD file")  if ($main::verbose >= 2);
      
      ## Tasks compare / graphs: for the current matrix, call Calculate_NWD
      ## and Calculate_OCC for each sequence set, collect their return
      ## values per matrix and per sequence set, then call Draw_NWD and
      ## Draw_OCC on the values collected for this matrix.
      if ($task{compare} || $task {graphs}) {
	  foreach (@local_seq_types ) {
	      my $st= $_; ## Name of the current sequence set
	      #my $nwd_st= &Calculate_NWD ($m_width,$outfile{distrib_compa}.".tab",$st) if ($main::plot_types{nwd}) ;
	      
	      #die "sequence". $st. "+++" . $main::plot_seq_type{$st} ;
	      #die "BOOM";
	      ## In contrast with matrix-quality, this program calculates the NWD for all sequence sets
	      
	      my $nwd_st= &Calculate_NWD ($m_width,$outfile{distrib_compa}.".tab",$st) ;
	      $main::nwd_seqs++; ## Counter incremented once per (matrix, sequence set) pair; meant to decide if the heatmap can be drawn (it requires at least two matrices and two sequence sets)
	      push ( @{ $tab_files_for_aux_plots_nwd_per_matrix{$matrix_name} }, $nwd_st);
	      push ( @{ $tab_files_for_aux_plots_nwd_per_seq{$st} }, $nwd_st);
	      
	      ## Same for the occurrence probabilities (OCC) of this matrix in this sequence set
	      ## (presumably Calculate_OCC returns the path of a table file, as suggested by the list names -- TODO confirm)
	      my $occ_proba_st=&Calculate_OCC ($seqfile{$st}, $outfile{matrix_tab}, 'tab', $st, 1,  @matrix_scan_options)   ;
	      push ( @{ $tab_files_for_aux_plots_occ_proba_per_matrix{$matrix_name} }, $occ_proba_st);
	      push ( @{ $tab_files_for_aux_plots_occ_proba_per_seq{$st} }, $occ_proba_st);	 
	  }
	  
	  ## Draw NWD per matrix: if several sequence types were specified, this includes the NWD curves of this matrix in all the sequence sets.
	  
	  ( $outfile{matrix_nwd_table}, $outfile{matrix_nwd_plots}) =  &Draw_NWD ($main::matrix_prefix{$matrix_name}."_nwd_".$matrix_name, @{ $tab_files_for_aux_plots_nwd_per_matrix{$matrix_name} }) ;
	  push (@files_to_index,'matrix_nwd_table');
	  
	  ## Draw OCC per matrix: same as above, for the occurrence probability results of this matrix in all the sequence sets.
	  
	  ( $outfile{matrix_occ_proba_table}, $outfile{matrix_occ_proba_plots}) =  &Draw_OCC ($main::matrix_prefix{$matrix_name}."_occ_proba_".$matrix_name, @{ $tab_files_for_aux_plots_occ_proba_per_matrix{$matrix_name} }) ;
	  push (@files_to_index,'matrix_occ_proba_table');
	  
      }
      
      ## Give the warning about the output prefix
      if ($main::verbose >= 2) {
	  &RSAT::message::Info("Matrix-specific matrix output directory", $dir{matrix_output});
	  &RSAT::message::Info("Matrix-specific matrix log file", $outfile{matrix_log});
	 # &RSAT::message::Info("Matrix-specific matrix synthesis file", $outfile{matrix_synthesis}); # this file has not been declared
      }
  } ## per matrix analysis


  ################
  ## Draw NWD and OCC graphs per sequence set
  if ($task{graphs}){
      foreach ( @main::seq_types ) {
	  
	  my $st= $_;
	  push(@all_sequence_names, $st);
	  ( $outfile{$st."all_matrices_nwd_table"}, $outfile{$st."all_matrices_nwd_plot"}) =  &Draw_NWD ($main::prefix{main}."_nwd_".$st,   @{ $tab_files_for_aux_plots_nwd_per_seq{$st} }) ;
	  print ((join("++", @{$tab_files_for_aux_plots_occ_proba_per_seq{$st}})), "\n");
	  ( $outfile{$st."all_matrices_occ_proba_table"}, $outfile{$st."all_matrices_occ_proba_plot"}) =  &Draw_OCC ($main::prefix{main}."_occ_proba_".$st,   @{ $tab_files_for_aux_plots_occ_proba_per_seq{$st} })  ;
	  #print $outfile{$st."all_matrices_occ_proba_table"}."\n";
	  #print $outfile{$st."all_matrices_occ_proba_plot"}."\n";
	  
	  ################
	  ## Heatmaps can only be produced when at least two matrices and two sequence sets are used for NWD calculation
	  # if  ( ($main::plot_types{nwd}) && ($main::num_mtx>=2) && ($main::nwd_seqs>=2) ){
	  # 	  my $nwd_all_file = $main::prefix{main}."_"."all_nwd_files.txt";
	  # 	  my $r_mtxq_sc=$ENV{RSAT}."/R-scripts/matrix_quality_compare.R";
	  
	  # 	  my $nwd_all_plot = $main::prefix{main}."_"."all_nwd_plot";
	  
	  # 	  $nwd_all_file_out = &OpenOutputFile($nwd_all_file);
	  
	  # 	  foreach my $mtx (keys %nwd_all_files) {
	  # 	      while (my ($seq, $nwd_file) = each %{ $nwd_all_files{$mtx} } ) {
	  # 		  print $nwd_all_file_out join("\t",$mtx,$seq,$nwd_file)."\n";
	  # 	      }
	  # 	  }
	  
	  # 	  &RSAT::message::Info("NWD files for heatmap", $nwd_all_file) if ($main::verbose >= 2);
	  # 	  my $R_command="Rscript ". $r_mtxq_sc." ".$nwd_all_file." ".$nwd_all_plot." ".join(",",'pdf','png');
	  
	  # 	  &RSAT::message::Info("Rcommand for NWD cluster",  $R_command) if ($main::verbose >= 2);
	  
	  # 	  &doit( $R_command, $dry, $die_on_error, $verbose, $batch, $job_prefix);
	  # 	  $outfile{"heatmap_max_nwd"} = join("/",$nwd_all_plot,"maxNWD_heatmap_compare");
	  # 	  $outfile{"heatmap_nwdsig"} = join("/",$nwd_all_plot,"maxNWDsignificantScore_heatmap_compare");
	  # 	  $outfile{"heatmap_aucall"} = join("/",$nwd_all_plot,"AUC_NWD_heatmap_compare");
	  # 	  $outfile{"heatmap_aucsig"} = join("/",$nwd_all_plot,"AUC_NWDsignificantScore_heatmap_compare");
	  
	  # }
	  
      }
  
  my $nwd_all_file = $main::prefix{main}."_"."all_nwd_files.txt";
  my $r_mtxq_sc=$ENV{RSAT}."/R-scripts/matrix_quality_compare.R";
  
  my $nwd_all_plot = $main::prefix{main}."_"."all_nwd_plot";
  
  $nwd_all_file_out = &OpenOutputFile($nwd_all_file);
  
  foreach my $mtx (keys %nwd_all_files) {
      while (my ($seq, $nwd_file) = each %{ $nwd_all_files{$mtx} } ) {
	  print $nwd_all_file_out join("\t",$mtx,$seq,$nwd_file)."\n";
      }
  }
  
  &RSAT::message::Info("NWD files for heatmap", $nwd_all_file) if ($main::verbose >= 2);
  my $R_command="Rscript ". $r_mtxq_sc." ".$nwd_all_file." ".$nwd_all_plot." ".join(",",'pdf','png')." 0";
  
  &RSAT::message::Info("R-command for NWD cluster",  $R_command) if ($main::verbose >= 2);
  
  &doit( $R_command, $dry, $die_on_error, $verbose, $batch, $job_prefix);
  $outfile{"heatmap_max_nwd"} = join("/",$nwd_all_plot,"maxNWD_heatmap_compare");
  $outfile{"heatmap_nwdsig"} = join("/",$nwd_all_plot,"maxNWDsignificantScore_heatmap_compare");
  $outfile{"heatmap_aucall"} = join("/",$nwd_all_plot,"AUC_NWD_heatmap_compare");
  $outfile{"heatmap_aucsig"} = join("/",$nwd_all_plot,"AUC_NWDsignificantScore_heatmap_compare");
  
 
  ################################################################
  ## Draw the dynamic heatmap with the clustering of the maxNWD
  &DynamicHeatmapEnrichment();

  ################################################################
  ## Draw the Binomial Occ Significance curves 
  ## of each TF on each sequence
  &DynamicOccProfiles();

  ##############################
  ## Generate the HTML report
  &CreateHTMLReport();
   
  }
  
  ## Give the warning about the output prefix
  if ($main::verbose >= 2) {
      &RSAT::message::Info("Main output directory", $dir{output});
      &RSAT::message::Info("Main log file", $outfile{log});
      # &RSAT::message::Info("Main synthesis file", $outfile{synthesis}); ## File to be specified
  }
 
  ################################################################
  ## Print verbose
  $main::out = &OpenOutputFile($outfile{log});
  &Verbose() if ($main::verbose >= 1);
  
  ################################################################
  ## Report execution time and close output stream
  &close_and_quit();
  
}

################################################################
################### SUBROUTINE DEFINITION ######################
################################################################


################################################################
## Report the execution time, close the output stream and terminate
## the program with exit status 0.
##
## Reads the globals $start_time, $main::out, $main::verbose and
## %outfile. This routine never returns to its caller.
sub close_and_quit {

  ## The execution time is always computed, but it is only written to
  ## the output stream when some verbosity was requested.
  my $time_report = &RSAT::util::ReportExecutionTime($start_time);
  if ($main::verbose >= 1) {
    print {$main::out} $time_report;
  }

  ## The stream is only closed explicitly when an output file is
  ## registered under $outfile{output}.
  if ($outfile{output}) {
    close $main::out;
    if ($main::verbose >= 2) {
      &RSAT::message::TimeWarn("Output file", $outfile{output});
    }
  }

  ## Any other file that needs closing should be closed before this point.
  exit(0);
}


################################################################
## Display the full help message (the POD of this script, rendered
## by pod2text), then exit.
sub PrintHelp {
  ## List form of system(): the script path is handed to pod2text as a
  ## single argument, without going through a shell, so that a path
  ## containing spaces or shell metacharacters is processed correctly.
  system("pod2text", "-c", $0);
  exit();
}

################################################################
## Display the help message for the option -help.
## This routine simply delegates to PrintHelp(), so the same help
## message is displayed as with the option -h.
sub PrintOptions {
  return &PrintHelp();
}

################################################################
## Read arguments
##
## Parse the command-line options and store their values in global
## variables of the package main (nothing is returned).
##
## The parsing is done on a copy of @ARGV, so that @ARGV itself is left
## untouched and the command line can still be reported afterwards.
##
## Unknown option names, and invalid values for the options that are
## validated, are reported by calling FatalError.
##
## The POD sections interleaved with the code document each option right
## next to the code that parses it.
sub ReadArguments {
  my $arg;
  my @arguments = @ARGV; ## create a copy to shift, because we need ARGV to report command line in &Verbose()
  while (scalar(@arguments) >= 1) {
    $arg = shift (@arguments);


=pod

=head1 OPTIONS

=over 4

=item B<-v #>

Level of verbosity (detail in the warning messages during execution)

=cut
    if ($arg eq "-v") {
      ## The verbosity level is optional: when the next argument is not
      ## a natural number, it is left in place and the level is set to 1.
      if (&IsNatural($arguments[0])) {
        $main::verbose = shift(@arguments);
      } else {
        $main::verbose = 1;
      }


=pod

=item B<-h>

Display full help message

=cut
    } elsif ($arg eq "-h") {
      &PrintHelp();


=pod

=item B<-help>

Same as -h

=cut
    } elsif ($arg eq "-help") {
      &PrintOptions();


=pod

=item	B<-title title>

Title displayed on top of the report page.

=cut
    } elsif ($arg eq "-title") {
      $main::title = shift(@arguments);
      ## Each stretch of white spaces is replaced by a single underscore
      $main::title =~ s/\s+/_/g;

=pod

=item B<-matrix input_motif_file>

The input file contains a set of position-specific scoring
matrices.

=cut
    } elsif ($arg eq "-matrix") {
      $main::infile{motifs} = shift(@arguments);


=pod

=item B<-matrix_format matrix_format>

Specify the input matrix format.


B<Supported matrix formats>

Since the program takes several matrices as input, it only accepts
matrices in formats supporting several matrices per file (transfac,
tf, tab, clusterbuster, cb, infogibbs, meme, stamp, uniprobe).

For a description of these formats, see the help of I<convert-matrix>.

=cut
    } elsif ($arg eq "-matrix_format") {
      $matrix_format = shift(@arguments);
      unless ($main::supported_matrix_formats{$matrix_format}){
        &RSAT::error::FatalError($matrix_format, "Invalid format for input matrices\tSupported: ".$main::supported_matrix_formats);
      }

=pod

=item	B<-logo_format>

Image format for the sequence logos.

Multiple image formats can be specified either by using iteratively
the option, or by separating them by commas.

Example:
   -logo_format png,pdf

=cut
    } elsif ($arg eq "-logo_format") {
      my $image_format = shift(@arguments);
      my @tmp_logo_formats = split(',',$image_format);
      if (scalar(@tmp_logo_formats)>0) {
        foreach my $f (@tmp_logo_formats) {
          push (@main::logo_formats, $f);
        }
      } else {
        ## Nothing came out of the split: keep the value as it was given
        push (@main::logo_formats, $image_format);
      }

=pod

=item	B<-img_format>

Image format for the plots (ROC curve, score profiles, ...).
To display the supported formats, type the following command:
XYgraph -h.

Multiple image formats can be specified either by using iteratively
the option, or by separating them by commas.

Example:
   -img_format png,pdf

=cut
    } elsif ($arg eq "-img_format") {
      my $image_format = shift(@arguments);
      my @tmp_img_formats = split(',',$image_format);
      if (scalar(@tmp_img_formats)>0) {
        foreach my $f (@tmp_img_formats) {
          push (@main::image_formats, $f);
        }
      } else {
        ## Nothing came out of the split: keep the value as it was given
        push (@main::image_formats, $image_format);
      }

      ## Pseudo weight

=pod

=item B<-pseudo pseudo_counts>

Pseudo-counts.
The pseudo-count reflects the possibility that residues that were
not (yet) observed in the model might however be valid for future
observations. The pseudo-count is used to compute the corrected
residue frequencies.


=cut
    } elsif ($arg eq "-pseudo") {
      $main::pseudo_counts = shift(@arguments);
      ## Note: the value 0 is accepted by this check
      &RSAT::error::FatalError(join("\t", $main::pseudo_counts,
                                    "Invalid value for pseudo-counts. Must be a positive real number."))
        unless ((&RSAT::util::IsReal($main::pseudo_counts) )
                && ($main::pseudo_counts >= 0));


=pod

=item B<-top_matrices X>

Only analyze the first X motifs of the input file. This option is
convenient for quick testing before starting the full analysis.

=cut
    } elsif ($arg eq "-top_matrices") {
      $top_matrices = shift(@arguments);
      $top_matrix_flag = 1;

      &RSAT::error::FatalError($top_matrices, "Invalid value for option -top_matrices: must be a natural number")
        unless (&IsNatural($top_matrices));

=pod

=item B<-skip_matrices X>

Skip the first X motifs of the input file. This option is convenient
for testing the program on a subset of the motifs before starting the
full analysis.

=cut
    } elsif ($arg eq "-skip_matrices") {
      $skip_matrices = shift(@arguments);
      $skip_matrix_flag = 1;
      &RSAT::error::FatalError($skip_matrices, "Invalid value for option -skip_matrices: must be a natural number")
        unless (&IsNatural($skip_matrices));

=pod

=item B<-top_sequences X>

Only analyze the first X sequences of the input file. This option is
convenient for quick testing before starting the full analysis.

=cut
    } elsif ($arg eq "-top_sequences") {
      $top_sequences = shift(@arguments);
      $top_sequences_flag = 1;

      &RSAT::error::FatalError($top_sequences, "Invalid value for option -top_sequences: must be a natural number")
        unless (&IsNatural($top_sequences));

=pod

=item B<-skip_sequences X>

Skip the first X sequences of the input file. This option is convenient
for testing the program on a subset of the sequences before starting the
full analysis.

=cut
    } elsif ($arg eq "-skip_sequences") {
      $skip_sequences = shift(@arguments);
      $skip_sequences_flag = 1;
      ## The error message names the option that is actually being parsed
      &RSAT::error::FatalError($skip_sequences, "Invalid value for option -skip_sequences: must be a natural number")
        unless (&IsNatural($skip_sequences));

=pod

=item B<-seq seq_type input_sequences_file>

A file containing the sequences in fasta format.

=cut
    } elsif ($arg eq "-seq") {
      my $seq_type = shift(@arguments);
      ## Substitute special characters which cannot be used inside a file name
      $seq_type =~ s|\s|_|g;
      $seq_type =~ s|/|_|g;
      $seq_type =~ s|:|_|g;

      $main::seqfile{$seq_type} = shift(@arguments);

      ## Keep track of the order in which the sequence sets were specified
      push @main::seq_types, $seq_type;

=pod

=item B<-seq_format sequence_format>

Sequence format.

=cut
    } elsif ($arg eq "-seq_format") {
      $main::seq_format = shift(@arguments);

=pod

=item B<-bgfile background_file>

Background model to be used to calculate the matrix theoretical
distribution.  The matrix theoretical distribution is calculated with
I<matrix-distrib>.

=cut
    } elsif ($arg eq "-bgfile") {
      $main::infile{bg_file} = shift(@arguments);

      ## Format of Background model for theoretical score distribution
      # If the option -th_prior and -bg_file are used at the same time
      # the background format must be the same in both cases.


=pod

=item B<-bg_format background_format>

Format for the background model file.

        Supported formats: all the input formats supported by
        convert-background-model.


=cut
    } elsif ($arg eq "-bg_format") {
      $main::bg_format = shift(@arguments);

=pod

=item B<-bg_input>

        Calculate the background distribution from input set

        Supported options: all   
        all=Calculate one unique background model for all sequences.


=cut
    } elsif ($arg eq "-bg_input") {
      $main::bg_input = shift(@arguments) ;

=pod

=item B<-markov>

        Markov order to calculate bg_input


=cut
    } elsif ($arg eq "-markov") {
      $main::markov = shift(@arguments) ;

=pod

=item B<-perm seq_type #>

Number of permutations for a specific set (default 0).

=cut
    } elsif ($arg eq "-perm") {
      my $seq_type = shift(@arguments);
      ## Substitute special characters which cannot be used inside a file name
      ## (same substitutions as for the option -seq, so that the keys match)
      $seq_type =~ s|\s|_|g;
      $seq_type =~ s|/|_|g;
      $seq_type =~ s|:|_|g;
      $main::perm_nb{$seq_type} = shift(@arguments);
      &RSAT::error::FatalError($main::perm_nb{$seq_type}, "Invalid value for option -perm. Should be a Natural number.")
        unless (&IsNatural($main::perm_nb{$seq_type}));

=pod

=item	B<-heatmap_color_palette Color_Palette>

Select the color palette used in the heatmaps (sequential scales)
The color palettes (and their names) are taken from ColorBrewer2 website (http://colorbrewer2.org/)

Supported: YlOrRd,YlOrBr,YlGnBu,YlGn,PuRd,PuBuGn,PuBu,OrRd,GnBu,BuPu,BuGn,Reds,Purples,Oranges,Greys,Greens,Blues

Default: YlOrRd

=cut

    } elsif ($arg eq "-heatmap_color_palette") {
      $heatmap_color_palette = shift(@arguments);
      unless(exists($supported_heatmap_color_palette{$heatmap_color_palette})) {
        &RSAT::error::FatalError($heatmap_color_palette, "Invalid heatmap color palette. Supported:", $supported_heatmap_color_palette);
      }

=pod

=item	B<-heatmap_color_classes X>

This option specifies in how many color classes the color palette will be divided.

For sequential color palettes: max 9
For diverging color palettes: max 11

If the user specified a color greater than the maximum allowed, the program takes this maximum value.

For more information see ColorBrewer2 website (http://colorbrewer2.org/)

=cut
    } elsif ($arg eq "-heatmap_color_classes") {
      $heatmap_color_classes = shift(@arguments);

      ## The error message names the option that is actually being parsed
      &RSAT::error::FatalError($heatmap_color_classes, "Invalid value for option -heatmap_color_classes: must be a natural number")
        unless (&IsNatural($heatmap_color_classes));

=pod

=item B<-task tasks>

Specify one or several tasks to be run. If this option is not
specified all the tasks are run.

Note that some tasks depend on other ones. This option should thus be
used with caution, by experienced users only.

Supported tasks:

=over

=item B<export_matrix>

Export the matrix and sites in various formats (tab, info, logos)

=item B<permute>

Scan sequences with permuted matrices

=item B<theor>

Calculate the theoretical distribution

=item B<theor_cv>

Calculate the theoretical distribution of loo partial matrices

=item B<scan>

Scan sequences with I<matrix-scan>

=item B<compare>

Compare distributions between the various input files

=item B<graphs>

Draw the graphs with distrib comparisons

=item B<synthesis>

Generate a HTML file with a synthetic report, which displays the main
graphs (distribution curves and ROC curve) and provides links to the
result files.

In order to be correctly indexed, the graphs have to be generated in
png format.

=item B<clean>

Clean temporary files.

=back

=cut
    } elsif ($arg eq "-task") {
      $arg = shift (@arguments);
      chomp($arg);
      ## Several tasks can be specified at once, separated by commas
      my @tasks = split ",", $arg;
      foreach my $task (@tasks) {
        $task = lc($task);
        if ($supported_task{$task}) {
          $task{$task} = 1;
        } else {
          &RSAT::error::FatalError($task, "Invalid tasks. Supported:", $supported_tasks);
        }
      }


=pod

=item	B<-o outputfile>

If no output file is specified, the standard output is used.  This
allows to use the command within a pipe.

=cut
    } elsif ($arg eq "-o") {
      $main::prefix{main} = shift(@arguments);

    } else {
      &FatalError(join("\t", "Invalid option", $arg));

    }
  }

=pod

=back

=cut

}

################################################################
## Select a subset of a sequence file by running convert-seq.
##
## Arguments:
##   $seq_type  label of the sequence set (used in the output file name)
##   $seq_file  path of the input sequence file
##
## The options -top and -skip are added to the convert-seq command when
## $top_sequences_flag and $skip_sequences_flag, respectively, equal 1.
## The input and output formats are both $main::seq_format.
##
## Returns the path of the file given to convert-seq as output.
sub ProcessTopSeq {
    my ($seq_type, $seq_file) = @_;

    if ($main::verbose >= 2) {
        &RSAT::message::TimeWarn("Selecting top or skip sequences for file", $seq_file);
    }

    ## Output file, located in the main output directory
    my $selected_seq_file = $dir{output}."/".$short_prefix."_".$seq_type."_selected_topskip_sequences.fas";

    ## Collect the successive pieces of the command line
    my @cmd_parts = (
        $SCRIPTS."/convert-seq",
        " -i ".$seq_file,
        " -from ".$main::seq_format." -to ".$main::seq_format,
    );
    if ($top_sequences_flag == 1) {
        push(@cmd_parts, " -top ".$top_sequences);
    }
    if ($skip_sequences_flag == 1) {
        push(@cmd_parts, " -skip ".$skip_sequences);
    }
    push(@cmd_parts, " -o ". $selected_seq_file);

    ## The command is stored in the package variable $convert_seq_cmd
    $convert_seq_cmd = join("", @cmd_parts);
    &doit($convert_seq_cmd, $dry, $die_on_error, $verbose, 0, $job_prefix);

    return($selected_seq_file);
}


################################################################
## Verbose message
##
## Print to the output stream $out: the program name followed by the
## command-line arguments, the program version, and the list of
## input and output files (%main::infile and %main::outfile).
sub Verbose {
  ## Report the actual program name, as done in the HTML report
  print $out "; matrix-enrichment ";
  &PrintArguments($out);
  printf $out "; %-22s\t%s\n", "Program version", $program_version;

  ## Files are listed by sorted key, so that the report is identical
  ## from one run to the next and does not depend on the state of the
  ## internal iterator of the hashes.
  if (%main::infile) {
    print $out "; Input files\n";
    foreach my $key (sort keys %main::infile) {
      printf $out ";\t%-13s\t%s\n", $key, $main::infile{$key};
    }
  }
  if (%main::outfile) {
    print $out "; Output files\n";
    foreach my $key (sort keys %main::outfile) {
      printf $out ";\t%-13s\t%s\n", $key, $main::outfile{$key};
    }
  }
}


################################################################
## Draw the dynamic heatmap of the maxNWD values by running the
## R script matrix-enrichment_heatmap.R.
##
## The names of the files involved are registered in %main::outfile.
## The R script is piped into the R executable, and the parameters are
## passed as a series of R assignments after --args.
##
## A fatal error is raised if the R script cannot be read. If the R
## executable is not found, a warning is issued (verbosity >= 1) and
## nothing is run.
sub DynamicHeatmapEnrichment {

    ## Files used or produced by the heatmap script
    $main::outfile{short_path_maxNWD_tsv} = $short_prefix."_matrix_heatmap.tsv";
    $main::outfile{maxNWD_tsv} = $main::prefix{main}."_matrix_heatmap.tsv";
    $main::outfile{maxNWD_heatmap_html} = $main::prefix{main}."_motif_enrichment_maxNWD_heatmap.html";
    $main::outfile{maxNWD_heatmap_template} = $ENV{RSAT}."/public_html/templates_html/motif_enrichment_dynamic_heatmap_d3.html";

    if ($main::verbose >= 2) {
        &RSAT::message::TimeWarn("Drawing heatmap of maxNWD");
    }

    ## Locate the R executable and the R script
    my $r_exec = &RSAT::server::GetProgramPath("R");
    my $heatmap_script = $ENV{RSAT}."/R-scripts/matrix-enrichment_heatmap.R";
    unless (-r $heatmap_script) {
        &RSAT::error::FatalError("Cannot read matrix-enrichment_heatmap.R script", $heatmap_script);
    }

    ## Parameters passed to the R script, as R assignments
    my @r_assignments = (
        "prefix = '".$main::prefix{main}."'",
        "maxNWD.table.file = '".$outfile{heatmap_nwdsig}.".txt'",
        "html.template.file = '".$main::outfile{maxNWD_heatmap_template}."'",
        "maxNWD.tsv = '".$main::outfile{maxNWD_tsv}."'",
        "shortpath.maxNWD.tsv = '".$main::outfile{short_path_maxNWD_tsv}."'",
        "maxNWD.heatmap.html = '".$main::outfile{maxNWD_heatmap_html}."'",
        "d3.base = '".$d3_base."'",
        "d3.array.base = '".$d3_array_base."'",
        "heatmap.color.palette = '".$heatmap_color_palette."'",
        "heatmap.color.classes = '".$heatmap_color_classes."'",
    );

    ## Full command: the script is sent to R via a pipe
    my $heatmap_cmd = " cat ".$heatmap_script
        ." | ".$r_exec
        ." --slave --no-save --no-restore --no-environ"
        ." --args \" ".join("; ", @r_assignments)." \"";

    unless ($r_exec) {
        &RSAT::message::Warning("Could not run matrix-enrichment because the program R is not available") if ($main::verbose >= 1);
        return();
    }

    &doit($heatmap_cmd, 0, 1, $verbose, 0, "", $main::out, $main::err);
    return();
}


################################################################
## Draw the dynamic curves of binomial occurrence significance by
## running the R script plot_binomial_occ.R.
##
## The names of the sequence sets (@all_sequence_names) and of the
## matrices (@all_matrix_names) are passed to the R script as strings
## in which the names are separated by '---'.
##
## A fatal error is raised if the R script cannot be read. If the R
## executable is not found, a warning is issued (verbosity >= 1) and
## nothing is run.
sub DynamicOccProfiles {

    ## Files used or produced by the R script
    $main::outfile{binomial_occ_profile} = $main::prefix{main}."_binomial_occ_sig_profiles.html";
    $main::outfile{binomial_occ_profile_template} = $ENV{RSAT}."/public_html/templates_html/dynamic_binomial_OCC_profiles.html";

    if ($main::verbose >= 2) {
        &RSAT::message::TimeWarn("Drawing curves of Binomial OCC significance");
    }

    ## Locate the R executable and the R script
    my $r_exec = &RSAT::server::GetProgramPath("R");
    my $occ_script = $ENV{RSAT}."/R-scripts/plot_binomial_occ.R";
    unless (-r $occ_script) {
        &RSAT::error::FatalError("Cannot read plot_binomial_occ.R script", $occ_script);
    }

    ## Parameters passed to the R script, as R assignments
    my @r_assignments = (
        "prefix = '".$main::prefix{main}."'",
        "html.template.file = '".$main::outfile{binomial_occ_profile_template}."'",
        "Sequences = '".join('---', @all_sequence_names)."'",
        "TFs = '".join('---', @all_matrix_names)."'",
    );

    ## Full command: the script is sent to R via a pipe
    my $occ_cmd = " cat ".$occ_script
        ." | ".$r_exec
        ." --slave --no-save --no-restore --no-environ"
        ." --args \" ".join(" ;", @r_assignments)." \"";

    unless ($r_exec) {
        &RSAT::message::Warning("Could not run matrix-enrichment because the program R is not available") if ($main::verbose >= 1);
        return();
    }

    &doit($occ_cmd, 0, 1, $verbose, 0, "", $main::out, $main::err);
    return();
}


################################################################
## Generate the HTML report from the HTML template.
##
## The template is read line by line (lines starting with '#' or ';'
## and empty lines are skipped), and the following placeholders are
## substituted before the line is written to the report:
##   --he--             HTML header of the result page
##   --show_command--   section displaying the command line
##   --motif_nb--       number of matrices (@all_matrix_names)
##   --set_nb--         number of sequence sets (@all_sequence_names)
##   --heatmap_link--   relative link to the maxNWD heatmap page
##   --occ_profiles--   relative link to the binomial OCC profiles page
##
## The paths of the template and of the report are registered in
## %main::outfile (keys report_template and report).
sub CreateHTMLReport {

  &RSAT::message::TimeWarn("Creating HTML report") if ($main::verbose >= 2);

  $main::outfile{report_template} = $ENV{RSAT}."/public_html/templates_html/matrix_enrichment_template.html";
  $main::outfile{report} = $main::prefix{main}."_report.html";

  ## Open the HTML template
  my ($template) = &OpenInputFile($main::outfile{report_template});

  ## Open the HTML report
  my ($syn) = &OpenOutputFile( $main::outfile{report});

  my $motif_nb = scalar(@all_matrix_names);
  my $set_nb = scalar(@all_sequence_names);

  my $head = &PrintHtmlResultHeader(program=>"matrix-enrichment", "title"=>$main::title);

  ## The links do not depend on the template line: they are computed
  ## once, before reading the template.
  my $heatmap_link = &RSAT::util::RelativePath($main::outfile{report}, $main::outfile{maxNWD_heatmap_html});
  my $occ_profiles_link = &RSAT::util::RelativePath($main::outfile{report}, $main::outfile{binomial_occ_profile});

  ###########################################
  ## Read the template and add the section
  ## names and other parameters
  while (my $line = <$template>) {

      next if ($line =~ /^#/); ## Skip header line
      next if ($line =~ /^;/); ## Skip comment lines
      next unless ($line =~ /\S/); ## Skip empty lines
      chomp($line);

      ## Substitute the Header
      $line =~ s/--he--/$head/;

      ## Substitute the Command section.
      ## The section is written directly to the report, before the
      ## remainder of the current template line.
      if ($line =~ /--show_command--/) {
	  print $syn &open_menu_heading(1, "<a name='logs'></a><h3>Command</h3>", 1);
	  print $syn "<pre>";
	  print $syn "matrix-enrichment ";
	  &PrintArguments($syn);
	  print $syn "</pre>";
	  print $syn &close_menu_heading();

	  $line =~ s/--show_command--//;
      }

      ###########################
      ## Fill parameters table
      $line =~ s/--motif_nb--/$motif_nb/g;
      $line =~ s/--set_nb--/$set_nb/g;

      ############################
      ## maxNWD heatmap section
      $line =~ s/--heatmap_link--/$heatmap_link/g;

      ############################
      ## Binomial OCC significance profiles section
      $line =~ s/--occ_profiles--/$occ_profiles_link/g;

      print $syn $line."\n";
  }
  close($template);
  close($syn);
}

__END__
