#!/usr/bin/env perl

############################################################
#
# $Id: peak-motifs_quick,v 1.256 2013/10/11 04:21:13 rsat Exp $
#
############################################################


## use strict;

=pod

=head1 NAME

peak-motifs_quick

=head1 VERSION

$program_version

=head1 DESCRIPTION

Workflow combining various algorithms to discover motifs from a set of
peak sequences, e.g. genomic regions obtained from ChIP-seq or related
experiments (STARR-seq, ChIP-chip, ChIP-PET).

=head1 AUTHORS

=over

=item Jacques van Helden <Jacques.van.Helden@ulb.ac.be>

Conception and implementation of the work flow + testing.

=item Morgane Thomas-Chollier <thomas-c@molgen.mpg.de>

Conception of the work flow + output layout + Web interface + testing.

=item Matthieu Defrance <defrance@ccg.unam.mx>

Implementation of the efficient algorithms used in the work flow
(I<count-words>, I<matrix-scan-quick>, I<local-word-analysis>).

=item Olivier Sand <oly@bigre.ulb.ac.be> for the Web services

Web services.

=item Carl Herrmann <carl.herrmann@univmed.fr> and Denis Thieffry
<thieffry@tagc.univ-mrs.fr>

Analysis of the case studies. Definition of optimal conditions of
utilization. Motif comparisons and clustering.

=back


=head1 CATEGORY

Motif discovery

=head1 USAGE

peak-motifs_quick [-i inputfile] [-o outputfile] [-v #] [...]

=head1 INPUT FORMAT

The program takes as input either one (test) or two sequence files
(test versus control).

All sequence formats supported as input by I<convert-sequences> are
supported.

=head1 OUTPUT FORMAT

The pipeline runs a series of programs, each generating one or several
result files. An HTML index is generated in order to synthesize the
results and give access to the individual result files.

The index file is formed from the output directory (option -outdir)
and the file prefix (option -prefix).

  [output_dir]/[prefix]_synthesis.html

=cut


## Compile-time setup of the module search path: append to @INC a
## "lib/" directory built from the path prefix of the running script.
BEGIN {
    ## The pattern matches the trailing run of characters of $0 that
    ## are neither '/' nor parentheses (i.e. the last path component),
    ## so the prematch variable $` holds everything before it.
    if ($0 =~ /([^(\/)]+)$/) {
	push (@INC, "$`lib/");
    }
}
require "RSA.lib";
require "RSA.disco.lib";
require "footprint.lib.pl";
use RSAT::util;
use RSAT::MatrixReader;
use RSAT::SeqUtil;
use Data::Dumper;
use File::Basename;

################################################################
## Main package
package main;
{

  ################################################################
  ## Script-wide state
  ##
  ## Start time (returned by RSAT::util::StartScript), error policy
  ## and genome assembly are dynamically scoped to the main block.
  local $start_time = &RSAT::util::StartScript();
  local $die_on_error = 1;
  local $genome_assembly = "";

  ## Locations of the RSAT programs required by the script. A value
  ## that is already set (and true) is kept; otherwise the path is
  ## built from the RSAT environment variable.
  $PYTHON  ||= $ENV{RSAT}."/python_scripts";
  $SCRIPTS ||= $ENV{RSAT}."/perl-scripts";
  $BIN     ||= $ENV{RSAT}."/bin";

  ## Program version, extracted from the numeric fields of the
  ## revision keyword.
  $program_version = do {
    my @version_fields = (q$Revision: 1.256 $ =~ /\d+/g);
    sprintf("%d."."%02d" x $#version_fields, @version_fields);
  };
  #    $program_version = "0.00";

  ## Parameter values and the ordered list of parameter names
  %main::param = ();
  @main::param_list = ();

  ## Directories and files
  %main::dir = ();
  %main::infile = ();
  %main::outfile = ();
  @main::outdir = (); ## Directories to report in the log file

  ## Verbosity and output stream
  $main::verbose = 0;
  $main::out = STDOUT;

  ## Start from empty lists for:
  ## - the sequence types (either 'test' alone, or 'test' + 'ctrl');
  ## - the pattern types (oligos, positions, local-word-analysis, ...
  ##   with the oligo length suffix) and the lists of patterns to merge;
  ## - the timing keys. "timelog" was the first name given to the time
  ##   measurements taken from RSAT programs. A systematic measurement
  ##   of the execution time of each task with the Unix time command
  ##   was implemented afterwards, which also applies to third-party
  ##   algorithms (meme, dreme, ChipMunk). The timelog part may be
  ##   suppressed later.
  foreach my $list (\@seq_types,
                    \@pattern_types,
                    \@patterns_to_merge,
                    \@oligos_to_merge,
                    \@local_words_to_merge,
                    \@positions_to_merge,
                    \@timetable_keys,
                    \@timelog_keys) {
    @{$list} = ();
  }


  ################################################################
  ## Supported tasks
  ##
  ## Tasks of the default work flow, in this order. Two entries are
  ## currently disabled in the list: "bg_from_ctrl" (after
  ## "composition") and "motif_compa" (after "cluster_motifs").
  @default_tasks = qw(
    purge
    seqlen
    composition
    disco
    merge_words
    merge_motifs
    split_motifs
    collect_motifs
    cluster_motifs
    scan
    timelog
    synthesis
    archive
    small_summary
  );

  ## Extra tasks are only executed on explicit demand of the user
  @extra_tasks = qw(all clean_seq meme_bg meme dreme chipmunk);

  ## All the supported tasks: ordered list, comma-separated string and
  ## lookup table indexed by task name
  @supported_tasks = (@default_tasks, @extra_tasks);
  $supported_tasks = join(",", @supported_tasks);
  %supported_task = map { $_ => 1 } @supported_tasks;

  ## List of tasks to be executed
  %task = ();


  ################################################################
  ## Supported motif discovery algorithms
  ##
  ## Algorithms selected by default
  @default_discos = qw(oligos positions);

  ## Extra algorithms are only executed on explicit demand of the
  ## user. "local_words" and "local_dyads" are currently disabled.
  ## Still to be treated: include the meme and chipmunk results in the
  ## HTML synthesis ?
  @extra_discos = qw(
    dyads
    oligo_diff
    merged_words
    meme
    dreme
    chipmunk
  );

  ## All the supported motif discovery algorithms: ordered list,
  ## comma-separated string and lookup table indexed by name
  @supported_discos = (@default_discos, @extra_discos);
  $supported_discos = join(",", @supported_discos);
  %supported_disco = map { $_ => 1 } @supported_discos;

  ## List of motif discovery algorithms to run
  %disco = ();


  ################################################################
  ## Supported sequence sources: lookup table, sorted list of names
  ## and comma-separated string
  local %supported_seq_source = map { $_ => 1 } qw(galaxy ucsc getfasta);
  local @supported_seq_sources = sort keys %supported_seq_source;
  local $supported_seq_sources = join(",", @supported_seq_sources);

  ## List of bed files to upload on UCSC
  local @bed_files = ();

  ## Motif databases
  @motif_databases = ();
  @motif_db_format = ();

  ## Options and suffixes for the third-party programs MEME, DREME and
  ## ChIPMunk (must be global variables)
  local ($meme_options, $meme_suffix) = ("", "");
  local ($dreme_options, $dreme_suffix) = ("", "");
  local ($chipmunk_options, $chipmunk_suffix) = ("", "");

  ## Discovered motifs: list used for the synthesis and index by ID
  local (@motifs, %motif_by_id);

  ## Flag indicating if the motifs have been read (since the method
  ## ReadDiscoveredMotifs may be called from several places)
  local $motifs_read = 0;

  ## Those variables need to be global for motif post-processing and
  ## indexing
  local (@motif_ids, @motif_dirs, @motif_prefixes, @motif_files);
  local $motif_nb = 0;

  ## State of the HTML synthesis
  local $progressive_synthesis = 1;
  local ($syn, $synthesis_path);
  local $menu_nb = 100;

  ## Number of top lines for checking fasta headers in the input
  ## sequence files
  local $check_fasta_top = 1000;

  ################################################################
  ## Parameters: set the default values, read and check the
  ## command-line arguments, then derive the output file names
  &DefaultParameters();
  &ReadArguments();
  &CheckArguments();
  &SetOutFileNames();

  ################################################################
  ## Open the output stream on the log file
  $main::out = &OpenOutputFile($main::outfile{log});

  ## Print the date and the full command to the log file, for
  ## reproducibility + debugging
  print {$main::out} "; ", &RSAT::util::AlphaDate(), "\n";
  print {$main::out} "peak-motifs_quick ";
  &PrintArguments($main::out, 1);
  print {$main::out} "\n\n";

  if ($progressive_synthesis) {
    &OpenSynthesis();
  }

  ################################################################
  ## Sequence preparation and description, for each sequence type
  for my $current_type (@seq_types) {

    ## Prepare sequences
    if ($task{purge}) {
      &PurgeSeq($current_type);
    }

    ## Sequence lengths
    if ($task{seqlen}) {
      &SequenceLengths($current_type);
    }

    ## Sequence composition
    if ($task{composition}) {
      &CompositionProfiles($current_type);
    }
  }

  ################################################################
  ## Background models
  if ($main::infile{ctrl_seq}) {

    ## Build background models from control sequences.
    ##
    ## Note: auto (or manual) Markov order does not apply for test
    ## versus control analysis.
    &ComputeBgModels($main::infile{ctrl_seq});

  } elsif ($main::auto_markov) {

    ## Automatic definition of Markov order: compute the total
    ## sequence length and derive the Markov orders from it.
    if ($main::verbose >= 2) {
      &RSAT::message::TimeWarn("Auto definition of Markov order on the basis of sequence length");
    }

    ## Total sequence length is computed on the converted test
    ## sequences, since these may have been clipped or restricted to
    ## the top peaks.
    my $length_cmd = join("",
                          $SCRIPTS, "/sequence-lengths -i ",
                          $main::outfile{"test_converted"},
                          " -sum | grep -v '#' | cut -f 1");
    local $total_seq_len = `$length_cmd`;
    chomp($total_seq_len);

    ## Choose the order according to total sequence length. The limits
    ## here are somewhat arbitrary; a better criterion for defining
    ## the Markov order as a function of sequence size remains to be
    ## found.
    my $auto_order = ($total_seq_len <= 10000) ? 1
                   : ($total_seq_len <= 1e+5)  ? 2
                   : ($total_seq_len <= 1e+6)  ? -3
                   :                             -2;
    $main::param{oligo_max_mkv} = $auto_order;
    $main::param{oligo_min_mkv} = $auto_order;

    if ($main::verbose >= 2) {
      &RSAT::message::Info("Automatic choice of Markov order.",
                           "L=".sprintf("%.1e", $total_seq_len),
                           "m=".$main::param{oligo_min_mkv});
    }

    ## File names must be redefined once the Markov order is chosen.
    ## Computing the sequence length however required the sequences
    ## to be prepared first, which itself required a first pass
    ## through SetOutFileNames(). It is thus run a second time here.
    &SetOutFileNames();
  }

  if ($progressive_synthesis) {
    &SynthesisSequenceComposition();
  }

  ## Run motif discovery algorithms
  if ($task{disco}) {
    if ($disco{oligos}) {
      &OligoAnalysis();
    }
    if ($disco{dyads}) {
      &DyadAnalysis();
    }
    if ($disco{positions}) {
      &PositionAnalysis();
    }
  }
  if ($task{merge_words}) {
    &MergeWords();
  }

  ## Third-party programs (MEME, DREME, ChIPMunk). Each one runs
  ## either as a task of its own, or as a discovery algorithm of the
  ## task disco. Not yet clear (JvH) whether they should be separate
  ## tasks or disco algorithms running with the task disco.

  ## MEME motif discovery
  if ($task{meme_bg}) {
    &CalcMemeBackground();
  }
  if ($task{meme} || ($task{disco} && $disco{meme})) {
    &RunMEME();
  }

  ## DREME motif discovery
  if ($task{dreme} || ($task{disco} && $disco{dreme})) {
    &RunDREME();
  }

  ## ChIPMunk motif discovery
  if ($task{chipmunk} || ($task{disco} && $disco{chipmunk})) {
    &RunChIPMunk();
  }

  ## Merge all discovered motifs
  if ($task{merge_motifs}) {
    &MergeMotifs();
  }

  ## Split motifs from the merged file for some motif-wise tasks
  if ($task{split_motifs}) {
    &SplitMotifs();
  }

  #&MotifsVersusMotifs() if ($task{motifs_vs_motifs});

  if ($task{cluster_motifs}) {
    &ClusterMotifs();
  }

  ## HTML synthesis by algorithm
  if ($progressive_synthesis) {
    &SynthesisMotifsByAlgo();
  }

  ## Check the number of discovered motifs before running further
  ## analyses. The motif table is not read again if it was already
  ## loaded.
  ## NOTE(review): the guard requires more than one motif ID, so the
  ## table is read again when exactly one ID is loaded -- confirm that
  ## this is intended.
  unless (scalar(@motif_ids) > 1) {
    &ReadMotifTable();
  }

  if ($motif_nb > 0) {
    if ($task{scan}) {
      &ScanSequences();
    }
    if ($progressive_synthesis) {
      &SynthesisByMotif();
    }
    if ($progressive_synthesis) {
      &SynthesisMergedSites();
    }
    if ($progressive_synthesis) {
      &SynthesisMotifCompa();
    }
  } else {
    &RSAT::message::Warning("Not a single discovered motif. Reducing the order of the Markov model may increase sensitivity.");
  }

  ## Write the time spent by each program in a time table
  if ($task{timelog}) {
    &TimeLog();
    &TimeTable();
  }

  ## Report parameters and files
  if ($progressive_synthesis) {
    &SynthesisParamsAndFiles();
  }

  if ($task{clean_seq}) {
    &CleanSequences();
  }

  ## Archiving has to be done before synthesis in order to enable
  ## linking the archive file in the HTML synthesis. However, the
  ## archive has to be updated after the synthesis, in order to
  ## include the HTML synthesis file.
  if ($task{archive}) {
    &Archive(1);
  }

  ## The progressive synthesis needs to be closed before re-building
  ## the report
  if ($progressive_synthesis) {
    &CloseSynthesis();
  }

  ## For the final synthesis, re-write the whole synthesis file
  if ($task{synthesis}) {
    $progressive_synthesis = 0;
    &Synthesis();
  }

  ## Create a separate html file with a very synthetic view of the
  ## results
  if ($task{small_summary}) {
    &SmallSummary();
  }

  ## Report the location of the log file
  if ($main::verbose >= 2) {
    &RSAT::message::TimeWarn("Log file", $main::outfile{log});
  }

  ################################################################
  ## Report the execution time and close the output stream
  my $exec_time = &RSAT::util::ReportExecutionTime($start_time); ## This has to be executed by all scripts
  print $main::out $exec_time if ($main::verbose >= 1);	## only report exec time if verbosity is specified

  ## The output stream was opened on the log file ($main::outfile{log}),
  ## so the log entry is the one that tells whether there is a file to
  ## close.
  close $main::out if ($main::outfile{log});
  exit(0);
}

################################################################
################### SUBROUTINE DEFINITION ######################
################################################################


################################################################
## Display full help message: format the POD documentation of this
## script with pod2text (option -c), then exit.
sub PrintHelp {
  ## The list form of system() passes the script path as a single
  ## argument without going through the shell, so that paths
  ## containing spaces or shell metacharacters are handled properly.
  system("pod2text", "-c", $0);
  exit()
}

################################################################
## Display short help message. There is no separate short message:
## the call is forwarded to PrintHelp().
sub PrintOptions {
  PrintHelp();
}


################################################################
## Set parameter values for all the programs.
##
## Takes no argument. Fills the following package variables:
##   %main::param                      default value of each parameter
##   @main::param_list                 names pushed along with some of the defaults
##   @main::composition_oligo_lengths  oligo lengths for the composition profiles
##   @MEME_options, @DREME_options     option names pushed along with the
##                                     MEME and DREME defaults
##   $chipmunk_suffix                  string built from the ChIPMunk defaults
sub DefaultParameters {
  if ($main::verbose >= 2) {
    &RSAT::message::TimeWarn("\n; Setting default parameter values");
  }

  ## Helper: set a default value and push the parameter name onto
  ## @main::param_list
  my $listed = sub {
    my ($name, $value) = @_;
    $main::param{$name} = $value;
    push(@main::param_list, $name);
  };

  ## Helper: set a MEME default (key prefixed with "meme_") and push
  ## the option name onto @MEME_options
  my $meme_default = sub {
    my ($option, $value) = @_;
    $main::param{"meme_".$option} = $value;
    push(@MEME_options, $option);
  };

  ## Helper: set a DREME default (key prefixed with "dreme_") and push
  ## the option name onto @DREME_options
  my $dreme_default = sub {
    my ($option, $value) = @_;
    $main::param{"dreme_".$option} = $value;
    push(@DREME_options, $option);
  };

  ################################################################
  ## Formats
  $listed->("seq_format", "fasta");
#  $listed->("img_format", "pdf");
  $listed->("img_format", "png");

  ## r_plot:            use R to plot XY graph
  ## img_border:        border for the images in the synthetic table
  ##                    (generally blue because linked to the image file)
  ## logo_table_height: height for displaying logos in the synthetic tables
  ## iupac_coloring:    color IUPAC degenerate letters in the consensus
  ##                    of HTML reports
  ## archive_format:    supported: zip, tar, tgz
  @main::param{qw(r_plot img_border logo_table_height iupac_coloring archive_format)}
    = (0, 1, 50, 0, "zip");

  ################################################################
  ## Purge-seq
  $listed->("purge_match_length", 40);
  $listed->("purge_mismatches", 3);

  ################################################################
  ## Residue profiles (position-analysis)
  $listed->("origin", "center");
  $listed->("offset", 0);
  $listed->("profiles_ci", 20);
  $listed->("profiles_max_graphs", 20);
  $listed->("profiles_strand", "-1str");
  @main::composition_oligo_lengths = (1,2);
  $listed->("composition_oligo_lengths", join(',', @main::composition_oligo_lengths));

  ## Beware: compositional profiles are always computed with the
  ## option -ovlp (count all occurrences), to avoid a weird effect:
  ## with the -noov mode, the transition tables are unbalanced even
  ## when the input sequences are generated with equiprobable
  ## residues. This is consistent with the fact that those profiles
  ## further serve to estimate the probability of larger words, which
  ## may include repeated residues. Using the option -noov here would
  ## induce an under-estimation of the expected frequency for words
  ## containing repeated residues.
  $listed->("profiles_noov", "-ovlp");

  ################################################################
  ## Motif discovery options
  $listed->("strand", "-2str");
  $listed->("disco_noov", "-noov");
  $main::param{disco_pseudo} = 0.01;
  $listed->("oligo_min_len", 6);
  $listed->("oligo_max_len", 7);
  $main::param{merge_lengths} = 1; ## Merge oligos of different lengths before assembly
  $listed->("patterns_max_rank", 100);

  ## Thresholds for oligo-analysis, dyad-analysis and oligo-diff
  $listed->("min_ratio", 1);
  $listed->("min_zscore", 6);
  $listed->("min_sig", 0);

  ## oligo-analysis
  $listed->("oligo_min_mkv", -2);
  $listed->("oligo_max_mkv", -2);
  $listed->("under_represented", 0); ## search for under-represented words

  ## dyad-analysis

  ## position-analysis: class interval and min number of occurrences
  @main::param{qw(positions_ci positions_min_occ)} = (50, 1);
  $listed->("positions_max_graphs", 20);

  ## matrix-from-patterns
  $listed->("asmb_toppat", $main::param{patterns_max_rank});
  $listed->("max_asmb_width", 20);
  $listed->("matrix_nmotifs", 3);

  ################################################################
  ## matrix-scan options
#  $main::param{scan_strands} = "-2str";
  $main::param{scan_pseudo} = 1;
  $main::param{scan_decimals} = 1;
  $main::param{scan_bg_pseudo} = 0.01;

  ## Used for computing the background model in inclusive format
  ## (required by matrix-scan-quick).
  $main::param{scan_markov} = 1;

  ## Option used for site scanning but not for enrichment analysis
  $main::param{scan_min_score} = 7.5;

  ## Option used for enrichment in matrix hits but not for site detection
  $main::param{scan_enrich_min_score} = -5;

  ################################################################
  ## Default MEME options
  $meme_default->("text", "");             ## Output format = text
  $meme_default->("dna", "");              ## Sequence type == dna
  $meme_default->("mod", "anr");           ## Accept any number of occurrences per sequence
  $meme_default->("minw", 12);             ## Minimal motif width
  $meme_default->("maxw", 12);             ## Maximal motif width
  $meme_default->("nmotifs", $main::param{matrix_nmotifs}); ## Number of motifs
  $meme_default->("evt", 1);               ## upper threshold on E-value
  $meme_default->("maxsize", 10000000);    ## maximum size for the sequence set
  $main::param{bg_meme_markov} = 3;        ## Markov order for MEME background model

  ################################################################
  ## Default DREME options
  $dreme_default->("e", 0.05);                            ## E-value threshold
  $dreme_default->("m", $main::param{matrix_nmotifs});    ## max number of motifs
  $dreme_default->("mink", $main::param{oligo_min_len});  ## min width of core motif
  $dreme_default->("maxk", $main::param{oligo_max_len});  ## max width of core motif

  ################################################################
  ## Default ChIPMunk options
  ##
  ## java -cp "some_dir" ru_genetika.ChIPMunk <start_motif_length>
  ## <stop_motif_length> <verbose>=(y)es|(n)o
  ## <mode>=oops|zoops_factor=0.0..1.0 <x:input_set1>..<x:input_setN>
  ## <try_limit> <step_limit> <iter_limit> <thread_count>
  ## <seeds>=random|filename.mfa <gc%>=0.
  @main::param{qw(chipmunk_start_motif_length chipmunk_stop_motif_length chipmunk_verbose chipmunk_mode)}
    = (12, 12, "yes", "oops");

  ## TO BE TESTED
#   ## Facultative ChIPMunk options - to evaluate
#   $main::param{chipmunk_try_limit} = 100;
#   $main::param{chipmunk_step_limit} = 10;
#   $main::param{chipmunk_iter_limit} = 1;
#   $main::param{chipmunk_thread_count} = 7;
#   $main::param{chipmunk_gc} = 0.5;

  ## Suffix combining the motif lengths and the mode
  $chipmunk_suffix = join("_", "_w",
                          @main::param{qw(chipmunk_start_motif_length chipmunk_stop_motif_length chipmunk_mode)});
}



################################################################
## Set output file names
sub SetOutFileNames {

  ## Reset @pattern_types and @patterns_to_merge to empty lists,
  ## because &SetOutFileNames() can be called twice with the option
  ## $auto_markov.
  @pattern_types = ();
  @patterns_to_merge = ();
  @oligos_to_merge = ();
  @local_words_to_merge = ();
  @positions_to_merge = ();


  ################################################################
  ## Output file names for peaks and their composition analysis (apply
  ## to both test and control sequences).
  foreach my $seq_type (@seq_types) {
    my @seq_suffix = ();
    if (defined($main::param{max_seq_len})) {
      push @seq_suffix, "maxlen".$main::param{max_seq_len};
    }
    if (defined($main::param{top_peaks})) {
      push @seq_suffix, "top".$main::param{top_peaks};
    }


    ## Converted sequences
    $main::outfile{$seq_type."_converted"} = &OutFileName("data/sequences",
							  ".fasta",
							  $seq_type,
							  @seq_suffix);

    ## Purged sequences
    $main::outfile{$seq_type."_purged"} = &OutFileName("data/sequences",
						       ".fasta",
						       $seq_type,
						       @seq_suffix,
						       "purged",
						       "ml".$main::param{purge_match_length},
						       "mis".$main::param{purge_mismatches},
						      );

#    ## Prefix for sequence files
#    $seqfile{$seq_type} = $main::infile{$seq_type."_seq"};

    ## Sequence lengths
    $main::outfile{$seq_type."_seqlen"} = &OutFileName("data/sequences", ".tab", $seq_type."_seqlen");
    $main::outfile{$seq_type."_seqlen_distrib"} = &OutFileName("data/sequences", ".tab", $seq_type."_seqlen_distrib");
    $main::outfile{$seq_type."_seqlen_distrib_graph"} = &OutFileName("data/sequences", ".".$param{img_format}, $seq_type."_seqlen_distrib");

    ################################################################
    ## Compositional Profiles
    for my $ol (@composition_oligo_lengths) {

      ################################################################
      ## Profiles of oligo frequencies as a function of the position
      ##
      $main::outfile{$ol."nt_".$seq_type."_profiles"} = &OutFileName("results/composition", ".tab", $seq_type."_profiles".$main::param{profiles_strand}.$main::param{profiles_noov},
								     $ol."nt","ci".$main::param{profiles_ci});
      push @timelog_keys, $ol."nt_".$seq_type."_profiles";

      ## oligo frequencies in the sequence set
      $main::outfile{$ol."nt_".$seq_type."_freq"} = &OutFileName("results/composition", ".tab", $seq_type."_freq"."-1str".$main::param{profiles_noov},$ol."nt");
      push @timelog_keys, $ol."nt_".$seq_type."_freq";
      $main::outfile{$ol."nt_".$seq_type."_transitions"} = &OutFileName("results/composition", ".tab", $seq_type."_transitions"."-1str".$main::param{profiles_noov},$ol."nt");
      $main::outfile{$ol."nt_".$seq_type."_inclusive"} = &OutFileName("results/composition", ".txt", $seq_type."_inclusive"."-1str".$main::param{profiles_noov},$ol."nt");
      $main::outfile{$ol."nt_".$seq_type."_heatmap"} = &OutFileName("results/composition", ".".$main::param{img_format}, $seq_type."_heatmap"."-1str".$main::param{profiles_noov},$ol."nt");
      ## HTML index to the individual oligonucleotide profiles
      $main::outfile{$ol."nt_".$seq_type."_profiles_index"} = $main::outfile{$ol."nt_".$seq_type."_profiles"};
      $main::outfile{$ol."nt_".$seq_type."_profiles_index"} =~ s/\.tab$//;
      $main::outfile{$ol."nt_".$seq_type."_profiles_index"} .= "_graph_index.html";

      ## Graph prefix for positional profiles
      $main::outfile{$ol."nt_".$seq_type."_profiles_graph"} = $main::outfile{$ol."nt_".$seq_type."_profiles"};
      $main::outfile{$ol."nt_".$seq_type."_profiles_graph"} =~ s/\.tab$//;
      $main::outfile{$ol."nt_".$seq_type."_profiles_graph"} .= ".".$main::param{img_format};
    }
  }


  ################################################################
  ## Background model file for matrix scanning (has been computed with
  ## the composition profiles).
  $main::param{scan_bg_file} = $main::outfile{$main::param{scan_bg_ol}."nt_test_inclusive"};
  &RSAT::message::Info("Background model file",
		       "order=".$main::param{scan_markov},
		       "bg_ol=".$main::param{scan_bg_ol},
		       "key=".$main::param{scan_bg_ol}."nt_test_inclusive",
		       "file=".$main::param{scan_bg_file}
		      ) if ($main::verbose >= 3);

  ################################################################
  ## BED files for loading test peaks in genome browser (e.g. UCSC)
  ##
  if (($main::infile{coord}) || ($main::param{seq_source})) {
    ## TO DISCUSS: SHOULD WE ALSO EXPORT CONTROL PEAKS IN BED FORMAT ?
    ## CAN BE PROBLEMATIC BECAUSE A USER CAN ENTER GALAXY-FORMATTED SEQ
    ## FOR TEST BUT NOT FOR CONTROL. IN THE MEAN TIME I DESACTIVATE THIS
    ## OPTION.
    $seq_type = "test";
    $main::outfile{$seq_type."_seq_coord"} = &OutFileName("data/sequences", ".bed", $seq_type."_seqcoord");
    push @bed_files, $main::outfile{$seq_type."_seq_coord"};
  }


  ################################################################
  ## Merged sites
  $main::outfile{merged_sites} = &OutFileName("results/sites", ".tab", "all_motifs_seqcoord");
  $main::outfile{merged_sites_bed} = &OutFileName("results/sites", ".bed", "all_motifs_seqcoord");


  ################################################################
  ## Motif discovery
  my $pattern_type = "";

  ## Iterate lengths for word-counting programs (oligo-diff,
  ## oligo-analysis, position-analysis, local-word-analysis).
  for my $len ($main::param{oligo_min_len}..$main::param{oligo_max_len}) {

    ################################################################
    ## oligo-diff (requires control sequences)
    if ($disco{oligo_diff}) {
      $pattern_type = 'oligo_diff_'.$len.'nt';
      $main::outfile{$pattern_type} = &OutFileName("results/".$pattern_type, ".tab", "oligo_diff".$main::param{strand}.$main::param{disco_noov},$len."nt");
      push @pattern_types, $pattern_type unless ($main::param{merge_lengths});
      push @patterns_to_merge, $pattern_type;
      push @oligos_to_merge, $pattern_type;
    }

    ################################################################
    ## oligo-analysis
    if ($disco{oligos}) {

      ## Test versus control sequences
      if ($main::infile{ctrl_seq}) {

	$main::param{oligo_bg_suffix} = "test_vs_ctrl";
	$pattern_type = 'oligos_'.$len.'nt_'.$main::param{oligo_bg_suffix};

	## Background models estimated from the control sequences
	## (Markov chain m = k - 1).
	my $mkv = $len -1;
	$main::outfile{$pattern_type."_bg_file"} = &OutFileName("results/".$pattern_type, ".tab", "oligos_bg_from_ctrl_".$len."nt".$main::param{strand}.$main::param{disco_noov});
#	&RSAT::message::Debug("bg_file", $pattern_type."_bg_file", $main::outfile{$pattern_type."_bg_file"}) if ($main::verbose >= 10);


	## oligo-analysis result file
	$main::outfile{$pattern_type} = &OutFileName("results/".$pattern_type, ".tab", "oligos".$main::param{strand}.$main::param{disco_noov},
						     $len."nt", $main::param{oligo_bg_suffix});
#	&RSAT::message::Debug("pattern_file", $pattern_type, $main::outfile{$pattern_type}) if ($main::verbose >= 10);
	push @pattern_types, $pattern_type unless ($main::param{merge_lengths});
	push @patterns_to_merge, $pattern_type;
	push @oligos_to_merge, $pattern_type;

      } else {

	## Background models estimated from the test sequences
	## themselves (Markov chain m <= k -2)

	## Convert negative markov orders relative to the current oligo length
	unless (($main::param{oligo_min_mkv} eq "auto") ||
		($main::param{oligo_max_mkv} eq "auto")) {
	  $min_mkv = $main::param{oligo_min_mkv}; $min_mkv += $len if ($min_mkv < 0);
	  $max_mkv = $main::param{oligo_max_mkv}; $max_mkv += $len if ($max_mkv < 0);
	  $main::param{oligo_bg_suffix} = "mvk".$main::param{oligo_min_mkv}."_".$main::param{oligo_max_mkv};
	  for my $markov ($min_mkv..$max_mkv) {
	    my $oligo_bg_suffix = "mkv".$markov;
	    ## oligo-analysis result file
	    $pattern_type = 'oligos_'.$len.'nt_'.$oligo_bg_suffix;
	    $main::outfile{$pattern_type} = &OutFileName("results/".$pattern_type, ".tab", "oligos".$main::param{strand}.$main::param{disco_noov},
							 $len."nt", $oligo_bg_suffix);
	    push @pattern_types, $pattern_type unless ($main::param{merge_lengths});
	    push @patterns_to_merge, $pattern_type;
	    push @oligos_to_merge, $pattern_type;
	  }
	}
      }
    }

    
    ################################################################
    ## position-analysis
    if ($disco{positions}) {
      $pattern_type = 'positions_'.$len.'nt';
      $main::outfile{$pattern_type} = &OutFileName("results/".$pattern_type, ".tab", 
						   "positions".$main::param{strand}.$main::param{disco_noov},
						   $len."nt", "ci".$main::param{positions_ci});
      push @pattern_types, $pattern_type unless ($main::param{merge_lengths});
      push @patterns_to_merge, $pattern_type;
      push @positions_to_merge, $pattern_type;
    }
  }


  ################################################################
  ## Merge oligos of different lengths before pattern assembly
  $main::param{oligo_range} = $main::param{oligo_min_len}."-".$main::param{oligo_max_len}."nt";
  if ($main::param{merge_lengths}) {

    ## Merge position-analysis result files
    if ($disco{positions}) {
      my $pattern_type = "positions_".$main::param{oligo_range};
      $main::outfile{$pattern_type} =
	  &OutFileName("results/".$pattern_type, ".tab", 
		       "positions".$main::param{strand}.$main::param{disco_noov},
		       $main::param{oligo_range}, "ci".$main::param{positions_ci});
      push @pattern_types, $pattern_type;
      &RSAT::message::Debug("Merged file", $pattern_type, $main::outfile{$pattern_type}) if ($main::verbose >= 5);
    }


    
    ## Merge oligo-analysis result files
    if ($disco{oligos}) {
      $main::param{oligo_bg_suffix} = "mvk";
      if (($main::param{oligo_min_mkv} eq "auto") ||
	  ($main::param{oligo_min_mkv} eq "auto")) {
	$main::param{oligo_bg_suffix} .=  "auto";
      } else {
	$main::param{oligo_bg_suffix} .= $main::param{oligo_min_mkv}."_".$main::param{oligo_max_mkv};
      }
      my $pattern_type = "oligos_".$main::param{oligo_range};
      $main::outfile{$pattern_type} =
	  &OutFileName("results/".$pattern_type, ".tab", "oligos".$main::param{strand}.$main::param{disco_noov},
		       $main::param{oligo_range}, $main::param{oligo_bg_suffix});
      push @pattern_types, $pattern_type;
      &RSAT::message::Debug("Merged file", $pattern_type, $main::outfile{$pattern_type}) if ($main::verbose >= 5);
    }

    ## Merge oligo-diff result files
    if ($disco{oligo_diff}) {
      my $pattern_type = "oligo_diff_".$main::param{oligo_range};
      $main::outfile{$pattern_type} = &OutFileName("results/".$pattern_type, ".tab", "oligo_diff".$main::param{strand}.$main::param{disco_noov},$main::param{oligo_range});
      push @pattern_types, $pattern_type;
      &RSAT::message::Debug("Merged file", $pattern_type, $main::outfile{$pattern_type}) if ($main::verbose >= 5);
    }
  }


  ################################################################
  ## dyad-analysis
  if ($disco{dyads}) {
    ## Test versus control sequences
    if ($main::infile{ctrl_seq}) {
      $pattern_type = 'dyads_test_vs_ctrl';

      ## Background models estimated from the control sequences
      $main::outfile{$pattern_type."_bg_file"} = &OutFileName("results/".$pattern_type, ".tab", "dyads_bg_from_ctrl".$main::param{strand}.$main::param{disco_noov},"3nt_sp0-20_bg_monads");

    } else {
      $pattern_type = "dyads";
    }
    $main::outfile{$pattern_type} = &OutFileName("results/".$pattern_type, ".tab", "dyads".$main::param{strand}.$main::param{disco_noov},"3nt_sp0-20_bg_monads");
    push @pattern_types, $pattern_type;
    push @patterns_to_merge, $pattern_type;
    &RSAT::message::Debug("Merged file", $pattern_type, $main::outfile{$pattern_type}) if ($main::verbose >= 5);
  }

  
  ################################################################
  ## We desactivate the graph display because the layout is really too
  ## ugly. Users will be suggested to open the graph with Cytoscape
  ## until we find the time to implement a nice graph layout
  ## procedure.
  $main::param{display_graphs} = 0;

  ################################################################
  ## Comparison between significance of the discovered patterns
  $pattern_type = "merged_words";
  $main::outfile{merged_words} = &OutFileName("results/".$pattern_type, ".tab", "merged_words");
  $main::outfile{merged_words_html} = &OutFileName("results/".$pattern_type, ".html", "merged_words");
  $main::outfile{merged_words_heatmap} = &OutFileName("results/".$pattern_type, ".png", "merged_words_heatmap");
  if ($disco{merged_words}) {
    push @pattern_types, $pattern_type;
  }

  &RSAT::message::Info("Pattern types", join (",", @pattern_types)) if ($main::verbose >= 2);
  &RSAT::message::Info("Sequence types", join (",", @seq_types)) if ($main::verbose >= 2);

  ## Conversion from patterns to matrices + logos
  foreach my $pattern_type (@pattern_types) {
    $main::prefix{$pattern_type.'_pssm'} = $main::outfile{$pattern_type};
    $main::prefix{$pattern_type.'_pssm'} =~ s/\.tab$//;
    $main::prefix{$pattern_type.'_pssm'} .= "_pssm";
    $main::outfile{$pattern_type.'_2pssm'} = $main::prefix{$pattern_type.'_pssm'}."_log.txt"; push @timelog_keys, $pattern_type.'_2pssm';
    $main::outfile{$pattern_type.'_asmb'} = $main::prefix{$pattern_type.'_pssm'}.".asmb";
    $main::outfile{$pattern_type.'_pssm_sig'} = $main::prefix{$pattern_type.'_pssm'}."_sig_matrices.tf";
#    $main::outfile{$pattern_type.'_pssm_gibbs'} = $main::prefix{$pattern_type.'_pssm'}."_gibbs_matrices.txt";
    $main::outfile{$pattern_type.'_pssm_counts'} = $main::prefix{$pattern_type.'_pssm'}."_count_matrices.txt";
    $main::outfile{$pattern_type.'_pssm_tf'} = $main::prefix{$pattern_type.'_pssm'}."_count_matrices.tf";
    foreach my $logo_nb (1..$main::param{matrix_nmotifs}) {
      $main::outfile{$pattern_type.'_pssm_logo'.$logo_nb} =
	$main::prefix{$pattern_type.'_pssm'}."_count_matrices_logo_m".$logo_nb.".".$main::param{img_format};
      $main::outfile{$pattern_type.'_pssm_logo_rc'.$logo_nb} =
	$main::prefix{$pattern_type.'_pssm'}."_count_matrices_logo_m".$logo_nb."_rc.".$main::param{img_format};
    }
    push @timelog_keys, $pattern_type;
    push @timetable_keys, $pattern_type;
    push @timetable_keys, $pattern_type."_pssm";
  }

  ## Output files for MEME
  if (($disco{meme}) || ($task{meme})) {
    $main::outfile{meme_bg_file} = &OutFileName("results/meme", ".txt", "meme_bg_mkv".$main::param{bg_meme_markov});
    &CalcMemeOptions(); ## this also computes the suffix of the meme output file
    $main::outfile{meme} = &OutFileName("results/meme", ".txt", "meme_".$meme_suffix);
    $main::outfile{meme_tf} = &OutFileName("results/meme", ".tf", "meme_".$meme_suffix);
    $main::outfile{meme_logos} = &OutFileName("results/meme", "", "meme_".$meme_suffix."_logos");
    push @timetable_keys, "meme";
  }

  ## Output files for DREME
  if (($disco{dreme}) || ($task{dreme})) {
    &CalcDremeOptions(); ## this also computes the suffix of the dreme output file
    $main::outfile{dreme} = &OutFileName("results/dreme", ".txt", "dreme_".$dreme_suffix);
    $main::outfile{dreme_tf} = &OutFileName("results/dreme", ".tf", "dreme_".$dreme_suffix);
    $main::outfile{dreme_logos} = &OutFileName("results/dreme", "", "dreme_".$dreme_suffix."_logos");
    push @timetable_keys, "dreme";
  }

  ## Output files and options for ChIPMunk
  if (($disco{chipmunk}) || ($task{chipmunk})) {
    $main::outfile{chipmunk} = &OutFileName("results/chipmunk", ".txt", "chipmunk_".$chipmunk_suffix);
    #    $main::outfile{chipmunk_tf} = &OutFileName("results/chipmunk", ".tf", "chipmunk_".$chipmunk_suffix);
    #    $main::outfile{chipmunk_logos} = &OutFileName("results/chipmunk", "", "chipmunk_".$chipmunk_suffix."_logos");
    push @timetable_keys, "chipmunk";
  }

  ################################################################
  ## Clustering of the discovered motifs + comparison with reference motif(s)

  ## Merged motifs
  $main::outfile{"motifs_discovered"} = &OutFileName("results/discovered_motifs", ".tf", "motifs_discovered");

  ## Table with the list of separated motifs
  $main::outfile{"motifs_discovered_table"} = &OutFileName("results/discovered_motifs", ".tab", "motifs_discovered_table");

  ## Clustered motifs
  $main::outfile{"motifs_disco_clusters"} = &OutFileName("results/discovered_motifs", "", "motifs_discovered_clusters");
  $main::outfile{"motifs_disco_clusters_html"} = &OutFileName("results/discovered_motifs", ".html", "motifs_discovered_clusters_SUMMARY");


  ## Motif clustering
  if ($task{cluster_motifs}) { 

    ## THIS IS TEMPORARILY INACTIVATED UNTIL I FINALIZE MOTIF CLUSTERING
    $main::outfile{motifs_disco_clusters_mcl} = &OutFileName("results/discovered_motifs", ".mcl", "motifs_disco_clusters");
    $main::outfile{motifs_disco_clusters_tab} = &OutFileName("results/discovered_motifs", ".tab", "motifs_disco_clusters");
    $main::outfile{motifs_disco_clusters_graph} = &OutFileName("results/discovered_motifs", ".tab", "motifs_disco_clusters_graph");
    $main::outfile{motifs_disco_clusters_graph_gml} = &OutFileName("results/discovered_motifs", ".gml", "motifs_disco_clusters_graph");
    $main::outfile{motifs_disco_clusters_graph_png} = &OutFileName("results/discovered_motifs", ".png", "motifs_disco_clusters_graph") if ($main::param{display_graphs});
    $main::outfile{motifs_disco_compa_cluster_intra_degree} = &OutFileName("results/discovered_motifs", ".tab", "motifs_disco_compa_cluster_intra_degree");
  #    $main::outfile{motifs_disco_ref} = &OutFileName("results/discovered_motifs", ".tf", "motifs_disco_ref");
  }

  
}

################################################################
## Convert, truncate, select top and purge sequences
sub PurgeSeq {
  my ($seq_type) = @_;

  ## Files attached to this sequence type: the user-provided input,
  ## the converted (fasta) file and the purged file.
  my $input_file = $main::infile{$seq_type."_seq"};
  my $converted_file = $main::outfile{$seq_type."_converted"};
  my $purged_file = $main::outfile{$seq_type."_purged"};

  ################################################################
  ## Step 1: conversion to fasta with masking of non-DNA residues,
  ## optionally followed by the selection of the top peaks and by the
  ## truncation of the sequences. The command is collected as a list of
  ## fragments which are concatenated without separator.
  if ($main::verbose >= 2) {
    &RSAT::message::TimeWarn("Converting sequences", $seq_type, $input_file);
  }
  my @convert_parts = (
    $SCRIPTS."/convert-seq",
    " -i ".$input_file,
    " -from ".$main::param{seq_format},
    " -to fasta",
    " -mask non-dna",
  );

  ## Optional selection of the top peaks
  my $top_peaks = $main::param{top_peaks};
  if (defined($top_peaks)) {
    if ($main::verbose >= 2) {
      &RSAT::message::Info("\tSelecting top peaks", $top_peaks);
    }
    push @convert_parts, " -top ".$top_peaks;
  }

  ## Optional truncation: the converted sequences are piped to
  ## sub-sequence, which keeps max_seq_len positions around the origin.
  my $max_len = $main::param{max_seq_len};
  if (defined($max_len)) {
    my $from = -round($max_len/2);
    my $to = $from + $max_len -1;
    if ($main::verbose >= 2) {
      &RSAT::message::Info("\tTruncating to ", $max_len." bp max");
    }
    push @convert_parts, (
      "| ".$SCRIPTS."/sub-sequence",
      " -origin ".$main::param{origin},
      " -from ".$from,
      " -to ".$to,
    );
  }

  ## The output option applies to the last command of the pipeline
  push @convert_parts, " -o ".$converted_file;
  &RSAT::util::one_command(join("", @convert_parts), 1, "", log=>$main::out);

  ################################################################
  ## Step 2: purge the converted sequences (mask redundant fragments)
  if ($main::verbose >= 2) {
    &RSAT::message::TimeWarn("Purging sequences", $seq_type, $converted_file);
  }
  my $purge_cmd = join("",
		       $SCRIPTS."/purge-sequence -dna",
		       " -i ".$converted_file,
		       " -ml ".$main::param{purge_match_length},
		       " -mis ".$main::param{purge_mismatches},
		       " -o ".$purged_file);
  &RSAT::util::one_command($purge_cmd, 1, "", log=>$main::out);
}


################################################################
## Compute sequence lengths
sub SequenceLengths {
  ## Usage: &SequenceLengths($seq_type)
  ##
  ## Runs sequence-lengths then classfreq on the purged sequences of
  ## the given sequence type, plots the resulting distribution with
  ## XYgraph and, for test sequences with a known sequence source,
  ## exports the coordinates found in the fasta headers as a BED file.
  my ($seq_type) = @_;

  &RSAT::message::TimeWarn("Computing sequence lengths", $seq_type) if ($main::verbose >= 2);
  my $cmd = $SCRIPTS."/sequence-lengths";
  #  $cmd .= " -i ".$main::infile{$seq_type.'_seq'};
  $cmd .= " -i ".$main::outfile{$seq_type."_purged"};
  $cmd .= " -o ".$main::outfile{$seq_type.'_seqlen'};

  ## Second command of the same shell line: class frequencies computed
  ## on column 2 of the sequence-lengths output.
  $cmd .= " ; ".$SCRIPTS."/classfreq -v 1 -col 2";
  $cmd .= " -i ".$main::outfile{$seq_type.'_seqlen'};
  $cmd .= " -ci ".$main::param{profiles_ci};
  $cmd .= " -o ".$main::outfile{$seq_type.'_seqlen_distrib'};
  &RSAT::util::one_command($cmd, 1, "", log=>$main::out);

  ## Plot the distribution of sequence lengths
  $cmd = $SCRIPTS."/XYgraph -lines -pointsize 0";
  $cmd .= " -format ".$main::param{img_format};
  my $title = "Peak lengths";
  $title .= "; ".$main::param{title} if ($main::param{title});
  $cmd .= " -title '".$title."'";
  $cmd .= " -xsize 700 -xcol 3 -xleg1 'Peak length'";
  $cmd .= " -ysize 250 -ycol 4 -yleg1 'Number of peaks'";
  $cmd .= " -xmin 0 -ymin 0";
#  $cmd .= " -xgstep1 ".($main::param{profiles_ci});
  $cmd .= " -xgstep2 ".$main::param{profiles_ci};
  $cmd .= " -i ".$main::outfile{$seq_type.'_seqlen_distrib'};
  $cmd .= " -r_plot" if ($main::param{r_plot});
  $cmd .= " -o ".$main::outfile{$seq_type.'_seqlen_distrib_graph'};
  &RSAT::util::one_command($cmd, 1, "", log=>$main::out);

  ################################################################
  ## Convert the genomic coordinates
  ## if these are embedded in the fasta headers
  ##
  ## TO DISCUSS: SHOULD WE ALSO EXPORT CONTROL PEAKS IN BED FORMAT ?
  ## CAN BE PROBLEMATIC BECAUSE A USER CAN ENTER GALAXY-FORMATTED SEQ
  ## FOR TEST BUT NOT FOR CONTROL. IN THE MEAN TIME THIS OPTION IS
  ## DEACTIVATED: ONLY THE TEST SEQUENCES ARE EXPORTED.
  if (($main::param{seq_source})
      && ($seq_type eq "test")) {
    &RSAT::message::TimeWarn("Converting fasta headers to BED file") if ($main::verbose >= 2);
    $cmd = $SCRIPTS."/convert-features";
    $cmd .= " -i ".$main::infile{$seq_type.'_seq'};
    $cmd .= " -from ".$main::param{seq_source}."_seq"; ## e.g. galaxy_seq
    $cmd .= " -to bed ";
    my $bed_prefix = $main::param{title};
    $bed_prefix .= " ".$seq_type;

    ## The "ctrl" color cannot be selected as long as the enclosing
    ## condition restricts the export to test sequences. It is kept
    ## for the day control peaks are exported as well.
    my $bed_color;
    if ($seq_type eq "ctrl") {
      $bed_color = "128,128,128";
    } else {
      $bed_color = "13,115,67";
    }
    my $bed_header= "track name=\"".$seq_type."_peaks\" description=\"RSAT peak-motifs_quick ".$bed_prefix.": peaks\" visibility=2 use_score=1 color=".$bed_color;

    ## The header contains the user-specified title. Escape the
    ## characters having a special meaning in the replacement part of
    ## the sed expression (backslash, "/" delimiter and "&"), otherwise
    ## a title such as "Oct4/Sox2" would break the sed command.
    (my $sed_header = $bed_header) =~ s{([\\/&])}{\\$1}g;
    $cmd .= " | sed '1s/^.*/".$sed_header."/' "; ## change first line
    $cmd .= " > ".$main::outfile{$seq_type."_seq_coord"};
    &RSAT::util::one_command($cmd, 1, "", log=>$main::out);
  }
}

################################################################
## Run position-analysis to compute composition profiles (residues,
## dinucleotides) and count-words to compute background models for
## sequence scanning.
sub CompositionProfiles {
  ## Usage: &CompositionProfiles($seq_type)
  ##
  ## For each oligo length listed in @composition_oligo_lengths, this
  ## routine
  ## 1) counts the oligos in the purged sequences (count-words),
  ##    converts the counts to background models (inclusive and
  ##    transition formats) and draws a heatmap of the transition table;
  ## 2) computes positional profiles (position-analysis) and draws them
  ##    with XYgraph.
  my ($seq_type) = @_;

  ################################################################
  ## Compute nucleotide and dinucleotide frequencies
  &RSAT::message::TimeWarn("Computing nucleotide and dinucleotide frequencies") if ($main::verbose >= 2);
  for my $ol (@composition_oligo_lengths) {

    ## Compute background models of order 0 (Bernoulli) and 1 (Markov)
    ## from the sequences.
    ##
    ## The command is declared as a lexical variable, in order to avoid
    ## overwriting a package variable $cmd as a side effect.
    my $cmd = $BIN."/count-words -v 1";
    $cmd .= " -i ".$main::outfile{$seq_type."_purged"};
    $cmd .= " -l ".$ol;
    $cmd .= " -1str";
    $cmd .= " ".$main::param{profiles_noov} if ($main::param{profiles_noov} eq "-noov"); ## The option -ovlp is not supported by count-words
    $cmd .= " > ".$main::outfile{$ol."nt_".$seq_type."_freq"};

    &RSAT::util::one_command($cmd, 1, $main::outfile{$ol."nt_".$seq_type."_freq"}."_time.txt", log=>$main::out);

    ## Convert background model in INCLUSIVE format for matrix-scan-quick
    $cmd = $SCRIPTS."/convert-background-model -from oligos -to inclusive ";
    $cmd .= " -i ".$main::outfile{$ol."nt_".$seq_type."_freq"};
    $cmd .= " -o ".$main::outfile{$ol."nt_".$seq_type."_inclusive"};

    ## Convert background model to transition table and draw the heatmap of transition probabilities
    $cmd .= "; ".$SCRIPTS."/convert-background-model -from oligos -to transitions ";
    $cmd .= " -i ".$main::outfile{$ol."nt_".$seq_type."_freq"};
    $cmd .= " -o ".$main::outfile{$ol."nt_".$seq_type."_transitions"};

    ## Select the columns of the transition table passed to
    ## draw-heatmap (the same selection is used for all oligo lengths).
    $cmd .= " ; cut -f 1-5,7";
    $cmd .= " ".$main::outfile{$ol."nt_".$seq_type."_transitions"};
    $cmd .= " | ".$SCRIPTS."/draw-heatmap -min 0 -max 1 -digits 3 -out_format png -col_width 50";
    $cmd .= " -rownames";
    $cmd .= " -r_plot" if ($main::param{r_plot});
    $cmd .= " -o ".$main::outfile{$ol."nt_".$seq_type."_heatmap"};
    &RSAT::util::one_command($cmd, 1, "", log=>$main::out);
  }

  ################################################################
  ## Compute position profiles for 1nt and 2nt
  &RSAT::message::TimeWarn("Computing composition profiles") if ($main::verbose >= 2);
  for my $ol (@composition_oligo_lengths) {

    ## Compute positional profiles
    &RSAT::message::TimeWarn("Computing composition profiles", $ol."nt") if ($main::verbose >= 2);
    my $cmd =$SCRIPTS."/position-analysis -v 1";
    $cmd .= " -i ".$main::outfile{$seq_type."_purged"};
    $cmd .= " -format fasta";
    $cmd .= " -sort ";
    $cmd .= " -return html,chi,sig,distrib,graphs,rank,index";
    $cmd .= " -max_graphs ".$main::param{profiles_max_graphs} if ($main::param{profiles_max_graphs}  > 0);
    $cmd .= " ".$main::param{profiles_strand};
    $cmd .= " ".$main::param{profiles_noov};
    $cmd .= " -seqtype dna";
    $cmd .= " -l ".$ol;
    $cmd .= " -ci ".$main::param{profiles_ci};
    $cmd .= " -img_format ".$main::param{img_format};
    $cmd .= " -title '".$main::param{title}."'";
    $cmd .= " -origin ".$main::param{origin};
    $cmd .= " -offset ".$main::param{offset};
    $cmd .= " -o ".$main::outfile{$ol."nt_".$seq_type."_profiles"};
    &RSAT::util::one_command($cmd, 1, $main::outfile{$ol."nt_".$seq_type."_profiles"}."_time.txt", log=>$main::out);

    ## Draw a XY graph with composition profiles
    my $color_file = $ENV{RSAT}."/perl-scripts/lib/color_palettes/".$ol."nt".$main::param{profiles_strand}."_colors.tab";

    ## Last column plotted by XYgraph (-ycol 2-$col_nb). Presumably one
    ## column per possible oligo after the position column -- to be
    ## confirmed against the position-analysis output.
    my $col_nb = 4**$ol + 1;
    $cmd = 'grep -v ";" '.$main::outfile{$ol."nt_".$seq_type."_profiles"};
    $cmd .= ' | cut -f 1,10-10000'; ## suppress columns preceding the position frequencies
    $cmd .= ' | '.$SCRIPTS.'/transpose-table';
#    $cmd .= ' | grep -P \'(^id)|(^\-?\d+)\'';
    $cmd .= " | ".$SCRIPTS."/XYgraph -xcol 1 -ycol 2-".$col_nb;
#   $cmd .= " -xgstep1 ".($main::param{profiles_ci});
    $cmd .= " -xgstep2 ".$main::param{profiles_ci};
    $cmd .= " -vline '#666666' 0";
    $cmd .= " -xsize 700 -ysize 250";
    $cmd .= " -r_plot" if ($main::param{r_plot});
    $cmd .= " -format ".$main::param{img_format};
    $cmd .= " -legend -lines -pointsize 0 -legend -header";

    ## Use the residue color palette if it is installed, otherwise let
    ## XYgraph choose the colors.
    if (-e $color_file) {
      $cmd .= " -colors ".$color_file;
    } else {
      &RSAT::message::Warning("Cannot find residue color specification file", $color_file) if ($main::verbose >= 2);
    }
    #    $cmd .= " -symbols "; ## THIS OPTION IS NOT WORKING ANYMORE : THE LEGEND DISPLAYS SYMBOLS BUT NOT THE GRAPH. THIS HAS TO BE FIXED

    ## Graph title, depending on the oligo length
    my $title;
    if ($ol == 1) {
      $title = "Nucleotide";
    } elsif ($ol == 2) {
      $title = "Dinucleotide";
    } else {
      $title = $ol."nucleotide";
    }
    $title .= " composition profiles";

    $title .= "; ".$seq_type." sequence";
    $title .= "; ".$main::param{title} if ($main::param{title});
    $cmd .= " -title '".$title."'";
    $cmd .= " -xleg1 'Position' -xsize 700";
    $cmd .= " -yleg1 'Occurrences' -ysize 250  -ymin 0";
    $cmd .= " -o ".$main::outfile{$ol."nt_".$seq_type."_profiles_graph"};
    &RSAT::util::one_command($cmd, 1, "", log=>$main::out);
  }
}


################################################################
## Compute background models for oligo-analysis and dyad-analysis from
## the control sequence file.
##
## Beware: when control sequences are provided, the background models
## are computed with the oligonucleotide lengths (from min to max)
## used for oligo-analysis. For an oligo length of k, the markov order
## is thus m = k - 1.
##
sub ComputeBgModels {
  ## The argument is accepted for the sake of the callers, but the
  ## commands below always read the purged control sequences.
  my ($bg_seq) = @_;

  ## Options shared by the two background commands
  my $ctrl_file_opt = " -i ".$main::outfile{"ctrl_purged"};
  my $strand_opt = " ".$main::param{strand};

  ## Only the option -noov is transmitted (the option -ovlp is not
  ## supported by count-words).
  my $noov_opt = "";
  if ($main::param{disco_noov} eq "-noov") {
    $noov_opt = " ".$main::param{disco_noov};
  }

  ## Background models for oligo-analysis: one file per oligo length,
  ## with the same lengths as for the analysis of the test sequences.
  if ($disco{oligos}) {
    if ($main::verbose >= 2) {
      &RSAT::message::TimeWarn("Computing expected oligo frequencies from control set.");
    }
    foreach my $oligo_len ($main::param{oligo_min_len}..$main::param{oligo_max_len}) {
      my $bg_key = 'oligos_'.$oligo_len.'nt_test_vs_ctrl'."_bg_file";
      my $oligo_cmd = join("",
			   $SCRIPTS."/oligo-analysis -v 1",
			   " -quick",
			   $ctrl_file_opt,
			   " -format fasta",
			   " -return occ,freq",
			   $strand_opt,
			   $noov_opt,
			   " -seqtype dna",
			   " -l ".$oligo_len,
			   " -o ".$main::outfile{$bg_key});
      &RSAT::util::one_command($oligo_cmd, 1, "", log=>$main::out);
    }
  }

  ## Background model for dyad-analysis: dyad counts/frequencies in
  ## the control sequences.
  ## NOTE: AS A NEGATIVE CONTROL, USE SAME FILE AS INPUT AND CTRL, AND SET PSEUDO TO 0.
  if ($disco{dyads}) {
    if ($main::verbose >= 2) {
      &RSAT::message::TimeWarn("Computing expected dyad frequencies from control set.");
    }
    my $bg_key = 'dyads_test_vs_ctrl'."_bg_file";
    my $dyad_cmd = join("",
			$SCRIPTS."/dyad-analysis -v 1",
			$ctrl_file_opt,
			" -quick",
			" -format fasta",
			" -return occ,freq",
			$strand_opt,
			$noov_opt,
			" -seqtype dna",
			" -l 3 -sp 0-20 ",
			" -o ".$main::outfile{$bg_key});
    &RSAT::util::one_command($dyad_cmd, 1, "", log=>$main::out);
  }
}

################################################################
## Run oligo-analysis on the test set
sub OligoAnalysis {
  ## Result files of the individual runs, collected for the optional
  ## merging step.
  my @result_files = ();

  foreach my $len ($main::param{oligo_min_len}..$main::param{oligo_max_len}) {

    ## List of analyses to run for this oligo length. Each element is a
    ## pair [pattern type, markov order].
    my @runs = ();
    if ($main::infile{ctrl_seq}) {
      ## Test versus control: a single background model (m = k-1),
      ## trained on the control sequences.
      push @runs, ['oligos_'.$len.'nt_test_vs_ctrl', $len - 1];
    } else {
      ## Single input: one analysis per Markov order. Negative orders
      ## are relative to the oligo length.
      foreach my $order ($main::param{oligo_min_mkv}..$main::param{oligo_max_mkv}) {
	my $markov = ($order < 0) ? ($order + $len) : $order;
	push @runs, ['oligos_'.$len.'nt'.'_mkv'.$markov, $markov];
      }
    }

    foreach my $run (@runs) {
      my ($pattern_type, $markov) = @{$run};
      push @result_files, $main::outfile{$pattern_type};

      if ($main::verbose >= 2) {
	&RSAT::message::TimeWarn("Running oligo-analysis", $pattern_type);
      }

      ## Options which do not depend on the background model
      my @cmd_parts = (
	$SCRIPTS."/oligo-analysis -v 1",
	" -quick",
	" -i ".$main::outfile{"test_purged"},
	" -format fasta",
	" -sort",
	" -lth ratio ".$main::param{min_ratio},
	" -lth occ_sig ".$main::param{min_sig},
	" -uth rank ".$main::param{patterns_max_rank},
	" -return occ,proba,rank",
	" ".$main::param{strand},
	" ".$main::param{disco_noov},
	" -seqtype dna",
	" -l ".$len,
      );

      ## Background model: expected frequencies from the control set
      ## (computed on the fly if the file does not exist yet), or
      ## Markov model estimated from the test sequences.
      if ($main::infile{ctrl_seq}) {
	if (!-e $main::outfile{$pattern_type."_bg_file"}) {
	  &ComputeBgModels($main::infile{ctrl_seq});
	}
	push @cmd_parts, " -expfreq ".$main::outfile{$pattern_type."_bg_file"};
      } else {
	push @cmd_parts, " -markov ".$markov;
      }

      ## Detection of under-represented oligos
      push @cmd_parts, " -under " if ($main::param{under_represented} == 1);

      push @cmd_parts, (
	" -pseudo ".$main::param{disco_pseudo},
	" -o ".$main::outfile{$pattern_type},
      );
      &RSAT::util::one_command(join("", @cmd_parts), 1, $main::outfile{$pattern_type}."_time.txt", log=>$main::out);

      ## Without merging, one set of matrices is built per result file
      if (!$main::param{merge_lengths}) {
	&MatrixFromPatterns($main::outfile{$pattern_type}, $main::prefix{$pattern_type.'_pssm'}, $pattern_type, 0);
      }
    }
  }

  ## Merge oligo-analysis result files and build matrices from the
  ## merged file.
  if ($main::param{merge_lengths}) {
    &MergeAndBuildMatrix("oligos_".$main::param{oligo_range}, @result_files);
  }

}

################################################################
## Run oligo-diff to compare the test set to the control set
sub OligoDiff {
  ## One oligo-diff run per oligo length, each followed by the
  ## construction of matrices from the significant oligos.
  foreach my $oligo_len ($main::param{oligo_min_len}..$main::param{oligo_max_len}) {
    my $pattern_type = 'oligo_diff_'.$oligo_len.'nt';
    my $result_file = $main::outfile{$pattern_type};

    if ($main::verbose >= 2) {
      &RSAT::message::TimeWarn("Running oligo-diff", $oligo_len."nt");
    }

    my $cmd = join("",
		   $SCRIPTS."/oligo-diff -v 3",
		   " -test ".$main::outfile{"test_purged"},
		   " -ctrl ".$main::outfile{"ctrl_purged"},
		   " -nopurge", ## The sequences have already been purged
		   " -l ".$oligo_len,
		   " ".$main::param{strand},
		   " ".$main::param{disco_noov},
		   " -side test",
		   " -lth ratio ".$main::param{min_ratio},
		   " -lth occ_sig ".$main::param{min_sig},
		   " -uth rank ".$main::param{patterns_max_rank},
		   " -o ".$result_file);

    ## Options currently not transmitted to oligo-diff:
    ##   -sort
    ##   -return occ,proba,rank
    ##   -seqtype dna
    ##   -pseudo $main::param{disco_pseudo}

    &RSAT::util::one_command($cmd, 1, $result_file."_time.txt", log=>$main::out);
    &MatrixFromPatterns($main::outfile{$pattern_type}, $main::prefix{$pattern_type.'_pssm'}, $pattern_type, 0);
  }
}


################################################################
## Run dyad-analysis on the test set
sub DyadAnalysis {
  ## Usage: &DyadAnalysis()
  ##
  ## Runs dyad-analysis on the purged test sequences, then builds
  ## matrices from the resulting dyads (&MatrixFromPatterns). When
  ## control sequences are provided, the expected frequencies are taken
  ## from the control background file, which is computed on the fly if
  ## it does not exist yet.
  &RSAT::message::TimeWarn("Running dyad-analysis") if ($main::verbose >= 2);

  ## The pattern type is declared as a lexical variable, in order to
  ## avoid overwriting a package variable $pattern_type as a side
  ## effect of this routine.
  my $pattern_type;
  if ($main::infile{ctrl_seq}) {
    ## Test versus control sequences
    $pattern_type = 'dyads_test_vs_ctrl';
  } else {
    ## Single-input mode
    $pattern_type = "dyads";
  }

  my $cmd = $SCRIPTS."/dyad-analysis -v 1";
  $cmd .= " -i ".$main::outfile{"test_purged"};
  $cmd .= " -quick";
  $cmd .= " -format fasta";
  $cmd .= " -sort";
  $cmd .= " -lth ratio ".$main::param{min_ratio};
  $cmd .= " -lth occ_sig ".$main::param{min_sig};
  $cmd .= " -uth rank ".$main::param{patterns_max_rank};
  $cmd .= " -return occ,proba,ratio,zscore,rank";
  $cmd .= " ".$main::param{strand};
  $cmd .= " ".$main::param{disco_noov};
  $cmd .= " -seqtype dna";
  $cmd .= " -l 3 -sp 0-20 ";

  ## Background model: control sequences or monad frequencies
  if ($main::infile{ctrl_seq}) {
    &ComputeBgModels($main::infile{ctrl_seq}) unless -e ($main::outfile{$pattern_type."_bg_file"});
    $cmd .= " -expfreq ".$main::outfile{$pattern_type."_bg_file"};
  } else {
    $cmd .= " -bg monads";
  }
  $cmd .= " -pseudo ".$main::param{disco_pseudo};
  $cmd .= " | perl -pe 's/n\\{0\\}//g'"; ## Suppress zero spacings for merging with oligos
  $cmd .= " >".$main::outfile{$pattern_type};
  &RSAT::util::one_command($cmd, 1, $main::outfile{$pattern_type}."_time.txt", log=>$main::out);
  &MatrixFromPatterns($main::outfile{$pattern_type}, $main::prefix{$pattern_type."_pssm"}, $pattern_type, 0);
}

################################################################
## Run position-analysis to discover oligonucleotides with positional
## biases
sub PositionAnalysis {
  ## Result files of the individual runs, collected for the optional
  ## merging step.
  my @result_files = ();

  foreach my $oligo_len ($main::param{oligo_min_len}..$main::param{oligo_max_len}) {
    my $pattern_type = 'positions_'.$oligo_len.'nt';
    push @result_files, $main::outfile{$pattern_type};

    if ($main::verbose >= 2) {
      &RSAT::message::TimeWarn("\n Running position-analysis", $oligo_len."nt");
    }

    ## Alternative return fields, currently not used:
    ##   -return html,chi,sig,distrib,exp_occ,graphs,rank,index
    my @cmd_parts = (
      $SCRIPTS."/position-analysis -v 1",
      " -i ".$main::outfile{"test_purged"},
      " -format fasta",
      " -sort ",
      " -return chi,sig,rank,distrib,index",
      " -max_graphs ".$main::param{patterns_max_rank},
      " ".$main::param{strand},
      " ".$main::param{disco_noov},
      " -seqtype dna",
      " -l ".$oligo_len,
      " -ci ".$main::param{positions_ci},
      " -lth_occ ".$main::param{positions_min_occ},
      " -lth_sig ".$main::param{min_sig},
      " -uth_rank ".$main::param{patterns_max_rank},
      " -img_format ".$main::param{img_format},
      " -title '".$main::param{title}."'",
      " -origin ".$main::param{origin},
      " -offset ".$main::param{offset},
    );

    ## The option -max_graphs is specified a second time when a
    ## positive number of graphs has been specified for the positions.
    if ($main::param{positions_max_graphs}  > 0) {
      push @cmd_parts, " -max_graphs ".$main::param{positions_max_graphs};
    }
    push @cmd_parts, " -o ".$main::outfile{$pattern_type};

    &RSAT::util::one_command(join("", @cmd_parts), 1, $main::outfile{$pattern_type}."_time.txt", log=>$main::out);

    ## Without merging, one set of matrices is built per result file
    if (!$main::param{merge_lengths}) {
      &MatrixFromPatterns($main::outfile{$pattern_type}, $main::prefix{$pattern_type."_pssm"}, $pattern_type, 0);
    }
  }

  ## Merge position-analysis result files and build matrices from the
  ## merged file.
  if ($main::param{merge_lengths}) {
    &MergeAndBuildMatrix("positions_".$main::param{oligo_range}, @result_files);
  }

}

################################################################
## Merge a set of oligo files and use it as input for
## matrix-from-patterns.
sub MergeAndBuildMatrix {
  ## Usage: &MergeAndBuildMatrix($pattern_type, @pattern_files)
  ##
  ## Concatenates the pattern files into $main::outfile{$pattern_type}
  ## and passes the merged file to &MatrixFromPatterns.
  my ($pattern_type, @pattern_files) = @_;

  ## Discard undefined or empty file names, which would contribute
  ## nothing to the command line.
  my @input_files = grep { defined($_) && ($_ ne "") } @pattern_files;

  ## Without any file argument, "cat" reads its standard input, which
  ## could block the whole work flow. In such case, /dev/null is used
  ## as input, so that an empty merged file is produced.
  unless (@input_files) {
    &RSAT::message::Warning("No pattern file to merge for", $pattern_type) if ($main::verbose >= 2);
    @input_files = ("/dev/null");
  }

  my $cmd = "cat ";
  $cmd .= join " ", @input_files;

  ## The space before the redirection ensures that the last file name
  ## can never be interpreted as a file descriptor number by the shell.
  $cmd .= " > ".$main::outfile{$pattern_type};
  &RSAT::util::one_command($cmd, 1, "", log=>$main::out);
  &MatrixFromPatterns($main::outfile{$pattern_type}, $main::prefix{$pattern_type."_pssm"}, $pattern_type, 0);
}

################################################################
## Concatenate the options for MEME and compute the suffix
sub CalcMemeOptions {
  ## Both results are stored in global variables ($meme_options and
  ## $meme_suffix), which are reset at each call.

  ## The option string starts with the sequence file, the suffix with
  ## the strand option.
  $meme_options = " ".$main::outfile{"test_purged"};
  $meme_suffix = "".$main::param{strand};

  ## MEME scans a single strand by default: -revcomp is added for
  ## any strand option other than -1str.
  $meme_options .= " -revcomp" if ($main::param{strand} ne "-1str");

  ## Background model file, if a file name has been defined for it
  my $bg_file = $main::outfile{meme_bg_file};
  $meme_options .= " -bfile ".$bg_file if ($bg_file);

  ## Each MEME option is appended to the option string with its value.
  ## All of them except bfile also contribute to the suffix.
  for my $opt_name (@MEME_options) {
    my $opt_value = $main::param{"meme_".$opt_name};
    $meme_options .= " -".$opt_name." ".$opt_value;
    next if ($opt_name eq "bfile");
    $meme_suffix .= "_".$opt_name.$opt_value;
  }
}

################################################################
## Concatenate the options for DREME and compute the suffix
sub CalcDremeOptions {
  ## Usage: &CalcDremeOptions()
  ##
  ## Builds the option string passed to DREME and the suffix used to
  ## name its output files. Both results are stored in global
  ## variables, which are reset at each call.
  $dreme_options = ""; ## Must be a global variable
  $dreme_suffix = ""; ## Must be a global variable

  ## sequence file
  $dreme_options .= " -p ".$main::outfile{"test_purged"};

  ## strands: there is apparently no strand option in the current release of DREME (MEME 4.6.1)

  ## If background sequences have been specified, use them as negative
  ## sequence file for DREME. The test-versus-control tag belongs to
  ## the suffix of the output files: appending it to the options would
  ## corrupt the name of the control sequence file.
  if (($main::infile{ctrl_seq})) {
    $dreme_options .= " -n ".$main::outfile{"ctrl_purged"};
    $dreme_suffix .= "_test_vs_ctrl";
  }

  ## Each DREME option is appended to the option string with its value.
  ## All of them except bfile also contribute to the suffix.
  foreach my $option (@DREME_options) {
#    &RSAT::message::Debug("adding dreme option", $option, $main::param{"dreme_".$option}) if ($main::verbose >= 10);
    $dreme_options .= " -".$option." ".$main::param{"dreme_".$option};
    unless ($option eq "bfile") {
      $dreme_suffix .= "_".$option.$main::param{"dreme_".$option};
    }
  }
}

################################################################
## Compute background for MEME
sub CalcMemeBackground {
  ## Usage: &CalcMemeBackground()
  ##
  ## Pipes the purged test sequences to fasta-get-markov (Markov order
  ## taken from the parameter bg_meme_markov) and stores the result in
  ## the MEME background file.

  ## Declared locally, like in the routines running MEME and DREME,
  ## rather than relying on an undeclared variable.
  my $die_on_error = 0;

  my $cmd = "cat ".$main::outfile{"test_purged"}." | ";
  $cmd .= &RSAT::server::GetProgramPath("fasta-get-markov", $die_on_error);
  $cmd .= " -m ".$main::param{bg_meme_markov};
  $cmd .= " > ".$main::outfile{meme_bg_file};
  &RSAT::util::one_command($cmd, 1, $main::outfile{meme_bg_file}."_time.txt", log=>$main::out);
}


################################################################
## Run MEME
sub RunMEME {
  my $die_on_error = 0;
  my $meme_cmd = &RSAT::server::GetProgramPath("meme", $die_on_error);

  ## The background file must exist before running MEME
  if (!-e $main::outfile{meme_bg_file}) {
    &CalcMemeBackground;
  }

  ################################################################
  ## MEME does not accept sequences shorter than 8. The filtering of
  ## the short sequences is however skipped for the time being.
#  my $min_seq_len = &max(8, $main::param{meme_minw});
#  $cmd = "convert-seq ";
#  $cmd .= " -from fasta -to fasta -skip_short ".$min_seq_len;
#  $cmd .= " -i ".$main::outfile{"test_purged"};
#  $cmd .= " -o ".$main::outfile{"test_purged_noshort"};

  ## MEME command: program path, options (global variable
  ## $meme_options), and redirection to the MEME output file.
  my $meme_file = $main::outfile{meme};
  my $run_cmd = join("",
		     "",
		     $meme_cmd,
		     " ".$meme_options,
		     " > ".$meme_file);
  &RSAT::util::one_command($run_cmd, 1, $meme_file."_time.txt", log=>$main::out);

  ## Conversion of the MEME matrices to transfac format, with logos
  my $convert_cmd = join("",
			 $SCRIPTS."/convert-matrix -i ".$main::outfile{meme},
			 " -from meme",
			 " -to transfac",
			 " -return counts,parameters,logo",
			 " -logo_file ".$main::outfile{meme_logos},
			 " -o ".$main::outfile{meme_tf});
  &RSAT::util::one_command($convert_cmd, 1, "", log=>$main::out);
}

################################################################
## Run DREME
sub RunDREME {
  my $die_on_error = 0;
  my $dreme_cmd = &RSAT::server::GetProgramPath("dreme", $die_on_error);

  ################################################################
  ## DREME does not accept sequences shorter than 8. The filtering of
  ## the short sequences is however skipped for the time being.
#  my $min_seq_len = &max(8, $main::param{dreme_minw});
#  $cmd = "convert-seq ";
#  $cmd .= " -from fasta -to fasta -skip_short ".$min_seq_len;
#  $cmd .= " -i ".$main::outfile{"test_purged"};
#  $cmd .= " -o ".$main::outfile{"test_purged_noshort"};

  ## DREME command: program path, options (global variable
  ## $dreme_options), and redirection to the DREME output file.
  my $dreme_file = $main::outfile{dreme};
  my $run_cmd = join("",
		     "",
		     $dreme_cmd,
		     " ".$dreme_options,
		     " > ".$dreme_file);
  &RSAT::util::one_command($run_cmd, 1, $dreme_file."_time.txt", log=>$main::out);

  ## The conversion of the DREME matrices is currently disabled
#  $cmd = $SCRIPTS."/convert-matrix -i ".$main::outfile{dreme};
#  $cmd .= " -from dreme";
#  $cmd .= " -to transfac";
#  $cmd .= " -return counts,parameters,logo";
#  $cmd .= " -logo_file ".$main::outfile{dreme_logos};
#  $cmd .= " -o ".$main::outfile{dreme_tf};
#  &RSAT::util::one_command($cmd, 1, "", log=>$main::out);
}

################################################################
## Run ChIPMunk (java) on the purged test sequences and store its
## standard output in the file indexed as $main::outfile{chipmunk}.
##
## The installation directory is taken from the environment variable
## chipmunk_dir; the routine issues a fatal error if it is not set.
##
## Side effect: the motif lengths, verbosity, mode and sequence file
## are appended to the global $chipmunk_options.
sub RunChIPMunk {
  my $chipmunk_dir = $ENV{chipmunk_dir};
  &RSAT::error::FatalError("ChIPMunk directory must be specified in the file RSAT_config.props (chipmunk_dir=[ChIPMunk installation path]). ")
    unless ($chipmunk_dir);

  ## Positional arguments, appended to the options in this order
  my @positional_args = (
			 $main::param{chipmunk_start_motif_length},
			 $main::param{chipmunk_stop_motif_length},
			 $main::param{chipmunk_verbose},
			 $main::param{chipmunk_mode},
			 "s:".$main::outfile{"test_purged"},
			);
  $chipmunk_options .= " ".join(" ", @positional_args);

  ## Run the CHIPMUNK command
  my $chipmunk_result = $main::outfile{chipmunk};
  my $cmd = join("",
		 "java -cp '", $chipmunk_dir, "'",
		 " ru_genetika.ChIPMunk",
		 " ", $chipmunk_options,
		 " > ", $chipmunk_result);
  &RSAT::util::one_command($cmd, 1, $chipmunk_result."_time.txt", log=>$main::out);

  ## Matrix conversion is currently disabled
#   $cmd = $SCRIPTS."/convert-matrix -i ".$main::outfile{chipmunk};
#   $cmd .= " -from chipmunk";
#   $cmd .= " -to transfac";
#   $cmd .= " -return counts,parameters,logo";
#   $cmd .= " -logo_file ".$main::outfile{chipmunk_logos};
#   $cmd .= " -o ".$main::outfile{chipmunk_tf};
#  &RSAT::util::one_command($cmd, 1, "", log=>$main::out);

}

################################################################
## Compare the significance of words (oligos, dyads) discovered by
## the different approaches.
##
## The pattern files listed in the global @patterns_to_merge are
## combined with compare-scores + row-stats into a single table
## ($main::outfile{merged_words}), which is then exported as HTML and
## drawn as a heat map. If $disco{merged_words} is set, matrices are
## built from the merged words.
sub MergeWords {
  &RSAT::message::Debug("Merging words", join(",", @patterns_to_merge)) if ($main::verbose >= 2);

  ## Score column per family of pattern types. The rules are tried in
  ## this order and the first matching one wins.
  my @score_column_rules = (
			    [qr/oligos_/, 8],
			    [qr/oligo_diff/, 11],
			    [qr/dyads/, 8],
			    [qr/local_words/, 9],
			    [qr/positions/, 9],
			   );

  my $cmd = $SCRIPTS."/compare-scores -v 1 ";
  my $file_nb = 0;
  foreach my $pattern_type (@patterns_to_merge) {
    &RSAT::message::Debug("Merging words of type", $pattern_type) if ($main::verbose >= 5);

    ## Avoid recycling the merged words from a previous run in the compilation
    next if ($pattern_type eq "merged_words");

    my $pattern_file = $main::outfile{$pattern_type};
    unless (-e $pattern_file) {
      &RSAT::message::Warning("Missing pattern file", $pattern_type, $pattern_file) if ($main::verbose >= 2);
      next;
    }

    ## The file is added to the command before its score column is
    ## looked up, so a file of unknown type remains an input file.
    $file_nb++;
    $cmd .= " -i ".$pattern_file;

    my $sig_col;
    foreach my $rule (@score_column_rules) {
      my ($type_regex, $column) = @{$rule};
      if ($pattern_type =~ $type_regex) {
	$sig_col = $column;
	last;
      }
    }
    unless (defined($sig_col)) {
      &RSAT::message::Warning("Unknown score column for pattern type", $pattern_type);
      next;
    }

    $cmd .= " -sc".$file_nb." ".$sig_col;
    $cmd .= " -suppress ".$main::dir{output}."/"."results/".$pattern_type."/";
  }

  ## Options shared by all input files, then sorting with row-stats
  $cmd .= join("",
	       " -ic 1",
	       " -lc",
	       " -null .",
	       " -suppress ".$main::param{prefix}."_",
	       " -suppress '\.tab'",
	       " | ".$SCRIPTS."/row-stats -after 1 -sort ",
	       " -o ".$main::outfile{merged_words});
  &RSAT::util::one_command($cmd, 1, $main::outfile{merged_words}."_time.txt", log=>$main::out);

  ## HTML version of the table (convenient for sorting the words
  ## according to different columns)
  my $html_cmd = $SCRIPTS."/text-to-html -i ".$main::outfile{merged_words}
    ." -o ".$main::outfile{merged_words_html};
  &RSAT::util::one_command($html_cmd, 1, "", log=>$main::out);

  ## Heat map of the word significance table
  my @heatmap_parts = ($SCRIPTS."/draw-heatmap -min 0 -max 10 -rownames -digits 2 -out_format png");
  push @heatmap_parts, " -r_plot" if ($main::param{r_plot});
  push @heatmap_parts, (" -col_width 40 -rownames -gradient fire",
			" -row_height 16",
			" -i ".$main::outfile{merged_words},
			" -o ".$main::outfile{merged_words_heatmap});
  &RSAT::util::one_command(join("", @heatmap_parts), 1, "", log=>$main::out);

  &RSAT::message::TimeWarn("Word comparison table", $main::outfile{merged_words}) if ($main::verbose >= 2);

  ## Extract position-specific scoring matrices from the merged words
  if ($disco{merged_words}) {
    my $merged_type = "merged_words";
    &MatrixFromPatterns($main::outfile{$merged_type}, $main::prefix{$merged_type.'_pssm'}, $merged_type, 4);
  }
}


################################################################
## Merge all discovered motifs in a single file.
##
## For each pattern type of the global @pattern_types, the matrix
## file indexed as <pattern_type>_pssm_tf is appended (if it exists)
## to $main::outfile{motifs_discovered}. TRANSFAC format is used
## because it allows to associate a name to each matrix.
sub MergeMotifs {
  &RSAT::message::TimeWarn("Merging discovered motifs", join(",", @pattern_types)) if ($main::verbose >= 2);

  my $merged_file = $main::outfile{motifs_discovered};

  ## First step: remove previous versions of the merged motif file
  my @shell_steps = ("rm -f ".$merged_file);

  ## Initialization with the reference motifs is currently disabled
#  if (defined($main::infile{ref_motifs})) {
#    $cmd = "cp -f";
#    $cmd .= " ".$main::outfile{ref_motifs_transfac};
#    $cmd .= " ".$main::outfile{motifs_discovered};
#  }

  ## Next steps: append each existing matrix file to the merged file
  foreach my $pattern_type (@pattern_types) {
    my $matrix_file = $main::outfile{$pattern_type.'_pssm_tf'};
    unless (-e $matrix_file) {
      &RSAT::message::Warning("Missing matrix file", $pattern_type, $matrix_file) if ($main::verbose >= 2);
      next;
    }
    push @shell_steps, "cat ".$matrix_file." >> ".$merged_file;
  }

  ## All steps are run as a single shell command
  &RSAT::util::one_command(join("; ", @shell_steps), 1, "", log=>$main::out);

  &RSAT::message::TimeWarn("Merged discovered motifs", $merged_file) if ($main::verbose >= 2);
}


################################################################
## Load the discovered motifs from the merged .tf file.
##
## Returns the list of matrices (global @motifs). The result is
## cached: once the motifs have been read ($motifs_read), the list is
## returned without re-reading the file.
##
## Returns nothing (without setting the cache flag) if the background
## model file $main::param{scan_bg_file} does not exist.
##
## Side effects: fills the globals @motifs and %motif_by_id, and calls
## &SetFileNamesForMotif() for each motif.
sub ReadDiscoveredMotifs {

  ## If motifs have already been read, avoid re-reading them
  return (@motifs) if ($motifs_read);

  ## make sure the list is empty since it is a global variable
  @motifs = ();

  ################################################################
  ## Load background model for computing the consensus and logo of each motif
  &RSAT::message::Info("Loading background model from file", $main::param{scan_bg_file}) if ($main::verbose >= 3);
  local $bg_model = RSAT::MarkovModel->new();
  if (-e $main::param{scan_bg_file}) {
    $bg_model->load_from_file($main::param{scan_bg_file}, "motifsampler");
  } else {
    &RSAT::message::Warning("Cannot generate synthesis by motifs because the background model has not been computed yet")
      if ($main::verbose >= 1);
    return;
  }

  ################################################################
  ## Read the motifs and compute their consensus
  if (-e $main::outfile{motifs_discovered}) {
    @motifs = &RSAT::MatrixReader::readFromFile($main::outfile{motifs_discovered}, "transfac");

    foreach my $matrix (@motifs) {
      my $motif_id = $matrix->get_attribute("id");
      &SetFileNamesForMotif($motif_id);

      ## We compute the consensus and logos here with the same background model as used for sequence scanning
      $matrix->setMarkovModel($bg_model);
      $matrix->calcConsensus();

      ## Index motifs by ID
      $motif_by_id{$motif_id} = $matrix;
    }

    ## Report the number of motifs actually loaded (the global
    ## $motif_nb is not assigned in this routine).
    &RSAT::message::Info(scalar(@motifs)." discovered motifs read from file", $main::outfile{motifs_discovered}) if ($main::verbose >= 3);
  } else {
    &RSAT::message::Warning("Discovered motif file does not exist",  $main::outfile{motifs_discovered});
  }

  $motifs_read = 1;
  return @motifs;
}


################################################################
## Specify the names of all files associated to one discovered motif
## and index those file names for post-processing and synthesis.
##
## Argument: $motif_id, identifier of the motif.
##
## Side effects:
## - fills %main::dir, %main::prefix and %main::outfile with keys
##   derived from the motif ID;
## - appends the motif ID to @outdir the first time the motif is seen;
## - appends the site file (and the bed file, if genomic coordinates
##   are available) to @site_files / @bed_files at EACH call;
## - sets the global $subdir to the second value returned by
##   &OutFileName() (read by &SplitMotifs() after the call).
sub SetFileNamesForMotif {
  my ($motif_id) = @_;

  ## Define a specific sub-directory for each motif
  my $motif_subdir = "results/discovered_motifs/".$motif_id;

  ## Only index the output directories once because this routine may be
  ## called several times (by &SplitMotifs() and &ReadMotifTable()).
  unless (defined($main::dir{$motif_id})) {
    ## Sub-directory of the current motif. The global $subdir cannot
    ## be used here: it is only assigned further down, so at this
    ## point it would still hold the value left by a previous call.
    $main::dir{$motif_id.'_subdir'} = $motif_subdir;

    ## Path for indexing
    my $motif_dir = $dir{output}."/".$motif_subdir;
    $main::dir{$motif_id} = $motif_dir; push @outdir, $motif_id;
  }

  ## Prefix for motif files
  $main::prefix{$motif_id."_prefix"} = &OutFileName($motif_subdir, "", $motif_id);
  my $file_prefix = $main::prefix{$motif_id."_prefix"};

  ## Output file for the motif (transfac format)
  ($main::outfile{$motif_id.'_tf'}, $subdir) = &OutFileName($motif_subdir, ".tf", $motif_id);

  ## Tab-delimited matrix file
  $main::outfile{$motif_id."_tab"} = $file_prefix.".tab";

  ## Sequence logos (direct and reverse complement)
  $main::prefix{$motif_id."_logo"} = $file_prefix."_logo";
  $main::outfile{$motif_id."_logo"} = $main::prefix{$motif_id."_logo"}.".".$main::param{img_format};
  $main::prefix{$motif_id."_logo_rc"} = $file_prefix."_logo_rc";
  $main::outfile{$motif_id."_logo_rc"} = $main::prefix{$motif_id."_logo_rc"}.".".$main::param{img_format};

  ## Predicted sites (sequence scanning)
  $main::outfile{$motif_id.'_pssm_sites'} = $file_prefix."_sites.tab";
  push @site_files, $main::outfile{$motif_id.'_pssm_sites'};
  if (($main::infile{coord})||($main::param{seq_source})) {
    $main::outfile{$motif_id.'_pssm_sites_genomic'} = $file_prefix."_sites_genomic.bed";
    push @bed_files, $main::outfile{$motif_id.'_pssm_sites_genomic'};
  }
  $main::outfile{$motif_id.'_pssm_site_distrib'} = $file_prefix."_site_distrib.tab";
  $main::outfile{$motif_id.'_pssm_site_distrib_graph'} = $file_prefix."_site_distrib.".$main::param{img_format};
  $main::outfile{$motif_id.'_pssm_sites_per_peak'} = $file_prefix."_sites_per_peak.tab";
  $main::outfile{$motif_id.'_pssm_sites_per_peak_graph'} = $file_prefix."_sites_per_peak.".$main::param{img_format};

  ## Enrichment in binding sites
  $main::outfile{$motif_id.'_pssm_enrichment'} = $file_prefix."_scan_mkv".$main::param{scan_markov}."_enrichment.tab";
  $main::outfile{$motif_id.'_pssm_enrichment_png'} = $file_prefix."_scan_mkv".$main::param{scan_markov}."_enrichment.png";

}

################################################################
## Split the discovered motifs in separate files in order to analyze
## them separately. One separate directory is created for each motif.
##
## For each motif, the matrix is exported in transfac format, then
## converted to a tab-delimited file with convert-matrix. A table
## listing all the motifs (number, identifier, directory, prefix,
## transfac file) is written to $main::outfile{motifs_discovered_table}.
sub SplitMotifs {
  &RSAT::message::TimeWarn("Splitting discovered motifs in separate files") if ($main::verbose >= 2);

  @motifs = &ReadDiscoveredMotifs();
  my $m = 0;

  ## Create a table with the lists of motifs
  my $motif_table = &OpenOutputFile($main::outfile{motifs_discovered_table});

  ## Print the header of the motif table
  print $motif_table join("\t", "#nb", "identifier", "directory", "prefix", "transfac_format_file"), "\n";

  if (scalar(@motifs) == 0) {
    print $motif_table "; NO MOTIF FOUND\n";
    &RSAT::message::Warning("No motif found") if ($main::verbose >= 2);
  } else {
    ## Number of digits for the counter of split matrix files: the
    ## number of characters of the highest counter value. (A
    ## logarithm-based computation returns one digit too few when the
    ## number of motifs is an exact power of 10.)
    my $id_digits = length(scalar(@motifs));

    ## Store each motif in a separate directory and file
    foreach my $motif (@motifs) {
      $m++;
      ## Get the motif ID or, if not defined, create an ID
      my $motif_id = $motif->get_attribute("id");
      unless ($motif_id) {
	## Use the same number of digits for all matrices so that the
	## alphabetical order of the split files corresponds to the
	## numerical order of the matrices.
	my $id_nb = sprintf("%0${id_digits}d", $m);
	$motif_id = "discomotif_".$id_nb;
      }

      ## Define all the file names for the current motif. This also
      ## sets the global $subdir, printed in the motif table below.
      &SetFileNamesForMotif($motif_id);

      ## Print the motif in transfac format
      my $motif_out = &OpenOutputFile($main::outfile{$motif_id.'_tf'});
      print $motif_out $motif->toString(sep=>"\t",
					type=>"counts",
					format=>"transfac",
				       );
      close($motif_out);

      print $motif_table join("\t", $m, $motif_id, $subdir, $main::prefix{$motif_id."_prefix"}, $main::outfile{$motif_id.'_tf'}), "\n";

      ## Convert the motif to a tab-delimited file
      my $cmd = $SCRIPTS."/convert-matrix -v 0";
      $cmd .= " -i ".$main::outfile{$motif_id.'_tf'};
      $cmd .= " -from transfac -to tab";
      $cmd .= " -return counts";
      $cmd .= " -o ".$main::outfile{$motif_id."_tab"};
      &RSAT::util::one_command($cmd, 1, "", log=>$main::out);
    }
  }
  close $motif_table;

  &RSAT::message::Info("Motif table",$main::outfile{motifs_discovered_table}) if ($main::verbose >= 2);
}


################################################################
## Identify clusters of similar motifs, and build consensus motifs.
##
## Runs matrix-clustering on the merged motif file
## ($main::outfile{motifs_discovered}, transfac format) and writes
## the result with the prefix $main::outfile{motifs_disco_clusters}.
sub ClusterMotifs {
  my $cluster_prefix = $main::outfile{motifs_disco_clusters};

  my @cmd_parts = (
		   $SCRIPTS."/matrix-clustering -v 0",
		   " -matrix '".$main::param{prefix}."' ".$main::outfile{motifs_discovered},
		   " transfac ",
		   " -title '".$main::param{title}."'",
		   " -lth Ncor 0.4 -lth cor 0.5 -lth w 5 ",
		   " -hclust_method average -label_in_tree name ",
		   " -metric_build_tree Ncor -return heatmap,align_consensus",
		   " -o ".$cluster_prefix,
		  );

  &RSAT::util::one_command(join("", @cmd_parts), 1, $cluster_prefix."_time.txt", log=>$main::out);
}


################################################################
## Read the list of discovered motifs (produced by the method
## &SplitMotifs()) and index them. This is required for several
## post-processing methods.
##
## Each data row of the table is appended to the global lists
## @motif_ids, @motif_dirs, @motif_prefixes and @motif_files (the
## lists are not emptied beforehand), and the file names of the motif
## are indexed with &SetFileNamesForMotif().
##
## Side effect: the globals $motif_nb and $motif_prefix keep the
## values of the last row read.
sub ReadMotifTable {
  my $table_file = $main::outfile{motifs_discovered_table};
  unless (-e $table_file) {
    &RSAT::message::Warning("Discovered motif table does not exist", $table_file);
    return;
  }

  my ($in) = &OpenInputFile($table_file);
  local ($motif_id, $motif_dir, $motif_file);
  while (my $row = <$in>) {
    ## Skip empty lines, comment lines (;) and the header line (#)
    next if ($row !~ /\S/);
    next if ($row =~ /^[;#]/);
    chomp($row);

    ($motif_nb, $motif_id, $motif_dir, $motif_prefix, $motif_file) = split("\t", $row);
    push @motif_ids, $motif_id;
    push @motif_dirs, $motif_dir;
    push @motif_prefixes, $motif_prefix;
    push @motif_files, $motif_file;

    &SetFileNamesForMotif($motif_id);
  }
  close ($in);
}


################################################################
## Convert word assemblies into PSSMs, by running
## matrix-from-patterns.
##
## Arguments:
##   $pattern_file   file containing the patterns (option -pl)
##   $pssm_file      output prefix (option -o), also used as prefix
##                   for the time file
##   $pattern_type   type of patterns, used as matrix prefix; the
##                   type "dyads" disables substitutions (-subst 0)
##   $score_column   score column (option -sc), only passed if > 0
sub MatrixFromPatterns {
  my ($pattern_file, $pssm_file, $pattern_type, $score_column) = @_;
  &RSAT::message::TimeWarn("Matrix from patterns", $pattern_type) if ($verbose >= 3);
  &RSAT::message::Debug("Matrix from pattern",
			"\n\tpattern_type: ".$pattern_type,
			"\n\tpattern_file: ".$pattern_file,
			"\n\tpssm_file: ".$pssm_file,
      ) if ($verbose >= 5);

  ## Done: add the option -sites (request by Anthony Mathelier,
  ## 2017-08-08).
  ##
  ## TO DO: add a link to the sites used to build the PSSM +
  ## check that these are included in the transfac-formatted output
  ## file.
  my @options = (
		 " -sites",
		 " -seq ".$main::outfile{"test_converted"},
		 " -pl ".$pattern_file,
		 " -bgfile ".$main::param{scan_bg_file},
		 " -toppat ".$main::param{asmb_toppat},
		 " -max_asmb_nb ".$main::param{matrix_nmotifs},
		 " -max_asmb_width ".$main::param{max_asmb_width},
		);

  ## Optional score column
  if ($score_column > 0) {
    push @options, " -sc ".$score_column;
  }

  ## No substitution allowed for dyads, one for the other pattern types
  push @options, (($pattern_type eq "dyads") ? " -subst 0" : " -subst 1");

  push @options, (
		  " -prefix ".$pattern_type,
		  " -flanks 2",
		  " -collect_method matrix-scan-quick",
		  " -logo",
		  " -o ".$pssm_file,
		 );

  my $cmd = $SCRIPTS."/matrix-from-patterns -v 1 ".join("", @options);
  &RSAT::util::one_command($cmd, 1, $pssm_file."_time.txt", log=>$main::out);
}


################################################################
## Compute profile of enrichment of the test peaks in hits for a given
## matrix (reference or discovered motif).
##
## Arguments:
##   $seq_file     sequence file (fasta) to scan
##   $matrix_file  matrix file (transfac format)
##   $out_file     output table (score distribution + occurrence
##                 probabilities); also used as prefix for the time file
##   $graph_file   output file for the XY plot of the enrichment
sub MotifEnrichment {
  my ($seq_file, $matrix_file, $out_file, $graph_file) = @_;

  ## The command is kept in a lexical variable, in order to avoid
  ## overwriting the package variable $cmd of the calling code.
  my $cmd;

  ## Compute the significance of enrichment in matrix hits
  $cmd = $SCRIPTS."/matrix-scan -v 1";
  $cmd .= " -quick";	   ## TO DO : CHECK IF THIS AFFECTS THE RESULT
  $cmd .= " -seq_format fasta -i ".$seq_file;
  $cmd .= " -matrix_format transfac -m ".$matrix_file;
  #    $cmd .= " -bg_format inclusive -bgfile ".$main::param{scan_bg_file};
  $cmd .= " ".$main::param{scan_options};
  $cmd .= " ".$main::param{scan_enrich_options};
  $cmd .= " -return distrib -return occ_proba";
  #    $cmd .= " -lth occ_sig 0  -uth occ_sig_rank 1";
  $cmd .= " -o ".$out_file;
  &RSAT::util::one_command($cmd, 1, $out_file."_time.txt", log=>$main::out);

  ## Draw a XY plot of enrichment in matrix hits
  $cmd = $SCRIPTS."/XYgraph ";
  $cmd .= " -i ".$out_file;
  $cmd .= " -xcol 2 -xleg1 'Weight score'";
  $cmd .= " -xmin ".$main::param{scan_enrich_min_score};
  $cmd .= " -xsize 600 -ysize 250";
  $cmd .= " -xgstep1 5 -xgstep2 1";
  $cmd .= " -hline red 100";
  $cmd .= " -hline violet 0";
  $cmd .= " -vline violet 5";
  $cmd .= " -ycol 11 -yleg1 'Binomial significance of hit number'";
  $cmd .= " -ysize 400";
  $cmd .= " -lines";
  $cmd .= " -r_plot" if ($main::param{r_plot});
  $cmd .= " -o ".$graph_file;
  &RSAT::util::one_command($cmd, 1, "", log=>$main::out);
}

################################################################
## Scan peak sequences with each discovered motif. Draw a positional
## profile of motif instances + assess the significance of the number
## of hits for each possible weight threshold value.
##
## For each motif indexed in @motif_ids:
## - predict sites with matrix-scan -quick;
## - if genomic coordinates are available ($main::infile{coord} or
##   $main::param{seq_source}), export the sites in bed format;
## - compute and draw the positional distribution of the sites;
## - compute and draw the distribution of sites per peak;
## - compute the enrichment profile with &MotifEnrichment().
##
## Finally, the site files (and bed files) of all the motifs are
## concatenated into $main::outfile{merged_sites} (and
## $main::outfile{merged_sites_bed}).
sub ScanSequences {

  &RSAT::message::TimeWarn("Scanning sequences") if ($main::verbose >= 2);

  ## Don't re-read the table if already done. &ReadMotifTable()
  ## appends to @motif_ids, so the table must not be read again as
  ## soon as one motif has been indexed (otherwise a single motif
  ## would be scanned twice).
  &ReadMotifTable() unless (scalar(@motif_ids) > 0);
  my $motif_total = scalar(@motif_ids);

  foreach my $i (0..$#motif_ids) {
    my $m = $i+1;
    my $motif_id = $motif_ids[$i];
    my $motif_dir = $motif_dirs[$i];

    &RSAT::message::Debug("Scanning sequences with motif", $m."/".$motif_total, $motif_id, $motif_dir) if ($main::verbose >= 2);

    ################################################################
    ## Predict site positions

    ## Site prediction with matrix-scan (option -quick)
    my $cmd = $SCRIPTS."/matrix-scan -quick -v 1";
    $cmd .= " -seq_format fasta -i ".$main::outfile{"test_converted"};
    $cmd .= " -matrix_format transfac -m ".$main::outfile{$motif_id."_tf"};
    $cmd .= " -bg_format inclusive -bgfile ".$main::param{scan_bg_file};
    $cmd .= " ".$main::param{scan_options};
    $cmd .= " ".$main::param{scan_sites_options};
    $cmd .= " -o ".$main::outfile{$motif_id.'_pssm_sites'};
    &RSAT::util::one_command($cmd, 1, $main::outfile{$motif_id.'_pssm_sites'}."_time.txt", log=>$main::out);

    ## Compute the genomic coordinates
    if (($main::infile{coord})||($main::param{seq_source})) {
      &RSAT::message::TimeWarn("Computing genomic coordinates of predicted sites") if ($main::verbose >= 2);
      $cmd = $SCRIPTS."/convert-features";
      $cmd .= " -i ".$main::outfile{$motif_id.'_pssm_sites'};
      $cmd .= " -coord ".$main::infile{coord} if ($main::infile{coord});
      if ($main::param{seq_source}) {
	$cmd .= " -coord ".$main::outfile{"test_seq_coord"};
	$cmd .= " -from ".$main::param{seq_source};
      } else {
	$cmd .= " -from ft";
      }
      $cmd .= " -origin ".$main::param{origin};
      $cmd .= " -to bed ";
      my $bed_prefix = $main::param{title};
      my $bed_header= "track name=\"".$motif_id."\" description=\"RSAT peak-motifs_quick ".$bed_prefix.": ".$motif_id."\" visibility=2 itemRgb=\"On\" use_score=1 color=247,73,2";

      ## The header is inserted in a single-quoted sed substitution:
      ## protect the characters having a special meaning in a sed
      ## replacement (\ / &), then the single quotes for the shell.
      my $sed_header = $bed_header;
      $sed_header =~ s/([\\\/&])/\\$1/g;
      $sed_header =~ s/'/'\\''/g;
      $cmd .= " | sed '1s/^.*/".$sed_header."/' ";## change first line
      $cmd .= " > ".$main::outfile{$motif_id.'_pssm_sites_genomic'};
      &RSAT::util::one_command($cmd, 1, "", log=>$main::out);
    }

    ################################################################
    ## Compute the positional distribution of sites
    &RSAT::message::Debug("Computing positional distribution of motif", $m."/".$motif_total, $motif_id, $motif_dir) if ($main::verbose >= 2);
    $cmd = "grep -v '^;'";
    $cmd .= " ".$main::outfile{$motif_id.'_pssm_sites'};
    $cmd .= " | grep -v '^#'";
    $cmd .= " | awk '{print \(\$6\+\$5\)/2}'";
    $cmd .= " | ".$SCRIPTS."/classfreq -v 1";
    $cmd .= " -ci ".$main::param{profiles_ci};
    $cmd .= " -o ".$main::outfile{$motif_id.'_pssm_site_distrib'};
    &RSAT::util::one_command($cmd, 1, "", log=>$main::out);

    ## Draw the graph of predicted site position profiles (occurrences per non-overlapping window)
    &RSAT::message::Debug("Drawing positional distribution of motif", $m."/".$motif_total, $motif_id, $motif_dir) if ($main::verbose >= 2);
    $cmd = $SCRIPTS."/XYgraph";
    $cmd .= " -format ".$main::param{img_format};
    $cmd .= " -i ".$main::outfile{$motif_id.'_pssm_site_distrib'};
    $cmd .= " -lines -xcol 3 -ycol 4";
    $cmd .= " -ysize 250 -ycol 4 -yleg1 'Number of sites'";
    $cmd .= " -xsize 600 -xcol 3 -xleg1 'Position relative to sequence ".$main::param{origin}."'";
    $cmd .= " -xgstep2 ".$main::param{profiles_ci};
    $cmd .= " -vline '#666666' 0";
    $cmd .= " -title1 'Predicted sites : $motif_id'";
    $cmd .= " -r_plot" if ($main::param{r_plot});
    $cmd .= " -o ".$main::outfile{$motif_id.'_pssm_site_distrib_graph'};
    &RSAT::util::one_command($cmd, 1, "", log=>$main::out);

    ################################################################
    ## Distribution of sites per peak and %peaks with motifs
    &RSAT::message::Debug("Calculating distribution of sites per peak", $m."/".$motif_total, $motif_id, $motif_dir) if ($main::verbose >= 2);
    $cmd = $SCRIPTS."/contingency-table";
    $cmd .= " -i ".$main::outfile{$motif_id.'_pssm_sites'};
    $cmd .= " | ".$SCRIPTS."/classfreq -v 1";
    $cmd .= " -ci 1";
    $cmd .= " -o ".$main::outfile{$motif_id.'_pssm_sites_per_peak'};
    &RSAT::util::one_command($cmd, 1, "", log=>$main::out);

    ## Draw the graph of the number of predicted sites per peak
    &RSAT::message::Debug("Drawing sites per peak distribution of motif", $m."/".$motif_total, $motif_id, $motif_dir) if ($main::verbose >= 2);
    $cmd = $SCRIPTS."/XYgraph";
    $cmd .= " -format ".$main::param{img_format};
    $cmd .= " -i ".$main::outfile{$motif_id.'_pssm_sites_per_peak'};
    $cmd .= " -lines -xcol 1 -ycol 4 ";
    $cmd .= " -xsize 500 -xcol 1 -xleg1 'Number of sites'";
    $cmd .= " -ysize 250 -ycol 4,6 -yleg1 'Number of peaks' -legend";
    $cmd .= " -xgstep1 5 -xgstep2 1";
    $cmd .= " -title1 'Nb predicted sites : $motif_id'";
    $cmd .= " -r_plot" if ($main::param{r_plot});
    $cmd .= " -o ".$main::outfile{$motif_id.'_pssm_sites_per_peak_graph'};
    &RSAT::util::one_command($cmd, 1, "", log=>$main::out);

    ################################################################
    ## Analyze enrichment of test sequences for the discovered motif
    &RSAT::message::Debug("Estimating enrichment of test sequences for motif", $m."/".$motif_total, $motif_id, $motif_dir) if ($main::verbose >= 2);
    &MotifEnrichment($main::outfile{"test_converted"}, $main::outfile{$motif_id."_tf"},
		     $main::outfile{$motif_id.'_pssm_enrichment'},
		     $main::outfile{$motif_id.'_pssm_enrichment_png'});
  }

  ################################################################
  ## Generate a single file with the predicted sites for all
  ## discovered motifs (+ peaks if genomic coordinates)

  ## File in tab-delimited format, compatible with feature-maps
  &_ConcatExistingFiles("site merging", $main::outfile{merged_sites}, @site_files);

  ## File in bed format, for uploading in Genome Browsers (e.g. UCSC)
  if (($main::infile{coord})||($main::param{seq_source})) {
    &_ConcatExistingFiles("bed merging", $main::outfile{merged_sites_bed}, @bed_files);
  }
}

################################################################
## Private helper for &ScanSequences(): concatenate (with cat) the
## existing files of a list into a single file.
##
## Arguments:
##   $task_label   label inserted in the warning issued for each
##                 missing file
##   $merged_file  output file
##   @files        files to concatenate
##
## Each file is taken only once, even if it appears several times in
## the list (&SetFileNamesForMotif() appends the same file at each
## call, and it can be called several times for the same motif).
## Nothing is done if none of the files exists.
sub _ConcatExistingFiles {
  my ($task_label, $merged_file, @files) = @_;

  my %already_listed = ();
  my @checked_files = ();
  foreach my $file (@files) {
    next if ($already_listed{$file}++);
    if (-e $file) {
      push @checked_files, $file;
    } else {
      &RSAT::message::Warning("&ScanSequences()", $task_label, "file does not exist", $file) if ($main::verbose >= 1);
    }
  }
  return unless (scalar(@checked_files) > 0);

  my $merge_cmd = "cat ".join(" ", @checked_files)." > ".$merged_file;
  &RSAT::util::one_command($merge_cmd, 1, "", log=>$main::out);
}



################################################################
## Generate a table summarizing the time spent in the different tasks
## on the basis of the unix "time" command.
##
## For each key of the global @timetable_keys, the time file is
## <file>_time.txt, where <file> is the output file indexed under
## this key or, failing that, the prefix indexed under this key. The
## table ($main::outfile{timetable}) contains one row per key, with
## the "real" time found in the time file, or "NA" if the file is
## missing or contains no "real" line.
sub TimeTable {
  my $timetable = &OpenOutputFile($main::outfile{timetable});
  foreach my $key (@timetable_keys) {
    my $time_file;
    if (defined($main::outfile{$key})) {
      $time_file = $main::outfile{$key}."_time.txt";
    } elsif (defined($main::prefix{$key})) {
      $time_file = $main::prefix{$key}."_time.txt";
    } else {
      &RSAT::message::Warning("&TimeTable()", "Neither output file nor prefix has been defined for key", $key) if ($main::verbose >= 1);
      next;
    }
    my $time = "NA";
    if (-e $time_file) {
      my ($in) = &OpenInputFile($time_file);
      while (my $line = <$in>) {
	if ($line =~ /real\s+(\S+)/) {
	  $time = $1;

	  ## Convert times of the form <minutes>m<seconds>s to seconds.
	  ## The fractional part is read as a decimal fraction,
	  ## whatever its number of digits; other formats are reported
	  ## as found in the file.
	  if ($time =~ /^(\d+)m(\d+)(?:[.,](\d+))?s$/) {
	    my ($minutes, $seconds, $fraction) = ($1, $2, $3);
	    $fraction = "0" unless (defined($fraction));
	    $time = $minutes*60 + ($seconds.".".$fraction);
	  }
	}
      }
      close $in;
    } else {
      &RSAT::message::Warning("Missing time file", $time_file) if ($main::verbose >= 2);
    }
    print $timetable $key, "\t", $time, "\n";
    &RSAT::message::Info("TimeTable", $key, $time) if ($main::verbose >= 4);
  }

  ## Make sure the table is completely written before reporting it
  close $timetable;
  &RSAT::message::TimeWarn("Time table file", $main::outfile{timetable}) if ($main::verbose >= 2);
}

################################################################
## Generate a table summarizing the time spent in the different tasks
## on the basis of the time report in RSAT programs.
##
## For each key of the global @timelog_keys, the file indexed in
## %main::outfile is searched for the comment lines "; Job started",
## "; Job done" and "; Seconds". The table ($main::outfile{timelog})
## contains one row per key, with "NA" for the missing values. A HTML
## version of the table is then generated with text-to-html.
sub TimeLog {
  my $timelog = &OpenOutputFile($main::outfile{timelog});

  my $prefix = "NA";
  if (defined($main::param{prefix})) {
    $prefix = $main::param{prefix};
  }

  print $timelog join("\t", "#start_time       ", "done_time        ", "elapsed", "seconds", "task", "prefix", "file"), "\n";

  foreach my $key (@timelog_keys) {
    my $time_file = $main::outfile{$key};
    my $start = "NA";
    my $done = "NA";
    my $elapsed = "NA";
    my $seconds = "NA";
    if (defined($time_file) && (-e $time_file)) {
      my ($in) = &OpenInputFile($time_file);
      while (my $line = <$in>) {
	if ($line =~ /^;\s*Job started\s+(\S+)/i) {
	  $start = $1;
	} elsif ($line =~ /^;\s*Job done\s+(\S+)/i) {
	  $done = $1;
	} elsif ($line =~ /^;\s*Seconds\s+(\S+)/i) {
	  $seconds = $1;
	}
      }
      close $in;
    }

    ## Elapsed time (seconds) between the start and done dates. Both
    ## dates are converted to epoch seconds, so that tasks running
    ## over a change of day, month or year are correctly treated.
    my $start_epoch = &_TimestampToEpoch($start);
    my $done_epoch = &_TimestampToEpoch($done);
    if (defined($start_epoch) && defined($done_epoch)) {
      $elapsed = $done_epoch - $start_epoch;
    }

    print $timelog join("\t", $start, $done, $elapsed, $seconds, $key, $prefix, $time_file), "\n";
  }
  close $timelog;
  &RSAT::message::TimeWarn("Time log file", $main::outfile{timelog}) if ($main::verbose >= 2);

  ## Create a HTML version of the timelog table
  my $cmd = $SCRIPTS."/text-to-html";
  $cmd .= " -font variable";
  $cmd .= " -i ".$main::outfile{timelog};
  $cmd .= " -o ".$main::outfile{timelog_html};
  &RSAT::util::one_command($cmd, 1, "", log=>$main::out);
  &RSAT::message::TimeWarn("Time log html", $main::outfile{timelog_html}) if ($main::verbose >= 2);
}

################################################################
## Private helper for &TimeLog(): convert a date of the form
## YYYY_MM_DD<any character>HHMMSS into a number of seconds since the
## epoch. The date is treated as UTC, since only differences between
## two dates are of interest.
##
## Returns undef if the string does not contain such a date, or if
## the date is invalid (e.g. day 31 of a 30-days month).
sub _TimestampToEpoch {
  my ($stamp) = @_;
  return unless (defined($stamp));

  my @fields = ($stamp =~ /(\d{4})_(\d{2})_(\d{2}).(\d{2})(\d{2})(\d{2})/);
  return unless (scalar(@fields) == 6);
  my ($year, $month, $day, $hour, $min, $sec) = @fields;

  ## timegm() dies on out-of-range values
  require Time::Local;
  local $@;
  my $epoch = eval { Time::Local::timegm($sec, $min, $hour, $day, $month - 1, $year) };
  return $epoch;
}


################################################################
## Generate a HTML table of content of the results with links to the
## different parts.
##
## Returns the HTML code of the menu (a string). The menu displays at
## most the 10 first discovered motifs (in two columns of 5), followed
## by a link indicating the number of remaining motifs.
sub ResultTOC {
  my $result_toc = "";

  $result_toc .=  "<div id='Menu'>\n";
  $result_toc .=  "<h3>Results</h3>\n";

  ## Links to the archive and to the file containing all the matrices
  if ((-e $main::outfile{archive}) || ($task{archive})) {
    my $archive_short = &RSAT::util::ShortFileName($main::outfile{archive});
    my $link = &RSAT::util::RelativePath($main::outfile{synthesis}, $main::outfile{archive});
    $result_toc .=  "<span class='menulink'>";
    $result_toc .=  "[<a href='".$link."'> Download all results (".$archive_short.")</a> ]</span> \n";
    $link = &RSAT::util::RelativePath($main::outfile{synthesis}, $main::outfile{motifs_discovered});
    $result_toc .=  "<span class='menulink'>";
    $result_toc .=  "[<a href='".$link."'> Download all matrices (transfac format)</a> ]</p></span>\n";
  }

  $result_toc .=  "<a href='#seq_composition'>Sequence composition & statistics </a><br/>";
  @motifs = &ReadDiscoveredMotifs();

  ## Number of motifs actually loaded. The loop below accesses the
  ## motifs by index, so its upper bound must be the size of the list
  ## (the global $motif_nb is not assigned in this routine).
  my $motif_count = scalar(@motifs);

  $result_toc .=  "<a href='#motifs_by_algo'>Discovered motifs (by algorithm) - </a>".$motif_count."<br/>";
  $result_toc .=  "<a href='#discovered_motifs'>Discovered motifs (with motif comparison) - </a>".$motif_count."<br/>";

  $result_toc .=  " \n\n\n<table> ";
  $result_toc .=  " \n\n<tr>\n ";
  $result_toc .=  " \n\n<td> ";

  ## Display the motifs
  for my $mot (1..$motif_count) {
    my $matrix = $motifs[$mot-1];
    my $motif_id= $matrix->get_attribute("id");
    my $link = &RSAT::util::RelativePath($main::outfile{synthesis}, $main::outfile{$motif_id."_tf"});
    my $consensus = $matrix->get_attribute("consensus.IUPAC");
    my $colored_consensus = &RSAT::SeqUtil::ColorConsensus($consensus, bold=>1, iupac=>$main::param{iupac_coloring});
    $result_toc .=  "<span class='menulink'><a href='#discovered_motifs_".$mot."'>Motif ".$mot."</a></span>: <a href='".$link."'>[matrix]</a> ".$colored_consensus."<br/> ";

    ## Start a second column after the 5th motif
    if ($mot == 5) {
      $result_toc .=  " </td>\n ";
      $result_toc .=  " \n<td> ";
    }

    ## Stop after the 10th motif, with a link to the remaining ones
    if ($mot == 10) {
      if (($motif_count-$mot) > 0) {
	$result_toc .=  "<a href='#discovered_motifs_".$mot."'>".($motif_count-$mot)." more motifs</a>";
      }
      last;
    }
  }
  $result_toc .=  " </td>\n ";
  $result_toc .=  " </tr>\n</table>\n\n\n ";

  $result_toc .=  "<a href='#merged_sites'>Motif locations (sites)</a><br/>";
  $result_toc .=  "<a href='#logs'>Logs & Commands</a><br/>";
  if ($task{small_summary}) {
    $result_toc .=  "<font color='red'>New ! </font><a href='".$main::param{prefix}."_small_summary.html' target=_blank>Small summary</a><br/>";
  }
  $result_toc .=  "</div>";


  return($result_toc);
}


################################################################
## Generate a HTML table summarizing the results for each discovered
## motif. This table is inserted in the synthetic report.
##
## Takes no argument and works on file-level globals: it prints to
## the synthesis handle $syn, increments $menu_nb, reads %task,
## %matrix_info, $motif_nb and the %main::outfile index, reloads
## @motifs, and may set the global $genome_assembly (also read by
## &SynthesisMergedSites()).
##
## For each motif, a first row shows the logos (direct and reverse
## complement) with links to the k-mer and matrix files. If the site
## file of the motif exists, a second row shows site statistics, and
## a link to the UCSC genome browser when a BED file of genomic sites
## is available.
sub SynthesisByMotif {
  print $syn '<p style="page-break-after:always;"></p>'; ## Page break for printing
  print $syn &open_menu_heading($menu_nb++, "<a name='discovered_motifs'></a><h3>Discovered motifs (with motif comparison)</h3>\n", 0);
  print $syn "\n\n\n<table class='whitebg'>\n";

  ## Header line
  &SyntheticTableAddHeaderRow("Motif discovery");

  ## Prepare summary text file
  if ($task{small_summary}) {
    $small_sum_txt = &OpenOutputFile($main::outfile{small_summary_txt});
    print $small_sum_txt "#Motif\tLogo\t3 Top hits in databases\n";
  }

  &ReadMotifTable();

  @motifs = &ReadDiscoveredMotifs();

  ## Options for generating logos (only used by the disabled makeLogo() calls below)
  my $logo_opt = "";

  ## Treat each motif
  for my $m (1..$motif_nb) {
    my $matrix = $motifs[$m-1];
    my $matrix_id = $matrix->get_attribute("id");
    my $pattern_type = $matrix_info{$matrix_id}->{pattern_type};
    &RSAT::message::TimeWarn("Synthesis for discovered motif", $m."/".$motif_nb, $matrix_id) if ($main::verbose >= 3);
    #    $matrix->setMarkovModel($bg_model);

    ## We compute the consensus and logos here with the same background model as used for sequence scanning
    ## We need to recompute them because in the section "Discovered motifs by algorithm" we used the consensus of the assembly rather than matrix.
    $matrix->calcConsensus();
    my $consensus = $matrix->get_attribute("consensus.IUPAC");
    my $colored_consensus = &RSAT::SeqUtil::ColorConsensus($consensus, bold=>1, iupac=>$main::param{iupac_coloring});
    my $consensus_rc = $matrix->get_attribute("consensus.IUPAC.rc");
    my $colored_consensus_rc = &RSAT::SeqUtil::ColorConsensus($consensus_rc,bold=>1, iupac=>$main::param{iupac_coloring});

    ## Get consensus as defined in the previous report section
    ## THIS DOES NOT WORK BECAUSE WHAT WE STORED BEFORE WERE CONSENSUS FROM ASSEMBLIES RATHER THAN MATRICES
    # my $consensus = $matrix_info{$matrix_id}->{consensus};
    # my $colored_consensus = $matrix_info{$matrix_id}->{colored_consensus};
    # my $consensus_rc = $matrix_info{$matrix_id}->{consensus_rc};
    # my $colored_consensus_rc = $matrix_info{$matrix_id}->{colored_consensus_rc};

    ## JVH: WHY SHOULD I COMPUTE LOGOS HERE ? I SHOULD ALREADY HAVE THEM ABOVE. TO BE CHECKED (2016-02-28)
    ## Compute matrix logos.
#    $matrix->makeLogo($main::prefix{$matrix_id."_logo"},$main::param{img_format},$logo_opt, 0); ## Generate the logo image
#    $matrix->makeLogo($main::prefix{$matrix_id."_logo_rc"},$main::param{img_format},$logo_opt, 1); ## Generate the reverse complementary logo image

    print $syn "\n\n<tr>\n";
    print $syn "\n<td><a name='discovered_motifs_".$m."'></a><b> Motif ", $m,"</b> \n";
    print $syn "&nbsp;"x2, $matrix->get_attribute("name"),"</td>\n\n";
    print $syn "\n<td>\n\n\n<table align='center' >\n\n<tr>\n";

    print $small_sum_txt $matrix->get_attribute("name"), "\t" if ($task{small_summary});

    ## Sequence logo
#    my $file = $main::outfile{$matrix_id."_logo"};
    my $file = $matrix_info{$matrix_id}->{logo_file};
    if (-e $file) {
      my $logo_link = &RSAT::util::RelativePath($main::outfile{synthesis}, $file);
      print $syn ("\n<td align='center' style='border-bottom-style:none;' >", $colored_consensus, "<br>\n",
                  "<a href='", $logo_link, "'>",
                  "<img border='".$main::param{img_border}."' height='",$main::param{logo_table_height},
                  "' src='",$logo_link,"'>","</a>","</td>\n\n");

      print $small_sum_txt ("<img border='", $main::param{img_border},
                            "' height='", $main::param{logo_table_height},
                            "' src='",$logo_link,"'>"."\t" ) if ($task{small_summary});

    } else {
      print $syn "\n<td><font color='red'>No logo</font>","</td>\n\n";
      print $small_sum_txt "\t" if ($task{small_summary});
    }

    ## HERE JvH SHOULD ADD THE SIGNIFICANCE OF THE MOTIFS
    print $syn join("",
                    "k-mer sig=",
                    $matrix_info{$matrix_id}->{start_format},
                    $matrix_info{$matrix_id}->{score},
                    $matrix_info{$matrix_id}->{end_format},
                    "; evalue=",
                    $matrix_info{$matrix_id}->{start_format},
                    $matrix_info{$matrix_id}->{evalue},
                    $matrix_info{$matrix_id}->{end_format},
                    "\n");

    ## Sequence logo, reverse complementary
#    $file = $main::outfile{$matrix_id."_logo_rc"};
    $file = $matrix_info{$matrix_id}->{logo_file_rc};
    if (-e $file) {
      my $logo_link = &RSAT::util::RelativePath($main::outfile{synthesis}, $file);
      print $syn ("\n<td align='center' style='border-bottom-style:none;border-right-style:none;' >", $colored_consensus_rc, "<br>\n",
                  "<a href='", $logo_link, "'>",
                  "<img border='".$main::param{img_border}."' height='",$main::param{logo_table_height},
                  "' src='",$logo_link,"'>","</a>","</td>\n\n");
    } else {
      ## Open the cell before closing it, as done for the direct logo
      ## (the opening <td> was missing, leaving an unbalanced </td>).
      print $syn "\n<td><font color='red'>No logo</font>","</td>\n\n";
    }
    print $syn "</tr>\n</table>\n\n\n</td>\n\n";

    &PrintFileLinks_New("k-mers", ["statistics",$main::outfile{$pattern_type},
                                   "assemblies",$main::outfile{$pattern_type."_asmb"}],
                        "matrix", ["tab_format",$main::outfile{$matrix_id."_tab"},
                                   "transfac_format",$main::outfile{$matrix_id."_tf"}
                        ]);
    print $syn "</tr>\n\n";

    ################################################################
    ## Scan results
    my $scan_file = $main::outfile{$matrix_id.'_pssm_sites'};
    if (-e $scan_file) {
      ## get information on % peaks with a motif and max number sites/peaks
      #my $nb_peak_matching_motif = `cat $scan_file | grep -v '^;' | grep -v '^#' | cut -f 1  | uniq | wc -l`;
      my $nb_peak_matching_motif = `grep '; count' $main::outfile{$matrix_id.'_pssm_sites_per_peak'}`;
      chomp($nb_peak_matching_motif);
      $nb_peak_matching_motif =~ s/.*count\s*//;
      ## Get mean number of predicted sites per peak
      my $mean_peak_matching_motif = `grep '; mean' $main::outfile{$matrix_id.'_pssm_sites_per_peak'}`;
      chomp($mean_peak_matching_motif);
      $mean_peak_matching_motif =~ s/.*mean\s*//;
      $mean_peak_matching_motif = sprintf("%.2f", $mean_peak_matching_motif);

      ## Get max number of predicted sites per peak.
      ## The counts written by "uniq -c" must be sorted numerically
      ## (-n): a plain textual sort can rank "9" above "10", depending
      ## on the locale.
      my $max_nb_sites = `cat $scan_file | grep -v '^;' | grep -v '^#' | cut -f 1  | uniq -c | sort -rn | head -n 1`;
      chomp ($max_nb_sites);
      $max_nb_sites =~ s/^\s+//;
      $max_nb_sites =~ s/\s+(.*)$/<br\/>&nbsp\($1\)/;

      ## Get number of peaks from sequence length distribution
      my $peak_nb = `grep '; count' $main::outfile{test_seqlen_distrib}`;
      chomp($peak_nb);
      $peak_nb =~ s/.*count\s*//;

      ## Calculate the percentage of peaks with at least one site.
      ## The peak count is empty if the length distribution file is
      ## missing or has no "; count" line: report "NA" rather than
      ## dying on a division by zero.
      my $site_peak_percent = "NA";
      if ($peak_nb > 0) {
        $site_peak_percent = sprintf("%.2f", $nb_peak_matching_motif*100/$peak_nb);
      }

      my $table = "\n\n\n<table>\n\n<tr>\n";
      my $img = &RSAT::util::RelativePath($main::outfile{synthesis}, $main::outfile{$matrix_id.'_pssm_site_distrib_graph'});
      $table .= "\n<td style='border-bottom-style:none;border-right-style:none;'>Distribution of sites<br><a  href='".$img."'><img border='".$param{img_border}."' height='200' src='".$img."'></a></td>\n";
      $img = &RSAT::util::RelativePath($main::outfile{synthesis}, $main::outfile{$matrix_id.'_pssm_sites_per_peak_graph'});
      $table .= "\n<td style='border-bottom-style:none;border-right-style:none;'>Nb of predicted sites per peak<br><a  href='".$img."'><img border='".$param{img_border}."' height='200' src='".$img."'></a></td>\n";
      $table .= join("", "\n<td style='border-bottom-style:none;border-right-style:none;'>",
                     "<u>Nb peaks with at least one predicted site:</u> ",
                     $nb_peak_matching_motif,
                     " (".$site_peak_percent."%) <br>",
                     "<u>Max nb of predicted sites per peak:</u> ".$max_nb_sites."<br/>",
                     "<u>Mean nb of predicted sites per peak:</u> ".$mean_peak_matching_motif."</td>\n");
      # $img = &RSAT::util::RelativePath($main::outfile{synthesis}, $main::outfile{$matrix_id.'_pssm_enrichment_png'});
      #      $table .= "\n<td style='border-bottom-style:none;border-right-style:none;'>Enrichment in binding sites<br><a  href='".$img."'><img border='".$param{img_border}."' height='200' src='".$img."'></a></td>\n";
      $table .= "</tr>\n</table>\n\n\n";

      ################################################################
      ## Display UCSC track
      ## If from fasta sequence: first get assembly.
      ## $genome_assembly is deliberately not lexical: it is a global
      ## that &SynthesisMergedSites() reads afterwards.
      if (($main::infile{coord})||($main::param{seq_source})) {
#      if ($main::param{seq_source}) {
        ## also store the genome assembly (taken from the first fasta header)
        $genome_assembly = ` grep '^>' $infile{test_seq} | head -n 1 ` ;
        chomp($genome_assembly);
        if ($main::param{seq_source} eq "galaxy") {
          $genome_assembly =~ s/>([^_]+)_.*$/$1/;
        }
        &RSAT::message::Info("Assembly from fasta header", $main::param{seq_source},$genome_assembly) if ($main::verbose >= 3);
        ## IF from coord file
      } elsif ($main::param{coord_assembly}) {
        $genome_assembly = $main::param{coord_assembly};
      }

      if ((defined($main::outfile{$matrix_id.'_pssm_sites_genomic'})) && (-e $main::outfile{$matrix_id.'_pssm_sites_genomic'})) {
        ## UCSC link to genomic location of predicted sites
        my $BED_URL =$ENV{rsat_www}."/tmp/".&RSAT::util::RelativePath(&RSAT::util::get_pub_temp(), $main::outfile{$matrix_id.'_pssm_sites_genomic'});

        ## The href value is closed with a quote, then the <a> tag with
        ## ">", before the icon (the previous code emitted them in the
        ## wrong order, which produced a broken link).
        my $browser_url = "<a target='_blank' href='";
        $browser_url .= "http://genome.ucsc.edu/cgi-bin/hgTracks?";
        $browser_url .= "db=".$genome_assembly;
        $browser_url .= "&hgt.customText=".$BED_URL;
        $browser_url .= "'><img border=0 height='20' src='images_html/UCSC_icon.jpg' style='vertical-align:text-bottom;' alt='UCSC'></a>";

        &SyntheticTableAddRow_New("Predicted sites on test peaks",
                                  $table,
                                  "view in genome browser ", [$browser_url],
                                  "sites", ["text",$main::outfile{$matrix_id.'_pssm_sites'},
                                            "BED (UCSC track)",$main::outfile{$matrix_id.'_pssm_sites_genomic'}
                                           ],
                                  "distribution", ["text",$main::outfile{$matrix_id.'_pssm_site_distrib'},
                                                  ],
                                  "sites per peak", ["text",$main::outfile{$matrix_id.'_pssm_sites_per_peak'},
                                                    ],

                                  "enrichment", [ "text",$main::outfile{$matrix_id.'_pssm_enrichment'},
                                                  "graph", $main::outfile{$matrix_id.'_pssm_enrichment_png'}
                                                ]
                                 );
      } else {                  ## without UCSC track
        &SyntheticTableAddRow_New("Predicted sites on test peaks",
                                  $table,
                                  "sites", ["text",$main::outfile{$matrix_id.'_pssm_sites'},
                                           ],
                                  "distribution", ["text",$main::outfile{$matrix_id.'_pssm_site_distrib'},
                                                  ],
                                  "sites per peak", ["text",$main::outfile{$matrix_id.'_pssm_sites_per_peak'},
                                                    ],
                                  "enrichment", [ "text",$main::outfile{$matrix_id.'_pssm_enrichment'},
                                                  "graph", $main::outfile{$matrix_id.'_pssm_enrichment_png'}
                                                ]
                                 );
      }
    }

    ## Add a black line + a page break for printing (except for the last motif)
    print $syn "\n\n\n<tr>\n\n<td colspan=5 style='border-top-style:none;border-bottom-color:black;border-bottom-width:3px;'><pre/>";
    if ($m < $motif_nb) {
      print $syn '<p style="page-break-after:always;"></p>'; ## Page break for printing
    }
    print $syn "</td>\n</tr>\n\n";
  }

  ## NOTE(review): only one table is opened at the top of this sub, so
  ## the first of these two closing tags looks redundant. Left as is;
  ## to be confirmed against the output of &open_menu_heading().
  print $syn "</table>\n\n\n";
  print $syn "</table>\n\n\n<p>\n";
  print $syn &close_menu_heading();
}


################################################################
## Open the HTML page for the synthesis.
##
## Copies the icons required by the report next to it, opens the
## synthesis file (global handle $syn), stores its directory in the
## global $synthesis_path, prints the HTML header and finally reports
## the command line with &ReportCommand().
sub OpenSynthesis {

  ## Refresh time passed to the HTML header: 120 for a progressive
  ## synthesis, 0 otherwise
  my $refresh_time = $progressive_synthesis ? 120 : 0;

  ## Create a directory to store the arrow icons and the UCSC icon
  $main::dir{images_html} = $dir{output}."/images_html";
  &RSAT::util::CheckOutDir($main::dir{images_html}, "", 775);

  ## Synchronize the icons from the RSAT library to that directory
  my $icon_source_dir = $ENV{RSAT}."/perl-scripts/lib/images/";
  my $sync_cmd = join("",
                      "rsync -ruptl ",
                      $icon_source_dir."arrow*.gif ",
                      $icon_source_dir."UCSC_icon.jpg ",
                      " ".$main::dir{images_html});
  &RSAT::util::one_command($sync_cmd, 1, "", log=>$main::out);

  ## Open the HTML file and remember the directory that contains it
  $syn = &OpenOutputFile($main::outfile{synthesis});
  chomp($synthesis_path = `dirname $main::outfile{synthesis}`);

  ## HTML header of the report
  print {$syn} &PrintHtmlResultHeader(program=>"peak-motifs_quick",
                                      "title"=>$main::param{title},
                                      "result_toc"=>1,
                                      refresh_time=>$refresh_time);

  &ReportCommand();
}


################################################################
## Report the locations of the predicted sites, all motifs merged:
## links to the tab and bed files, plus a link to the UCSC genome
## browser when a coordinate file or a sequence source is defined.
##
## Nothing is written if the merged site file does not exist. The
## assembly used in the UCSC link is the global $genome_assembly,
## which is not set in this sub.
sub SynthesisMergedSites {
  ## Scan results: skip the whole section if there is no merged site file
  my $merged_sites_file = $main::outfile{merged_sites};
  return unless (-e $merged_sites_file);

  ## UCSC link to genomic location of predicted sites
  my $ucsc_link = "";
  if (($main::infile{coord}) || ($main::param{seq_source})) {
    my $bed_url = $ENV{rsat_www}."/tmp/". &RSAT::util::RelativePath(&RSAT::util::get_pub_temp(), $main::outfile{merged_sites_bed});
    $ucsc_link = join("",
                      "<a target='_blank' href='",
                      "http://genome.ucsc.edu/cgi-bin/hgTracks?",
                      "db=".$genome_assembly,
                      "&hgt.customText=".$bed_url,
                      "'><img border=0 height='20' src='images_html/UCSC_icon.jpg' style='vertical-align:text-bottom;' alt='UCSC'></a>");
  }

  ## Section heading (preceded by a page break for printing)
  print {$syn} '<p style="page-break-after:always;"></p>';
  print {$syn} &open_menu_heading($menu_nb++, "<a name='merged_sites'></a><h3>Motif locations (sites)</h3>", 0);

  ## Link table, with a single row
  print {$syn} "\n\n\n<table class='whitebg'>\n";
  my @site_files = ("tab", $main::outfile{merged_sites},
                    "bed", $main::outfile{merged_sites_bed});
  &SyntheticTableAddRow("Predicted sites on test peaks (all motifs)",
                        $ucsc_link,
                        @site_files);
  print {$syn} "</table>\n\n\n<p>\n";

  print {$syn} &close_menu_heading();
}


################################################################
## Report the command before running the analysis, for the sake of
## debugging and to facilitate copy-paste of the options.
##
## The program name and its arguments (written by &PrintArguments())
## are printed in a <pre> element of the synthesis file ($syn), inside
## a menu section of their own.
sub ReportCommand {
  print {$syn} &open_menu_heading($menu_nb++, "<a name='logs'></a><h3>Command</h3>", 1);

  ## Program name followed by its arguments
  print {$syn} "<pre>", "peak-motifs_quick ";
  &PrintArguments($syn);
  print {$syn} "</pre>";

  print {$syn} &close_menu_heading();
}


################################################################
## Report the list of input/output files. This has to be done at the
## end of the process, since it includes the list of discovered
## motifs, which is not known before having run the motif discovery
## algorithms.
##
## The list itself is written by &ReportParamsAndFiles(); this sub
## then adds to the synthesis ($syn) a section with links to the
## parameter and time log files.
sub SynthesisParamsAndFiles {

  ## Write the HTML file with the list of input/output files and the parameters
  &ReportParamsAndFiles();

  ## Section heading (no page break before this section)
  print {$syn} &open_menu_heading($menu_nb++, "<a name='logs'></a><h3>Logs and parameter files</h3>", 1);

  ## Table of log files
  print {$syn} "\n\n\n<table class='whitebg'>\n";
  &SyntheticTableAddHeaderRow("Log files");

  my @param_files = ("txt"=>$main::outfile{log},
                     "html"=>$main::outfile{links});
  &SyntheticTableAddRow("Parameters and files", "", @param_files);

  my @time_files = ("time table"=>$main::outfile{timetable},
                    "time log"=>$main::outfile{timelog},
                    "html"=>$main::outfile{timelog_html});
  &SyntheticTableAddRow("Time log", "", @time_files);

  ## Close the synthetic table
  print {$syn} "</table>\n\n\n</p>\n";

  print {$syn} &close_menu_heading();
}


################################################################
## Generate a compressed archive with all the results.
##
## Usage:
##   &Archive($remove_first, $to_archive);
##
## Arguments:
##   $remove_first  if true, the previous version of the archive is
##                  deleted before archiving, to avoid including the
##                  old archive in the new one
##   $to_archive    file or directory to archive (default: the whole
##                  output directory, $main::dir{output})
##
## The archive path is $main::outfile{archive} and its format is
## $main::param{archive_format} (zip, tar or tgz; any other value is
## reported with &RSAT::error::FatalError()). Everything is assembled
## in a single shell command, run with &RSAT::util::one_command().
sub Archive {
  my ($remove_first, $to_archive) = @_;

  ## By default, archive the whole output directory
  $to_archive = $main::dir{output} unless $to_archive;

  &RSAT::message::TimeWarn("\n; Archiving data and results") if ($main::verbose >= 2);

  ## The command has to be declared before its first use. It used to
  ## be declared after the "rm" below, which was thus appended to an
  ## unrelated global variable and never executed.
  my $cmd = "";

  ## Delete previous version of the archive to avoid including the old archive in the new one
  $cmd .= "rm -f ".$main::outfile{archive}."; " if ($remove_first);

  ## Archive all data and results
  my ($archive_dir, $archive) = &SplitFileName($main::outfile{archive}); ## Identify directory containing the archive
  my ($archive_dir_dir) = &SplitFileName($archive_dir); ## Identify parent dir of the archive-containing dir
  my $to_archive_rel_path = &RSAT::util::RelativePath($archive_dir_dir, $to_archive);
  my $archive_rel_path = &RSAT::util::RelativePath($archive_dir_dir, $main::outfile{archive});

  ## avoids crash of the program when run from the "$archive_dir_dir"
  if ($archive_dir_dir eq "") {
    $archive_dir_dir = ".";
    $to_archive_rel_path = $to_archive;
    $archive_rel_path = $main::outfile{archive};
  }

#&RSAT::message::Debug("&Archive()",
#                       "\n\tto_archive", $to_archive,
#                       "\n\tarchive_dir", $archive_dir,
#                       "\n\tarchive_dir_dir", $archive_dir_dir,
#                       "\n\tto_archive_rel_path", $to_archive_rel_path,
#                       "\n\tarchive_rel_path", $archive_rel_path,
#                      ) if ($main::verbose >= 1);

  if ($main::param{archive_format} eq "zip") {
    ## zip is run from the parent directory, with relative paths
    $cmd .= "(cd ".$archive_dir_dir." ; ";
    $cmd .= " zip -ryq ".$archive_rel_path." ".$to_archive_rel_path;
    $cmd .= " -x ".$archive;
    $cmd .= ")";
  } elsif (($main::param{archive_format} eq "tar") ||
           ($main::param{archive_format} eq "tgz")) {
    $cmd .= "tar -cpf ".$main::outfile{archive};
    $cmd .= " -z" if ($main::param{archive_format} eq "tgz");
    $cmd .= " -C ".$archive_dir_dir; ## Avoid including the whole path in the archive paths
    $cmd .= " --exclude ".$archive;
    $cmd .= " ".$to_archive_rel_path;
  } else {
    &RSAT::error::FatalError($main::param{archive_format}, "Invalid archive format. Supported: zip, tar, tgz.");
  }

  &RSAT::util::one_command($cmd, 1, "", log=>$main::out);

  &RSAT::message::TimeWarn("Archive", $main::outfile{archive}) if ($main::verbose >= 2);
}


################################################################
## Generate a HTML report summarizing the main results, with links
## to the original result files.
##
## The sections of the report are each written by a dedicated sub,
## called only if the synthesis is not progressive
## ($progressive_synthesis false). The list of publications, the end
## of the HTML file and, if the archive task is active, the addition
## of the report to the archive are done in all cases.
sub Synthesis {
  &RSAT::message::TimeWarn("\n; Generating HTML synthesis") if ($main::verbose >= 2);

  ## Sections of the report, in their order of appearance
  my @report_sections = (
    \&OpenSynthesis,                ## Opening of the HTML file
    \&SynthesisSequenceComposition, ## Header of the synthetic table
    ## \&SynthesisRefMotifs,        ## Reference motifs (disabled)
    \&SynthesisMotifsByAlgo,        ## Discovered motifs (by algorithm)
    \&SynthesisByMotif,             ## Discovered motifs (by motif)
    \&SynthesisMergedSites,         ## Motif locations (merged sites)
    \&SynthesisMotifCompa,          ## Motif comparisons
    \&SynthesisParamsAndFiles,      ## Parameters and files
  );

  ## The flag is checked before each section rather than once for all
  foreach my $write_section (@report_sections) {
    $write_section->() unless ($progressive_synthesis);
  }

  ## References
  print {$syn} &open_menu_heading($menu_nb++, "<a name='refs'></a><h3>Publications</h3>", 0);
  print {$syn} ("<ol>\n",
                "<li>Thomas-Chollier M, Herrmann C, Defrance M, Sand O, Thieffry D, van Helden J. (2012). RSAT peak-motifs: motif analysis in full-size ChIP-seq datasets. Nucleic Acids Res 40(4): e31.</li>\n",
                "<li>Thomas-Chollier M, Darbo E, Herrmann C, Defrance M, Thieffry D, van Helden J. (2012). A complete workflow for the analysis of full-size ChIP-seq (and similar) data sets using peak-motifs. Nat Protoc 7(8): 1551-1568.</li>\n",
                "</ol>\n");

  ## End of the HTML file
  &CloseSynthesis();

  ## Add the HTML synthesis report to the archive
  if ($task{archive}) {
    &Archive(0, $main::outfile{synthesis});
  }

  &RSAT::message::TimeWarn("Synthetic report", $main::outfile{synthesis}) if ($main::verbose >= 2);
}


################################################################
## Report results of motif comparisons: word comparisons (only when
## the merge_words task is active), then comparisons between
## discovered motifs. The rows are written by the called subs, inside
## the table opened here.
sub SynthesisMotifCompa {
  ## Section heading (no page break before this section)
  print {$syn} &open_menu_heading($menu_nb++, "<a name='motif_compa'></a><h3>Motif comparisons</h3>", 0);
  print {$syn} "\n\n\n<table class='whitebg'>\n";

  ## Word comparisons
  if ($task{merge_words}) {
    &SynthesisWordCompa();
  }

  ## Comparison between discovered motifs
  &SynthesisMotifsVsMotifs();

  print {$syn} "</table>\n\n\n<p>\n";
  print {$syn} &close_menu_heading();
}


################################################################
## Finish the HTML synthesis: print the table of contents returned by
## &ResultTOC(), close the body and html elements, then close the
## synthesis file handle ($syn).
sub CloseSynthesis {
  print {$syn} &ResultTOC();
  print {$syn} "</body>\n", "</html>\n";
  close($syn);
}

################################################################
## Add a row to the synthetic table.
##
## Usage:
##   &SyntheticTableAddRow($type, $summary, @files);
##
## The row has a cell with $type, a cell with $summary, and whatever
## &PrintFileLinks() writes for @files.
sub SyntheticTableAddRow {
  my ($row_type, $row_summary, @file_list) = @_;

  ## Opening of the row and its first two cells
  print {$syn} ("\n\n<tr>\n",
                "\n<td>".$row_type."</td>\n\n",
                "\n<td>".$row_summary."</td>\n\n");

  ## File links
  &PrintFileLinks(@file_list);

  print {$syn} "</tr>\n\n";
}
################################################################
## Add a row to the synthetic table, with a top border on the first
## two cells and grouped file links.
##
## Usage:
##   &SyntheticTableAddRow_New($type, $summary, @files);
##
## @files is passed as is to &PrintFileLinks_New(), which writes the
## last cell of the row.
sub SyntheticTableAddRow_New {
  my ($row_type, $row_summary, @link_groups) = @_;

  ## Opening of the row and its first two cells
  print {$syn} ("\n\n<tr>\n",
                "\n<td style='border-top: 1px solid #cbcbb4;'>".$row_type."</td>\n\n",
                "\n<td style='border-top: 1px solid #cbcbb4;'>".$row_summary."</td>\n\n");

  ## Grouped file links
  &PrintFileLinks_New(@link_groups);

  print {$syn} "</tr>\n\n";
}

################################################################
## Print a HTML table cell with groups of links to files.
##
## Usage:
##   &PrintFileLinks_New($title1, [$key1, $file1, $key2, $file2, ...],
##                       $title2, [...],
##                       ...);
##
## Each title is followed by a reference to a list of key-file pairs.
## A key is printed as a link if its file is defined and exists, and
## as plain text otherwise. The processing of a group stops at the
## first false key.
##
## Beware: the key-file lists are consumed (emptied with shift).
sub PrintFileLinks_New {
  my @pending = @_;

  print {$syn} "\n<td >\n";

  ## Treat the arguments two by two: group title + key-file list
  while (@pending) {
    my ($group_title, $key_file_list) = splice(@pending, 0, 2);
    print {$syn} "[ ".$group_title.": \n";

    ## treat each link
    while (my $label = shift(@$key_file_list)) {
      my $path = shift(@$key_file_list);
      &RSAT::message::Debug("&SyntheticTableAddRow()", $label, $path) if ($main::verbose >= 5);

      ## No file, or file not found: print the key without link
      unless (($path) && (-e $path)) {
        print {$syn} $label."\n";
        next;
      }

      my $href = &RSAT::util::RelativePath($main::outfile{synthesis}, $path);
      print {$syn} "<a href='".$href."'>".$label."</a>\n";
    }

    print {$syn} " ]<br/>";
  }

  print {$syn} "</td>\n\n";
}

################################################################
## Add a header row to the synthetic table.
##
## Usage:
##   &SyntheticTableAddHeaderRow($header);
##
## The header is printed to the synthesis file ($syn) in a single
## cell spanning three columns.
sub SyntheticTableAddHeaderRow {
  my ($header_text) = @_;
  print {$syn} ("\n\n<tr>\n",
                "<th colspan=3>\n",
                $header_text,
                "</th>\n",
                "</tr>\n\n");
}

################################################################
## Add sequence composition on the synthetic report.
##
## Takes no argument. For each sequence type of the global
## @seq_types, the following rows are printed to the synthesis file
## ($syn):
## - a header row;
## - a row with the sequence length statistics (or "NA" if the length
##   distribution file does not exist), the length distribution graph
##   and links to the sequence and length files, plus a link to the
##   UCSC genome browser if a coordinate file exists;
## - one row per oligonucleotide length of the global
##   @composition_oligo_lengths, with the composition heatmap, the
##   position profiles and links to the corresponding files.
sub SynthesisSequenceComposition {

  print $syn '<p style="page-break-after:always;"></p>'; ## Page break for printing
  print $syn &open_menu_heading($menu_nb++, "<a name='seq_composition'></a><h3>Sequence composition</h3>", 0);
  print $syn "<p>\n\n\n<table class='whitebg'>\n";

  foreach my $seq_type (@seq_types) {

    ## Header line
    &SyntheticTableAddHeaderRow("Sequence composition ($seq_type sequences)");

    my $peak_nb = "NA";
    my $mean_peak_len = "NA";
    my $min_peak_len = "NA";
    my $max_peak_len = "NA";
    my $seq_size = "NA";

    my $seqlen_distrib_file = $main::outfile{$seq_type."_seqlen_distrib"};
    if (-e $seqlen_distrib_file) {
      &RSAT::message::Info("Collecting sequence length statistics from file", $seqlen_distrib_file) if ($main::verbose >= 3);

      ## Extract one statistic from the length distribution file: get
      ## the line(s) containing "; <name>", and suppress everything up
      ## to the name and the spaces that follow it.
      my $read_stat = sub {
        my ($stat_name) = @_;
        my $value = `grep '; $stat_name' $seqlen_distrib_file`;
        chomp($value);
        $value =~ s/.*$stat_name\s*//;
        return($value);
      };

      $peak_nb = $read_stat->("count");      ## Number of peaks
      $min_peak_len = $read_stat->("min");   ## Min peak length
      $mean_peak_len = $read_stat->("mean"); ## Mean peak length
      $max_peak_len = $read_stat->("max");   ## Max peak length

      ## Total sequence size, divided by 1000 (displayed in kb)
      $seq_size = round($read_stat->("sum")/1000);

      &RSAT::message::Debug("Nb of peaks=".$peak_nb,
                            "sequence size=".$seq_size) if ($main::verbose >= 5);
    } else {
      &RSAT::message::Warning("Sequence length file does not exist. Cannot collect statistics for the synthesis.\n", $seqlen_distrib_file);
    }

    ## Sequence lengths
    my $img = &RSAT::util::RelativePath($main::outfile{synthesis}, $main::outfile{$seq_type."_seqlen_distrib_graph"});
    my $seq_stats = "<u>Nb of peaks:</u> ".$peak_nb;
    $seq_stats .= "<br>\n<u>Total seq. size:</u> ".$seq_size." kb";
    $seq_stats .= "<br>\n<u>Min length:</u> ".$min_peak_len." bp";
    $seq_stats .= "<br>\n<u>Mean length:</u> ".$mean_peak_len." bp";
    $seq_stats .= "<br>\n<u>Max length:</u> ".$max_peak_len." bp";

    ## Display peaks UCSC track.
    ## If from fasta sequence: first get assembly.
    my $genome_assembly ="";
    if ($main::param{seq_source}) {
      ## also store the genome assembly (taken from the first fasta header)
      $genome_assembly = ` grep '^>' $infile{test_seq} | head -n 1 ` ;
      chomp($genome_assembly);
      if ($main::param{seq_source} eq "galaxy") {
        $genome_assembly =~ s/>([^_]+)_.*$/$1/;
      }
      &RSAT::message::Info("Assembly from fasta header", $main::param{seq_source},$genome_assembly) if ($main::verbose >= 3);
      ## IF from coord file
    } elsif ($main::param{coord_assembly}) {
      $genome_assembly = $main::param{coord_assembly};
    }

    if ((($main::infile{coord}) && (-e $main::infile{coord}))
        || (($main::outfile{"test_seq_coord"}) && (-e $main::outfile{"test_seq_coord"}))
        ) {

      ## UCSC link. The coordinates extracted from the test sequences
      ## have priority over the input coordinate file.
      ## The variable was formerly declared with a typo ($BED_UR), so
      ## that $BED_URL was an undeclared global.
      my $BED_URL = "";
      if ($main::outfile{"test_seq_coord"}) {
        $BED_URL = $ENV{rsat_www}."/tmp/". &RSAT::util::RelativePath(&RSAT::util::get_pub_temp(), $main::outfile{"test_seq_coord"});
      } elsif ($main::infile{coord}) {
        $BED_URL = $ENV{rsat_www}."/tmp/". &RSAT::util::RelativePath(&RSAT::util::get_pub_temp(), $main::infile{coord});
      }

      my $browser_url = "<a target='_blank' href='";
      $browser_url .= "http://genome.ucsc.edu/cgi-bin/hgTracks?";
      $browser_url .= "db=".$genome_assembly;
      $browser_url .= "&hgt.customText=".$BED_URL;
      $browser_url .= "'><img border=0 height='20' src='images_html/UCSC_icon.jpg' style='vertical-align:text-bottom;' alt='UCSC'></a>";

      ## BED file to link from the report (same priority as above)
      my $BED = "";
      $BED = $main::infile{coord} if ($main::infile{coord});
      $BED = $main::outfile{"test_seq_coord"} if ($main::outfile{"test_seq_coord"});
      &SyntheticTableAddRow_New($seq_stats,
                                "<a  href='".$img."'><img border='".$param{img_border}."' height='200' src='".$img."'></a>",
                                "view peaks in genome browser ", [$browser_url],
                                "coordinates", ["UCSC BED track",$BED,
                                               ],
                                "seq",["converted",$main::outfile{$seq_type."_converted"},
                                       "purged" , $main::outfile{$seq_type."_purged"}
                                      ],
                                "lengths",['list', $main::outfile{$seq_type."_seqlen"},
                                           'distrib',$main::outfile{$seq_type."_seqlen_distrib"},
                                           'graph',$main::outfile{$seq_type."_seqlen_distrib_graph"}
                                          ]
                               );
    } else {                    ## without UCSC track
      &SyntheticTableAddRow_New($seq_stats,
                                "<a  href='".$img."'><img border='".$param{img_border}."' height='150' src='".$img."'></a>",
                                "seq",["converted",$main::outfile{$seq_type."_converted"},
                                       "purged" , $main::outfile{$seq_type."_purged"}
                                      ],
                                "lengths",['list', $main::outfile{$seq_type."_seqlen"},
                                           'distrib',$main::outfile{$seq_type."_seqlen_distrib"},
                                           'graph',$main::outfile{$seq_type."_seqlen_distrib_graph"}
                                          ]
                               );

    }

    ## Residue composition
    for my $ol (@composition_oligo_lengths) {
      # my $table = "\n\n\n<table class='whitebg'>\n\n<tr>\n";
      my $table = "\n\n\n<table>\n\n<tr>\n";
      my $img = &RSAT::util::RelativePath($main::outfile{synthesis}, $main::outfile{$ol."nt_".$seq_type."_heatmap"});
      $table .= "\n<td style='border-bottom-style:none;border-right-style:none;'>Transition frequencies<br><a  href='".$img."'><img border='".$param{img_border}."' width=300 src='".$img."'></a></td>\n";
      $img = &RSAT::util::RelativePath($main::outfile{synthesis}, $main::outfile{$ol."nt_".$seq_type."_profiles_graph"});
      $table .= "\n<td style='border-bottom-style:none;border-right-style:none;'>Position profiles<br><a  href='".$img."'><img border='".$param{img_border}."' height='200' src='".$img."'></a></td>\n";
      $table .= "</tr>\n</table>\n\n\n";

      ## Row title, according to the oligonucleotide length
      my $title;
      if ($ol == 1) {
        $title = "Nucleotide";
      } elsif ($ol == 2) {
        $title = "Dinucleotide";
      } elsif ($ol == 3) {
        $title = "Trinucleotide";
      } elsif ($ol == 4) {
        $title = "Tetranucleotide";
      } else {
        $title = $ol."nucleotide";
      }
      $title .= " composition profiles";
      &SyntheticTableAddRow_New($title,
                                $table,
                                $ol."nt", ["freq",$main::outfile{$ol."nt_".$seq_type."_freq"},
                                           "transitions",$main::outfile{$ol."nt_".$seq_type."_transitions"}
                                          ],
                                "bg model", [ "Inclusive format",$main::outfile{$ol."nt_".$seq_type."_inclusive"}
                                            ],
                                "profile", [ "table",$main::outfile{$ol."nt_".$seq_type."_profiles"},
                                             "html(individual)",$main::outfile{$ol."nt_".$seq_type."_profiles_index"}
                                           ]
                               );
    }
  }
  print $syn "</table>\n\n\n<p>\n";
  print $syn &close_menu_heading();
}


################################################################
## Report discovered motifs by algorithm: a section of the synthesis
## ($syn) with one table, filled by &SynthesisPatternDisco() for each
## pattern type of the global @pattern_types.
sub SynthesisMotifsByAlgo {
  &RSAT::message::TimeWarn("Synthesis per motif discovery algorithm") if ($main::verbose >= 2);

  ## Page break for printing, then section heading
  print {$syn} '<p style="page-break-after:always;"></p>';
  print {$syn} &open_menu_heading($menu_nb++, "<a name='motifs_by_algo'></a><h3>Discovered motifs (by algorithm)</h3>", 1);

  ## Table with one block of rows per pattern type
  print {$syn} "\n\n\n<table class='whitebg'>\n";
  &SyntheticTableAddHeaderRow("motif discovery");
  for my $type_index (0..$#pattern_types) {
    &SynthesisPatternDisco($pattern_types[$type_index]);
  }
  print {$syn} "</table>\n\n\n<p>\n";

  print {$syn} &close_menu_heading();
}

################################################################
## Synthesis for one motif discovery algorithm
##
## Usage:
##   &SynthesisPatternDisco($pattern_type);
## Where pattern type can be oligos, dyads, local_words, oligo-diff
##
## When the assembly file of the pattern type exists, it is parsed to
## collect, for each assembly and each isolated pattern, the consensus
## word, its reverse complement and the significance score. One HTML
## row per motif (colored consensus, scores and logos) is then built
## and added to the synthetic table together with links to the result
## files. The per-motif information is also stored in the global hash
## %matrix_info, for use in the other parts of the report.
##
## When the assembly file does not exist, a single row is added to the
## synthetic table, stating either that no significant pattern was
## found (the pattern file exists) or that the result files are
## missing.
sub SynthesisPatternDisco {
  my ($pattern_type) = @_;
  &RSAT::message::TimeWarn("Synthesis for pattern type", $pattern_type) if ($main::verbose >= 3);

  ################################################################
  ## Synthesize results of pattern assembly (assembly consensus + sig
  ## scores)
  my $pattern_nb = 0;      ## Index of the current assembly / isolated pattern
  my $asmb_or_isol = "";   ## "assembly" or "singleton", depending on the file section
  my @pattern_info = ();   ## Indexed by pattern number; one hash ref per pattern

  if (-e $main::outfile{$pattern_type.'_asmb'}) {

    ## Parse the assembly file to get the consensus + sig score.
    ## Lines are read into a lexical variable (rather than $_) so that
    ## the caller's $_ is left untouched, and chomped so that a score
    ## found in the last column does not carry a trailing newline.
    my ($asmb) = &OpenInputFile($main::outfile{$pattern_type.'_asmb'});
    while (my $line = <$asmb>) {
      chomp($line);
      next if ($line =~ /^#/);       ## Skip header line
      next unless ($line =~ /\S/);   ## Skip empty lines

      if ($line =~ /assembly\s+\#\s+(\d+)/i) {
        ## Assembly number
        $pattern_nb = $1;
        $asmb_or_isol = "assembly";
        next;

      } elsif ($line =~ /Isolated patterns/) {
        ## Isolated patterns at the end of the assembly file
        $asmb_or_isol = "singleton";
        next;
      }

      ## Skip comments that do not indicate assembly number or start
      ## of isolated patterns
      next if ($line =~ /^;/);

      if ($asmb_or_isol eq "singleton") {
        ## Each isolated pattern gets its own number, following the
        ## last assembly
        $pattern_nb++;
      } elsif ($line !~ /consensus/) {
        ## Within an assembly, only the consensus line is retained
        next;
      }

      my ($word, $rc_word, $score) = split(/\t/, $line);
      $pattern_info[$pattern_nb]->{type} = $asmb_or_isol;
      $pattern_info[$pattern_nb]->{word} = $word;
      $pattern_info[$pattern_nb]->{rc_word} = $rc_word;
      $pattern_info[$pattern_nb]->{score} = $score;
    }
    close $asmb;


    ## Synthesize matrix logos
    my $pattern_table = "\n\n\n<table class='whitebg' align='center' style='border-style:none;'>";

    ## Read the discovered motifs (matrices) in order to get their
    ## consensus for the report (stored in the global @motifs).
    @motifs = &ReadDiscoveredMotifs();

    ## NOTE(review): the loop runs up to the requested number of motifs
    ## even when the assembly file describes fewer patterns; the extra
    ## iterations build rows from empty pattern info. Confirm whether
    ## the upper bound should be capped to $#pattern_info.
    foreach my $logo_nb (1..$main::param{matrix_nmotifs}) {

      ## Build matrix ID
      my $matrix_id = $pattern_type."_m".$logo_nb;
      my $minfo = ($matrix_info{$matrix_id} ||= {});
      my $pinfo = ($pattern_info[$logo_nb] ||= {});
      my $score = $pinfo->{score};

      $pattern_table .= "\n\n<tr>\n";

      ################################################################
      ## Store the info associated to this motif in order to display
      ## it in the other part of the report (Discovered motifs with
      ## motif comparisons).
      $minfo->{pattern_type} = $pattern_type;
      $minfo->{score} = $score;
      $minfo->{evalue} = sprintf("%.2g", 10**(-$score)); ## sig = -log10(e-value)
      $minfo->{logo_file} = $main::outfile{$pattern_type.'_pssm_logo'.$logo_nb};
      $minfo->{logo_file_rc} = $main::outfile{$pattern_type.'_pssm_logo_rc'.$logo_nb};

      ## Highlight in bold the patterns with score > 10, and in red
      ## those with score >= 75. The closing tags are prepended so that
      ## the tags are closed in reverse order.
      if (&IsReal($score)) {
        if ($score > 10) {
          $minfo->{start_format} .= '<b>';
          $minfo->{end_format} .= '</b>';
          if ($score >= 75) {
            $minfo->{start_format} .= "<font color='red'>";
            $minfo->{end_format} = "</font>".$minfo->{end_format};
          }
        }
      }

      &RSAT::message::Debug("Logo", $logo_nb, $minfo->{logo_file}) if ($main::verbose >= 5);

      ## Get the consensus from the assembly. Taking it from the
      ## matrices instead was tried and abandoned: only the matrices of
      ## the first motif discovery program were obtained.
      my $consensus = $pinfo->{word};
      my $consensus_rc = $pinfo->{rc_word};
      $minfo->{consensus} = $consensus;
      $minfo->{consensus_rc} = $consensus_rc;

      my $colored_consensus = &RSAT::SeqUtil::ColorConsensus($consensus, bold=>1, iupac=>$main::param{iupac_coloring});
      my $colored_consensus_rc = &RSAT::SeqUtil::ColorConsensus($consensus_rc, bold=>1, iupac=>$main::param{iupac_coloring});
      $minfo->{colored_consensus} = $colored_consensus;
      $minfo->{colored_consensus_rc} = $colored_consensus_rc;

      ## First cell: matrix identifier
      $pattern_table .= "\n<td style='border-right-style:none;'>".$matrix_id."</td>\n";

      ## Second cell: pattern type + colored consensus + sig + logo
      $pattern_table .= "\n<td align='right'>";
      $pattern_table .= $pinfo->{type};
      $pattern_table .= ": ";
      $pattern_table .= $colored_consensus;
      $pattern_table .= $minfo->{start_format};
      $pattern_table .= " (sig=".$minfo->{score}."; e-value=".$minfo->{evalue}.")";
      $pattern_table .= $minfo->{end_format};
      if (-e $minfo->{logo_file}) {
        my $img = &RSAT::util::RelativePath($main::outfile{synthesis}, $minfo->{logo_file});
        $pattern_table .= "<br><a  href='".$img."'><img  border='".$param{img_border}."' height='".$main::param{logo_table_height}."' src='".$img."'></a>";
      } elsif ($main::verbose >= 3) {
        &RSAT::message::Warning("Cannot find logo", $minfo->{logo_file});
      }
      $pattern_table .= "</td>\n\n";

      ## Third cell: reverse complement of the colored consensus + logo
      $pattern_table .= "\n<td align='left' style='border-right-style:none;'>";
      $pattern_table .= "RC: ";
      $pattern_table .= $colored_consensus_rc;
      if (-e $minfo->{logo_file_rc}) {
        my $img = &RSAT::util::RelativePath($main::outfile{synthesis}, $minfo->{logo_file_rc});
        $pattern_table .= "<br><a  href='".$img."'><img border='".$param{img_border}."' height='".$main::param{logo_table_height}."' src='".$img."'></a>";
      } elsif ($main::verbose >= 3) {
        &RSAT::message::Warning("Cannot find reverse complementary logo", $minfo->{logo_file_rc});
      }

      $pattern_table .= "</tr>\n\n";
    }
    $pattern_table .= "</table>\n\n\n";

    ## Add the motif table and the links to the result files
    &SyntheticTableAddRow_New($pattern_type,
                              $pattern_table,
                              "discovered words",["text",$main::outfile{$pattern_type},
                              ],
                              "assembly",['text', $main::outfile{$pattern_type.'_asmb'},
                                          'sig matrices',$main::outfile{$pattern_type.'_pssm_sig'},
                              ],
                              "matrices",['tab format', $main::outfile{$pattern_type.'_pssm_counts'},
                                          'transfac format',$main::outfile{$pattern_type.'_pssm_tf'},
                              ],
        );
    print $syn "\n\n", '<tr><td><p style="page-break-after:always;"></p></td></tr>', "\n\n"; ## Page break for printing

  } else {
    ## No assembly file: either the analysis returned no significant
    ## pattern, or it has not been run (or failed).
    if (-e $main::outfile{$pattern_type}) {
      &SyntheticTableAddRow($pattern_type,
                            "<font color='red'>Not a single significant pattern</font>",
                            $pattern_type,$main::outfile{$pattern_type},
          );
    } else {
      &SyntheticTableAddRow($pattern_type,
                            "<font color='red'>Missing files</font>",
                            $pattern_type,$main::outfile{$pattern_type},
                            "assembly",$main::outfile{$pattern_type.'_asmb'},
                            "sig matrix",$main::outfile{$pattern_type.'_pssm_sig'},
                            "matrices",$main::outfile{$pattern_type.'_pssm_counts'},
                           );
    }
  }
}

################################################################
## Synthesis of word comparisons
##
## Adds to the synthetic table a header row followed by one row
## linking to the merged-word files (tab, html, heatmap). When the
## parameter display_graphs is set, the heatmap is also shown inline,
## as an image linked to the heatmap file.
sub SynthesisWordCompa {
  &SyntheticTableAddHeaderRow("Word significance table");

  ## Inline heatmap, with a path relative to the synthesis file
  my $txt = "";
  if ($main::param{display_graphs}) {
    my $img = &RSAT::util::RelativePath($main::outfile{synthesis}, $main::outfile{merged_words_heatmap});
    $txt .= "<a  href='".$img."'><img border='".$param{img_border}."' height=300 src='".$img."'></a>";
  }

  &SyntheticTableAddRow("Word comparisons",
			$txt,
			"tab",$main::outfile{merged_words},
			"html",$main::outfile{merged_words_html},
			"heatmap",$main::outfile{merged_words_heatmap},
		       );
}

################################################################
## Synthesis of the comparisons between discovered motifs.
##
## Adds to the synthetic table a header row, then one row linking to
## the file of discovered motifs and to the HTML clustering
## result. When the parameter display_graphs is set, the comparison
## picture is shown inline, as an image linked to the picture file.
sub SynthesisMotifsVsMotifs {
    &SyntheticTableAddHeaderRow("Comparisons between discovered motifs");

    ## Inline picture, with a path relative to the synthesis file
    my $picture_html = "";
    if ($main::param{display_graphs}) {
        my $picture_path = &RSAT::util::RelativePath($main::outfile{synthesis},
                                                     $main::outfile{"motifs_disco_compa_png"});
        $picture_html = join("",
                             "<a  href='", $picture_path, "'>",
                             "<img border='", $param{img_border}, "' height=300 src='", $picture_path, "'>",
                             "</a>");
    }

    ## Links displayed besides the picture (label, file pairs).
    ## The text alignments (motifs_disco_compa_alignmeants_1ton) are
    ## currently not linked from the clustering entry.
    my @motif_links = ("tf", $main::outfile{motifs_discovered});
    my @clustering_links = ("html", $main::outfile{"motifs_disco_clusters_html"});

    &SyntheticTableAddRow_New("Discovered motifs versus discovered motifs",
                              $picture_html,
                              "Discovered motif(s)", \@motif_links,
                              "clustering", \@clustering_links,
        );
}



################################################################
## Produce the small summary: a separate HTML file giving a very
## synthetic view of the results.
##
## The text version of the summary (handle $small_sum_txt) is closed,
## then converted to HTML with text-to-html. The resulting file is
## added to the archive when the task "archive" is active.
sub SmallSummary {
  ## The text file must be closed before being converted
  close $small_sum_txt;

  ## Command converting the text summary into an HTML file, without
  ## sorting its rows
  my $convert_cmd = join("",
                         $SCRIPTS, "/text-to-html ",
                         " -i ", $main::outfile{small_summary_txt},
                         " -no_sort ",
                         " -o ", $main::outfile{small_summary});
  &RSAT::util::one_command($convert_cmd, 1, "", log=>$main::out);

  ## Add the small summary to the archive
  if ($task{archive}) {
    &Archive(0, $main::outfile{small_summary});
  }
}



################################################################
## Delete purged sequence files after analysis has been completed.
sub CleanSequences {
  foreach my $seq_type (@seq_types) {
    &RSAT::message::TimeWarn("Cleaning sequences", $seq_type) if ($main::verbose >= 2);
    &RSAT::util::one_command("rm -f ".$main::outfile{$seq_type."_purged"}, 1, "", log=>$main::out);
  }
}


################################################################
## Read arguments
sub ReadArguments {
  &RSAT::message::TimeWarn("Reading arguments") if ($main::verbose >= 2);
  my $arg;
  my @arguments = @ARGV; ## create a copy to shift, because we need ARGV to report command line in &Verbose()
  while (scalar(@arguments) >= 1) {
    $arg = shift (@arguments);
    ## Verbosity

=pod

=head1 OPTIONS

=over 4

=item B<-v #>

Level of verbosity (detail in the warning messages during execution)

=cut
    if ($arg eq "-v") {
      if (&IsNatural($arguments[0])) {
	$main::verbose = shift(@arguments);
      } else {
	$main::verbose = 1;
      }

=pod

=item B<-h>

Display full help message

=cut
    } elsif ($arg eq "-h") {
      &PrintHelp();

=pod

=item B<-help>

Same as -h

=cut
    } elsif ($arg eq "-help") {
      &PrintOptions();

=pod

=item B<-i test_seq_file>

Test peak sequence file (mandatory).

For single-set analysis, this file contains the peak sequences of the
unique set.  For test versus control analysis, it contains the test
sequences.

=cut
    } elsif ($arg eq "-i") {
      $main::infile{test_seq} = shift(@arguments);

=pod

=item B<-source sequence_source>

Enter the source of the fasta sequence file.

Supported source: galaxy

When the sequence file comes from Galaxy, peak coordinates embedded in
the fasta headers are extracted and used to convert predicted site
coordinates to genomic coordinates (in the form of a bed file), which
can then be uploaded to the UCSC genome browser as an annotation
track.

This option is incompatible with -coord.

=cut
    } elsif ($arg eq "-source") {
      my $seq_source = lc(shift(@arguments));
      if ($supported_seq_source{$seq_source}) {
	$main::param{seq_source} = $seq_source;
      } else {
	&RSAT::message::Warning($seq_source, "Invalid sequence source, will be ignored. Supported: ".$supported_seq_sources);
      }

=pod

=item B<-coord assembly peak_coordinate_file>

Specify a file in bed format indicating the coordinates of each peak.
The file path must be preceded by the name of the genome assembly to
which the coordinates refer. Assemblies are named following the UCSC
convention (e.g. mm9, hg19).

Example: -coord hg19 path/to/bed/file.bed

When a coordinate file is provided, predicted site coordinates are
also converted to genomic coordinates (in the form of a bed file),
which can be uploaded to the UCSC genome browser as an annotation
track.

The 4th column of the BED file must correspond to the fasta
headers. See the documentation of the UCSC Genome Browser for the
specification of the bed format.

=cut
    } elsif ($arg eq "-coord") {
      $main::param{coord_assembly} = shift(@arguments);
      $main::infile{coord} = shift(@arguments);
       &RSAT::error::FatalError($main::param{coord_assembly}, $main::infile{coord}, "are not valid values for -coord option. Should be -coord assembly path/to/bed/file.bed")
	if ($main::infile{coord} =~ /^-/);


=pod

=item B<-ctrl control_seq_file>

Control peak sequence file (optional).

The control sequence file is used:
- as control sequence for I<oligo-diff>
- to estimate the background models for I<oligo-analysis> and
  I<dyad-analysis>.

Control sequences are supposed to contain a large number of sequences
without particular enrichment for any motif. The choice of appropriate
background sequences is crucial to detect relevant motifs.

The file should be sufficiently large (several Mb) to provide a robust
estimate of prior probabilities (frequencies expected at random) for
oligonucleotides and dyads.

Typical examples of control sequences:

- peak sequences pulled down with the same DNA-binding protein as the
  test but in a different tissue (e.g. Blow et al., 2010).

- sets of sequences pulled down in a mock experiment (without the
  antibody) and characterized by ChIP-seq or ChIP-chip.

- sets of peaks for a compendium of transcription factors different
  from the factor of interest.

- random fragments of the genome of interest
  (e.g. obtained with I<random-genome-fragments>)

=cut
    } elsif ($arg eq "-ctrl") {
      $main::infile{ctrl_seq} = shift(@arguments);


=pod

=item B<-max_seq_len msl>

Maximal sequence length. Larger sequences are truncated at the
specified length around the sequence center (from -msl/2 to +msl/2).

=cut
    } elsif ($arg eq "-max_seq_len") {
      my $max_seq_len = shift(@arguments);
      &RSAT::error::FatalError($max_seq_len, "is not a valid value for max sequence length. Should be a Natural number.")
	unless ((&IsNatural($max_seq_len)));
      if ($max_seq_len > 0) {
	$main::param{max_seq_len} = $max_seq_len;
      } else {
	&RSAT::message::Info("Max seq len = 0 interpreted as no limit") if ($main::verbose >= 2);
      }

=pod

=item B<-top_peaks N>

Restrict the analysis to the N peaks at the top of the test sequence
file. Some peak calling programs return the peaks sorted by score. In
such case, the -top_peaks option allows to restrict the analysis to
the highest scoring peaks. In some cases, the top-scoring peaks might
contain a higher density of binding sites, allowing to detect motifs
with a higher significance.

This option can also be convenient for performing quick tests,
parameter selection and debugging before running the full analysis of
large sequence sets.

A value of 0 is interpreted as "no limit" (this is convenient for
setting the default value in scripts testing progressively increasing
values of TOPS).

=cut
    } elsif ($arg eq "-top_peaks") {
      my $top_peaks = shift(@arguments);
      &RSAT::error::FatalError($top_peaks, "is not a valid value for max sequence length. Should be a Natural number.")
	unless ((&IsNatural($top_peaks)));
      if ($top_peaks > 0) {
	$main::param{top_peaks} = $top_peaks;
      } else {
	&RSAT::message::Info("Top peak number = 0 interpreted as no limit") if ($main::verbose >= 2);
      }

=pod

=item	B<-outdir output_directory>

Output directory (mandatory).

The result files and index files produced by the different programs
will be stored in this directory.

=cut
    } elsif ($arg eq "-outdir") {
      $main::dir{output} = shift(@arguments);


=pod

=item	B<-prefix output_prefix>

Prefix for the output files.

=cut
    } elsif ($arg eq "-prefix") {
      $main::param{prefix} = shift(@arguments);

=pod

=item	B<-title graph_title>

Title displayed on top of the graphs.

=cut
    } elsif ($arg eq "-title") {
      $main::param{title} = shift(@arguments);

=pod

=item	B<-img_format img_format>

Image format.

All the formats supported by XYgraph can be used.

=cut
    } elsif ($arg eq "-img_format") {
      $main::param{img_format} = shift(@arguments);

=pod

=item	B<-r_plot>

Use R rather than the Perl GD library to generate plots.  This gives
much nicer plots, but requires to have R installed on the RSAT server.

=cut
    } elsif ($arg eq "-r_plot") {
      $Rscript_path = &RSAT::server::GetProgramPath("Rscript", 0);
      unless ($Rscript_path) {
	&RSAT::message::Warning("Rscript program is not found in the path. Ignoring option -r_plot.");	
     } else {
      $main::param{r_plot} = 1;
     }

=pod

=item B<-disco oligos|dyads|positions|merged_words|meme|chipmunk>

Specify the software tool(s) that will be used for motif discovery.

Several algorithms can be specified either by using the option
iteratively:

  -disco oligos -disco dyads

or by entering a comma-separated list of algorithms:

  -disco oligos,dyads

B<Default motif discovery algorithms>

=over

=item I<oligos>

Run I<oligo-analysis> to detect over-represented oligonucleotides of a
given length (k, specified with option -l) in the test set (van Helden
et al., 1998). Prior frequencies of oligonucleotides are taken from
Markov model of order m (see option -markov) estimated from the test
set sequences themselves.

=item I<dyads>

Run I<dyad-analysis> to detect over-represented dyads, i.e. pairs of
short oligonucleotides (monads) spaced by a region of fixed width but
variable content (van Helden et al., 2000). Spaced motifs are typical
of certain classes of transcription factors forming homo- or
heterodimers.

By default, peak-motifs_quick analyzes pairs of trinucleotides with
any spacing between 0 and 20.

The expected frequency of each dyad is estimated as the product of its
monad frequencies in the sequences (option -bg monads of
dyad-analysis).

=item I<positions>

Run I<position-analysis> to detect oligonucleotides showing a
positional bias, i.e. have a non-homogeneous distribution in the peak
sequence set.

This method was initially developed to analyze termination and
poly-adenylation signals in downstream sequences (van Helden et al.,
2001), and it turns out to be very efficient for detecting motifs
centred on the ChIP-seq peaks. For ChIP-seq analysis, the reference
position is the center of each sequence.

The reference position can however be changed with the option
I<-origin> (center, start, end).

Note that I<peak-motifs> also uses I<position-analysis> for the
task B<composition>, in order to detect compositional biases (residues,
dinucleotides) in the test sequence set.

=item I<merged_words>

Extract a position-specific scoring matrix (using
I<matrix-from-patterns>) from all the words discovered by the selected
string-based motif discovery algorithms (oligos, dyads, positions
and/or local_words).

=back

=cut
    } elsif ($arg eq "-disco") {
      my @requested_discos = split ",", shift (@arguments);
      foreach my $disco (@requested_discos) {
	next unless $disco;
	if ($supported_disco{$disco}) {
	  $disco{$disco} = 1;
	} else {
	  &RSAT::error::FatalError("Motif discovery algorithm '$disco' is not supported. \n\tSupported: $supported_discos");
	}
      }




=pod

=item B<-task>

Specify a subset of tasks to be executed.

By default, the program runs all necessary tasks. However, in some
cases, it can be useful to select one or several tasks to be executed
separately.

Beware: task selection requires expertise, because most tasks depends
on the prior execution of some other tasks in the workflow. Selecting
tasks before their prerequisite tasks have been completed will provoke
fatal errors.

B<Default tasks>

=over

=item I<all> (default)

Run all the default tasks.

=item I<purge>

Purge sequences (test set and, if specified, control set) to mask
redundant fragments before applying pattern discovery
algorithms. Sequence purging is necessary because redundant fragments
would violate the hypothesis of independence underlying the binomial
significance test, resulting in a large number of false positive
patterns.

=item I<seqlen>

Compute sequence lengths and their distribution.

Sequence lengths are useful for the negative control (selection of
random genome fragments).

Sequence length distribution is informative to get an idea about the
variability of peak lengths.

=item I<composition>

Compute compositional profiles, i.e. distributions of residues and
dinucleotide frequencies per position (using I<position-analysis>).

Residue profiles may reveal composition biases in the neighborhood of
the peak sequences. Dinucleotide profiles can reveal (for example) an
enrichment in CpG island.

Note that I<peak-motifs> also runs I<position-analysis> with
larger oligonucleotide length (see option -l) to detect motifs on the
basis of positionally biased oligonucleotides (see task B<positions>).


=over

=item Formatting of the reference motif

Perform various format conversion for the reference motif (compute
parameters, consensus, logo).

=item Motif enrichment

Generate an enriched motif by scanning the peak sequence set with the
reference motif.

=back

=item I<disco>

Run the motif discovery algorithms. See option -disco for the
selection of motif discovery algorithm(s).

=item I<meme>

Run the motif discovery program MEME on the test sequences.

B<Beware>: the complexity of MEME is quadratic: the computing time
increases as the square of sequence size. It is thus not recommended
to use MEME for data sets exceeding 1Mb. If the sequence set contains
many peaks, the option -task meme can be combined with a restriction
on the number of top peaks to be considered (e.g. -top_peaks 500).

MEME reference: Bailey, T. L. and Elkan, C. (1994). Fitting a mixture
model by expectation maximization to discover motifs in
biopolymers. Proc Int Conf Intell Syst Mol Biol 2, 28-36.

=item I<chipmunk>

Run the motif discovery program ChIPMunk.

ChIPMunk reference: Kulakovskiy, I. V., Boeva, V. A., Favorov,
A. V. and Makeev, V. J. (2010). Deep and wide digging for binding
motifs in ChIP-Seq data. Bioinformatics 26, 2622-3.

=item I<merge_words>

Merge the words (oligos or dyads) discovered by the different
string-based motif discovery algorithms.

The table of merged words has one row per word (oligo or dyad) and one
column per motif discovery program. This table is convenient to
analyze the consistency between the words detected by different
approaches, e.g. show that a word is both over-represented
(oligo-analysis, dyad-analysis) and positionally biased
(position-analysis, local-words). A heatmap is also exported to
provide a graphical representation of the significance of each word
(row) for each algorithm (column).

The merged words can optionally be used as seeds for extracting
position-specific scoring matrices from the sequences, using the
program I<matrix-from-patterns> (see option -disco merged_words).

=item I<timelog>

Generate a log file summarizing the time spent in the different tasks.

=item I<synthesis>

Generate the HTML file providing a synthesis of the results and
pointing towards the individual result files.

=back

B<Extra tasks>

A few extra tasks are available, which are not executed by default. Those
tasks are executed only when they are explicitly invoked with the option
I<-task>, they are not called with the option "-task all".

=over

=item I<clean_seq>

Delete the purged sequence files after the analysis, in order to save
space.


=item I<meme_bg>

Compute meme background model from the test sequences.

=back

=cut
    } elsif ($arg eq "-task") {
      my @requested_tasks = split ",", shift (@arguments);
      foreach my $task (@requested_tasks) {
	next unless $task;
	if ($supported_task{$task}) {
	  $task{$task} = 1;
	} else {
	  &RSAT::error::FatalError("Task '$task' is not supported. \n\tSupported: $supported_tasks");
	}
      }

=pod

=item B<-nmotifs max_motif_number>

Maximal number of motifs (matrices) to return for each motif discovery
algorithm. Note the distinction between the maximal number of motifs
(matrices) and the maximum number of patterns (words, dyads): a motif
generally corresponds to a set of several mutually overlapping patterns (dyads,
words).

=cut
    } elsif ($arg eq "-nmotifs") {
      $main::param{matrix_nmotifs} = shift (@arguments);
      $main::param{dreme_m} = $main::param{matrix_nmotifs};
      $main::param{meme_nmotifs} = $main::param{matrix_nmotifs};
      &RSAT::error::FatalError($main::param{matrix_nmotifs}, "Invalid value for max number of motifs. Should be a Natural number")
	unless (&IsNatural($main::param{matrix_nmotifs}));

=pod

=item B<-maxpat max_pattern_number>

Maximal number of patterns (words) to return for each motif discovery
algorithm. Note the distinction between the maximal number of patterns
(words,dyads) and the maximum number of motifs (matrices): a motif
generally corresponds to a set of several mutually overlapping
patterns (dyads, words).

=cut
    } elsif ($arg eq "-maxpat") {
      $main::param{patterns_max_rank} = shift (@arguments);
      &RSAT::error::FatalError($main::param{patterns_max_rank}, "Invalid value for max number of patterns. Should be a Natural number")
	unless (&IsNatural($main::param{patterns_max_rank}));

=pod

=item B<-minol oligo_min_len>

=item B<-maxol oligo_max_len>

Minimal (-minol) and maximal (-maxol) lengths of oligonucleotide for
word-counting approaches (oligo-analysis, position-analysis,
local-word-analysis, oligo-diff, dreme).

In our experience, optimal results are obtained with hexanucleotides
and heptanucleotides.

Note: the monad length used for dyad-analysis is not affected by those
options. Instead, it is fixed to 3. Indeed, dyad-analysis
automatically detects larger motifs by sampling various spacings
between the two trinucleotide monads.

=cut
    } elsif ($arg eq "-l") {
      my $oligo_len = shift (@arguments);
      $main::param{oligo_min_len} = $oligo_len;
      $main::param{oligo_max_len} = $oligo_len;
      $main::param{dreme_mink} = $main::param{oligo_min_len};
      $main::param{dreme_maxk} = $main::param{oligo_max_len};
      &RSAT::message::Warning("The option -l is obsolete, please use the options -minol and -maxol instead.");

    } elsif ($arg eq "-minol") {
      $main::param{oligo_min_len} = shift (@arguments);
      $main::param{dreme_mink} = $main::param{oligo_min_len};
    } elsif ($arg eq "-maxol") {
      $main::param{oligo_max_len} = shift (@arguments);
      $main::param{dreme_maxk} = $main::param{oligo_max_len};


=pod

=item B<-merge_lengths>

=item B<-no_merge_lengths>

These two options define whether oligonucleotides of different lengths
should be merged (used together) for assembly (I<pattern-assembly>)
and matrix building (I<matrix-from-patterns>).

Default: -merge_lengths

The programs I<oligo-analysis>, I<position-analysis> and
I<local-word-analysis> can run with different oligonucleotide sizes
(e.g. 6, 7, 8), which generally reveal redundant fragments of larger
motifs. 

When the option I<-no_merge_lengths> is active, matrices are built
separately for each oligo length. However, this generally leads to
redundant motifs, which were detected separately as 6-mers and
7-mers. Separate assembly was the initial mode of assembly
since the original publication of the method (2011). 

The option I<-merge_lengths> was implemented in December 2013.

=cut

  } elsif ($arg eq "-merge_lengths") {
    $main::param{merge_lengths} = 1;

  } elsif ($arg eq "-no_merge_lengths") {
    $main::param{merge_lengths} = 0;


=pod

=item B<-markov>

Order of the Markov model used to estimate expected oligonucleotide
frequencies for I<oligo-analysis> and I<local-word-analysis>.

B<Note:> the option I<-markov> only applies to motif
discovery. Sequence scanning is performed with a different Markov
order, because the scanning time strongly increases with Markov order,
whereas the computing time for I<oligo-analysis> and
I<local-word-analysis> is independent of the Markov order. The Markov
order for sequence scanning (site prediction and enrichment analysis)
can be specified with the option I<-scan_markov>.

Higher order Markov models are more stringent, lower order are more
sensitive, but tend to return a large number of false positives.

Markov models can be specified with either a positive or a negative
value. Positive values indicate the length of the prefix in the
transition matrix. Negative values indicate the order of the Markov
model relative to the oligonucleotide length. For example, the option
-markov -2 gives a model of order m=k-2 (thus, an order 5 for
heptanucleotides, an order 4 for hexanucleotides).

The optimal Markov order depends on the number of sequences in the
test set. Since ChIP-seq data typically contain hundreds to thousands
of peaks, high Markov orders are generally good, because they are
stringent and still sensitive enough.  In our experience, motifs are
well detected with the most stringent Markov order (-markov -2).

=item B<-min_markov min_markov_order>

=item B<-max_markov max_markov_order>

A minimal and a maximal value can be specified for the Markov
order. The program then iterates over all markov values between
min_markov_order and max_markov_order.


=cut
    } elsif ($arg eq "-markov") {
      $main::param{oligo_min_mkv} = $main::param{oligo_max_mkv} = shift (@arguments);
      if ($main::param{oligo_min_mkv} eq "auto") {
	$main::auto_markov = 1;
      } else {
	&RSAT::error::FatalError($main::param{oligo_min_mkv}, "Invalid value for Markov order. Should be an Integer")
	  unless (&RSAT::util::IsInteger($main::param{oligo_min_mkv}));
      }
    } elsif ($arg eq "-min_markov") {
      $main::param{oligo_min_mkv} = shift (@arguments);
      if ($main::param{oligo_min_mkv} eq "auto") {
	$main::auto_markov = 1;
      } else {
	&RSAT::error::FatalError($main::param{oligo_min_mkv}, "Invalid value for min Markov order. Should be an Integer")
	  unless (&RSAT::util::IsInteger($main::param{oligo_min_mkv}));
      }
    } elsif ($arg eq "-max_markov") {
      $main::param{oligo_max_mkv} = shift (@arguments);
      if ($main::param{oligo_max_mkv} eq "auto") {
	$main::auto_markov = 1;
      } else {
	&RSAT::error::FatalError($main::param{oligo_max_mkv}, "Invalid value for max Markov order. Should be an Integer")
	  unless (&RSAT::util::IsInteger($main::param{oligo_max_mkv}));
      }

=pod

=cut
    } elsif ($arg eq "-scan_markov") {
      $main::param{scan_markov} = shift (@arguments);
      &RSAT::error::FatalError($main::param{scan_markov}, "Invalid value for scanning Markov order. Should be an Integer")
	unless (&RSAT::util::IsInteger($main::param{scan_markov}));

=pod

=item B<-1str | -2str>

Single-strand (-1str) or double-strand (-2str) analysis.

The default is double-strand analysis (-2str), since ChIP-seq results
have no particular strand orientation.

However, for some data types such as CLIP-seq it might be relevant to
analyze single-strand motifs (option -1str).

=cut
    } elsif ($arg eq "-1str") {
      $main::param{strand} = "-1str";

    } elsif ($arg eq "-2str") {
      $main::param{strand} = "-2str";

=pod

=item B<-noov | -ovlp>

Treatment of self-overlapping words for motif discovery: count (-ovlp)
or do not count (-noov) overlapping occurrences. In -noov mode, only
renewing occurrences are counted.

It is recommended to use the -noov mode (default) to avoid the effect
of self-overlap, which violates the hypothesis of independence of
successive occurrences underlying the binomial significance test
(oligo-analysis, dyad-analysis).

B<Beware>: the options -noov and -ovlp only apply to motif discovery,
and not to compositional profiles. Dinucleotide frequencies are always
computed with the option -ovlp (count all occurrences), to avoid weird
effects. Since those composition profiles further serve to estimate the
probability of larger words, which may include repeated residues, we
need to count all dinucleotide occurrences. Indeed with the -noov mode
(renewing occurrences only), the transition tables of the first order
Markov model would be unbalanced: the expected frequency of all the
repeated dinucleotides (AA, TT, CC, GG) would be under-estimated,
leading to an under-estimation of the expected frequency of
repeat-containing words (e.g. AAAAAA, AAAGGG, ...).

=cut
    } elsif ($arg eq "-noov") {
      $main::param{disco_noov} = "-noov";
    } elsif ($arg eq "-ovlp") {
      $main::param{disco_noov} = "-ovlp";

=pod

=item B<-under>

Search for under-represented motifs instead of over-represented ones. This option is only compatible with oligo-analysis.

=cut
    } elsif ($arg eq "-under") {
      $main::param{under_represented} = 1;

=pod

=item B<-ci class_interval>

Class interval for I<position-analysis>.

=cut

    } elsif ($arg eq "-ci") {
      $main::param{profiles_ci} = shift(@arguments);
      &RSAT::error::FatalError($main::param{profiles_ci}, "is not a valid value for class interval. Should be a strictly positive Natural number.")
	unless ((&IsNatural($main::param{profiles_ci})) && ($main::param{profiles_ci} > 0));

=pod

=item B<-origin center|start|end>

Default: center

Reference point for position-analysis and sequence scanning. By
default, peak-motifs aims at analyzing peaks from ChIP-seq and related
experiments. The peak center is thus the default origin (position
0). However, for some particular purposes it might be relevant to take
the start or the end of the sequences as position 0. For example, to
analyze promoters the end of the sequence generally serves as
reference position and coordinates are displayed as negative numbers.

=cut

    } elsif ($arg eq "-origin") {
      $main::param{origin} = shift(@arguments);

=pod

=item B<-offset offset_value>

Add an offset to site positions. The offset value must be an Integer
number (positive, null or negative). This option makes it possible to
select an arbitrary position as origin.

=cut

    } elsif ($arg eq "-offset") {
       $main::param{offset} = shift(@arguments);
        &RSAT::error::FatalError($main::param{offset}, "Invalid value for the offset. Should be an Integer number") unless (&RSAT::util::IsInteger($main::param{offset}));

    ## Other parameters are not accepted
    } else {
      &RSAT::error::FatalError("Invalid option", $arg);

    }

  }

=pod

=back

=cut

}

################################################################
## Check arguments
##
## Validate the options collected from the command line and derive the
## parameters that depend on them. Takes no argument and returns no
## meaningful value: it works on the script-level variables (%infile,
## %disco, %task, %main::param, %main::dir, %main::outfile, ...).
##
## Steps, in order:
##  - the test sequence file is mandatory, must exist and must contain
##    fasta headers (as counted by &CheckFastaFile);
##  - the control sequence file is optional but undergoes the same checks;
##  - with a control file, the discovery algorithms are restricted to
##    dyads, oligos, oligo_diff and dreme;
##  - output directory and prefix are mandatory, the title defaults to
##    the prefix;
##  - names of the log, synthesis and summary files are computed;
##  - option strings for sequence scanning are assembled;
##  - default tasks / discovery algorithms are applied and the ordered
##    list of requested tasks (@tasks) is built.
##
## Any invalid setting is reported through &RSAT::error::FatalError.
sub CheckArguments {
  &RSAT::message::TimeWarn("Checking arguments") if ($main::verbose >= 2);

  ################################################################
  ## Test sequence file
  if ($infile{test_seq}) {
    unless (-e $infile{test_seq}) {
      &RSAT::error::FatalError("Test sequence file does not exist", $infile{test_seq});
    }
    @main::seq_types = ("test");

    ## Check that the test file contains fasta-formatted sequences.
    ## This is important because some users of the Web site seem to
    ## submit coordinates (bed files) rather than sequences (fasta
    ## files).
    &RSAT::message::Debug("Checking fasta format for test sequence file", $infile{test_seq}) if ($main::verbose >= 2);
    my $fasta_header_lines = &CheckFastaFile($infile{test_seq}, $check_fasta_top);

    &RSAT::message::TimeWarn($fasta_header_lines, "fasta headers in the top", $check_fasta_top, "lines") if ($main::verbose >= 3);
    &RSAT::error::FatalError("Test sequence file does not contain fasta headers (checked the ".$check_fasta_top." top lines)",
			     $infile{test_seq}) unless ($fasta_header_lines);

  } else {
    &RSAT::error::FatalError("You must define the test sequence set (option -i)");
  }


  ################################################################
  ## Control file

  ## If control file has been specified, check that it exists
  if ($infile{ctrl_seq}) {
    unless (-e $infile{ctrl_seq}) {
      &RSAT::error::FatalError("Control sequence file does not exist", $infile{ctrl_seq});
    }

    ## Check that the ctrl file contains fasta-formatted sequences.
    ## This is important because some users of the Web site seem to
    ## submit coordinates (bed files) rather than sequences (fasta
    ## files).
    my $fasta_header_lines = &CheckFastaFile($infile{ctrl_seq}, $check_fasta_top);
    &RSAT::message::TimeWarn($fasta_header_lines, "fasta headers in the top", $check_fasta_top, "lines") if ($main::verbose >= 3);
    &RSAT::error::FatalError("Control sequence file does not contain fasta headers (checked the ".$check_fasta_top." top lines)",
			     $infile{ctrl_seq}) unless ($fasta_header_lines);

    push @main::seq_types, ("ctrl");
  }

  ## Control file is required for oligo-diff
  if ($disco{oligo_diff}) {
    unless ($infile{ctrl_seq}) {
      &RSAT::error::FatalError("The option -disco oligo_diff requires to specify a control sequence file (option -ctrl)");
    }
  }

  ## If a control file is provided, only keep the differential motifs.
  ## For the moment, we only keep oligo_diff, oligo-analysis,
  ## dyad-analysis and dreme. Each algorithm is kept under its own key,
  ## so that a request for dreme is not silently turned into oligo_diff.
  if ($infile{ctrl_seq}) {
    my %disco_ctrl = ();
    $disco_ctrl{dyads} = 1 if ($disco{dyads});
    $disco_ctrl{oligos} = 1 if ($disco{oligos});
    $disco_ctrl{oligo_diff} = 1 if ($disco{oligo_diff});
    $disco_ctrl{dreme} = 1 if ($disco{dreme});
    %disco = %disco_ctrl;
  }

  ################################################################
  ## Oligo length (k=m+1) corresponding to the scanning Markov order
  ## (m). If this length is larger than 2, add it to the list of oligo
  ## lengths for the computation of oligo compositions.
  $main::param{scan_bg_ol} = $main::param{scan_markov}+1;
  if ($main::param{scan_bg_ol} > 2) {
    push @composition_oligo_lengths, $main::param{scan_bg_ol};
  }

  ################################################################
  ## Output directory
  if ($main::dir{output}) {
    &RSAT::util::CheckOutDir($main::dir{output}, "", 755);
    push @outdir, "output";
  } else {
    &RSAT::error::FatalError("You must define the output directory (option -outdir)");
  }

  ## Prefix
  unless ($main::param{prefix}) {
    &RSAT::error::FatalError("You must define a prefix for the output files (option -prefix)");
  }

  ## Title (defaults to the prefix)
  unless ($main::param{title}) {
    $main::param{title} = $main::param{prefix};
  }

  ## Log files
  $main::outfile{log} = &OutFileName("reports", ".txt", "log");
  $main::outfile{links} = &OutFileName("reports", ".html", "links");
  $main::outfile{timelog} = &OutFileName("reports", ".txt", "timelog");
  $main::outfile{timelog_html} = &OutFileName("reports", ".html", "timelog");
  $main::outfile{timetable} = &OutFileName("reports", ".txt", "timetable");
  $main::outfile{archive} = &OutFileName("", ".".$main::param{archive_format}, "archive");

  ## Synthesis file
  $main::outfile{synthesis} = &OutFileName("", ".html", "synthesis");

  ## Conditions under which progressive synthesis is not required.
  ## NOTE(review): this test runs before the default tasks are filled
  ## in below, so when no task was specified $task{synthesis} is still
  ## unset here -- confirm that this is intended.
  $progressive_synthesis = 0 unless ($task{synthesis});
  if ((scalar(keys(%task)) == 1) && ($task{synthesis})) {
    $progressive_synthesis = 0;
  }

  ## Small summary
  $main::outfile{small_summary_txt} = &OutFileName("reports", ".txt", "small_summary");
  $main::outfile{small_summary} = &OutFileName("", ".html", "small_summary");


  ################################################################
  ## Specify scanning options after having read command-line arguments

  ## Concatenate scanning options for matrix-scan
  $main::param{scan_options} = " -pseudo ".$main::param{scan_pseudo};
  $main::param{scan_options} .= " ".$main::param{strand};
  $main::param{scan_options} .= " -decimals ".$main::param{scan_decimals};
  $main::param{scan_options} .= " -bg_pseudo ".$main::param{scan_bg_pseudo};
  $main::param{scan_options} .= " -n score"; ##  Assign a score to the sequence fragments containing masked/undefined nucleotides (N)

  ## Options used for site scanning but not for enrichment analysis
  $main::param{scan_sites_options} = " -lth score ".$main::param{scan_min_score};
  $main::param{scan_sites_options} .= " -origin ".$main::param{origin};
  $main::param{scan_sites_options} .= " -offset ".$main::param{offset};
  $main::param{scan_sites_options} .= " -return sites";

  ## Options used for enrichment in matrix hits but not for site detection
  $main::param{scan_enrich_options} = " -lth score ".$main::param{scan_enrich_min_score};
  $main::param{scan_enrich_options} .= " -bginput -markov ".$main::param{scan_markov};

  &RSAT::message::Debug($param{strand}, $param{scan_options}) if ($main::verbose >= 3);

  ## Modalities of motif comparisons
  if ($task{motif_compa}) {
    $task{merge_motifs} = 1;
    #   $task{cluster_motifs} = 1;
    $task{collect_motifs} = 1;
  }

  ## The task collect_motifs regroups merge_motifs and split_motifs
  if ($task{collect_motifs}) {
    $task{merge_motifs} = 1;
    $task{split_motifs} = 1;
  }


  ################################################################
  ## Check tasks

  ## If all tasks are requested or if no task is defined, execute all
  ## the default tasks.
  if ((scalar(keys(%task)) == 0) || ($task{all})) {
    foreach my $task (@default_tasks) {
      $task{$task} = 1;
    }
  }

  ## If no discovery algorithm has been specified, run the default ones
  if ((scalar(keys(%disco)) == 0) || ($disco{all})) {
    foreach my $disco (@default_discos) {
      $disco{$disco} = 1;
    }

    ## Adapt selection of discovery algorithms depending on whether a control set has been provided
#     if (defined($main::infile{ctrl_seq})) {
#       delete($disco{oligos});
#       delete($disco{dyads});
#       delete($disco{positions});
#       delete($disco{local_words});
#     } else {
#       delete($disco{oligo_diff});
#     }
  }


  ################################################################
  ## Dependencies between tasks

  ## If motif discovery runs, the tasks that depend on it must re-run
## I hesitate to redo all the subsequent tasks, this would somewhat lose the advantage of the option -task
#  if ($task{disco}) {
#      $task{merge_words} = 1;
#      $task{collect_motifs} = 1;
#
#      $task{motifs_vs_ref} = 1;
#      $task{motifs_vs_db} = 1;
#      $task{scan} = 1;
#  }


  ## Dependency between task merge_words and motif discovery type merged_words
  if (($task{disco}) && ($disco{merged_words})) {
    $task{merge_words} = 1;
  }

  ## The small summary requires the synthesis task
  if ($task{small_summary}) {
    $task{synthesis} = 1;
  }

  ## Prepare a list of the requested tasks (by order of execution)
  foreach my $task (@supported_tasks) {
    push (@tasks, $task) if $task{$task};
  }
  &RSAT::message::Info("Tasks: ", join (",", @tasks)) if ($main::verbose >= 2);

}


################################################################
## Report parameters and input/output files.
##
## This has to be executed after the processing because the verbose
## includes a list of all result files, and the motif list depend on
## the number of motifs returned by the different algorithms.
##
## Three outputs are fed:
##  - the synthesis handle ($syn) receives the command line;
##  - the main output handle ($main::out) receives a text report
##    (arguments, program version, parameters, thresholds, files);
##  - a separate HTML "links" page (file $main::outfile{links}) receives
##    one table with a link per input file, directory and output file.
sub ReportParamsAndFiles {

  ## Command line, written to the synthesis handle
  print $syn "<pre>";
  print $syn "<b>Command:</b>  peak-motifs_quick ";
  &PrintArguments($syn, 1);
  print $syn "</pre>";

  ## Start the HTML page holding the links
  local $links = &OpenOutputFile($main::outfile{links});
  print $links &PrintHtmlResultHeader(program=>"peak-motifs_quick", "title"=>$main::param{title}, "result_toc"=>0);

  ## Load the list of directories and files for the discovered motifs
  &ReadMotifTable();

  print $links "<p>\n\n\n<table class='sortable'>\n";

  ## Text report: command line and program version
  print $main::out "; peak-motifs_quick ";
  &PrintArguments($main::out, 1);
  printf $main::out "; %-22s\t%s\n", "Program version", $program_version;

  ## Text report: one line per parameter
  print $main::out "; Parameter values\n";
  for my $name (@param_list) {
    my $param_row = sprintf(";\t%-22s\t%s\n", $name, $param{$name});
    print $main::out $param_row;
  }

  print $out &PrintThresholdValues();

  ## Input file(s), sorted by key
  if (%main::infile) {
    print $main::out "; Input files\n";
    print $links "\n\n<tr>\n<th colspan=2>","Input files","</th></tr>\n";

    for my $label (sort keys %main::infile) {
      my $path = $main::infile{$label};
      printf $main::out ";\t%-22s\t%s\n", $label, $path; ## Text output
      &PrintFileLink($label, $path, $main::outfile{links}, $links); ## HTML output
    }
  }

  ## Directories, in the order they were registered in @outdir
  if (@outdir) {
    print $main::out "; Directories\n";
    print $links "\n\n<tr>\n<th colspan=2>","Directories","</th></tr>\n";
    for my $label (@outdir) {
      ## Make sure the path ends with a slash, then squeeze each run of
      ## slashes into a single one.
      my $path = $main::dir{$label};
      $path .= "/";
      $path =~ tr{/}{/}s;
      printf $main::out ";\t%-30s\t%s\n", $label, $path; ## Text output
      &PrintFileLink($label, $path, $main::outfile{links}, $links); ## HTML output
    }
  }

  ## Output files, sorted by key
  if (%main::outfile) {
    print $main::out "; Output files\n";
    print $links "\n\n<tr>\n<th colspan=2>","Output files","</th></tr>\n";
    for my $label (sort keys %main::outfile) {
      my $path = $main::outfile{$label};
      printf $main::out ";\t%-30s\t%s\n", $label, $path; ## Text output
      &PrintFileLink($label, $path, $main::outfile{links}, $links); ## HTML output
    }
  }

  ## Finish and close the links page
  print $links "</table>\n\n\n";
  print $links "</body>\n\n";
  print $links "</html>\n";
  close $links;
}

=pod

=head1 REFERENCES

Description and assessment of I<peak-motifs>

=over

Thomas-Chollier, M., Herrmann, C., Defrance, M., Sand, O., Thieffry,
D. & van Helden, J. (2011). RSAT peak-motifs: motif analysis in
full-size ChIP-seq datasets. Nucleic Acids Res.

=back

The program I<peak-motifs> combines a series of tried-and-tested
programs which have been detailed in the following publications.

=over

=item I<oligo-analysis>

van Helden, J., Andre, B. and Collado-Vides, J. (1998). Extracting
regulatory sites from the upstream region of yeast genes by
computational analysis of oligonucleotide frequencies. J Mol Biol 281,
827-42.

=item I<dyad-analysis>

van Helden, J., Rios, A. F. and Collado-Vides, J. (2000). Discovering
regulatory elements in non-coding sequences by analysis of spaced
dyads. Nucleic Acids Res 28, 1808-18.

=item I<position-analysis>

van Helden, J., del Olmo, M. and Perez-Ortin,
J. E. (2000). Statistical analysis of yeast genomic downstream
sequences reveals putative polyadenylation signals. Nucleic Acids Res
28, 1000-10.

=item I<matrix-scan>

Turatsinze, J. V., Thomas-Chollier, M., Defrance, M. and van Helden,
J. (2008). Using RSAT to scan genome sequences for transcription
factor binding sites and cis-regulatory modules. Nat Protoc 3,
1578-88.

=item I<matrix-quality>

Medina-Rivera, A., Abreu-Goodger, C., Thomas-Chollier, M., Salgado,
H., Collado-Vides, J. and van Helden, J. (2011). Theoretical and
empirical quality assessment of transcription factor-binding
motifs. Nucleic Acids Res 39, 808-24.

=back

=head1 SEE ALSO

=over

=item I<oligo-analysis>

=item I<dyad-analysis>

=item I<position-analysis>

=item I<matrix-scan>


=back

=head1 WISH LIST


=over

=item B<site treatment>

Add an option to include or not sites in the Transfac-formatted
PSSM.

If included by default, they can take a lot of disk space for large
sequence files, and they are partly redundant with sites resulting
from further scanning (which can be asked or not). For site scanning,
we should also have the possibility either to store the sites, or to
just calculate the distribution, coverage etc, and then clean the
sites. Maybe add an option "-clean_sites" (but then we need to treat
separately TRANSFAC sites) ? Or have two separate options for
computing motif positional distrib+ coverage and for exporting sites,
resp.


=item B<-return explanation>

Print guidelines for explaining the motifs, comparison tables and
graphs at the bottom of the result page.


=item B<motif_cluster>

Compare all discovered motifs (plus reference motif if specified) and
cluster them in order to extract a consensus motif.


=item B<weeder>

Add a task to run Weeder on the peak sequences.

 weederlauncher.out input organism large S M T5


=item B<matrix_quality>

Run I<matrix-quality> to evaluate the enrichment of each discovered
motif in the test sequences, by comparing its score distribution with
the theoretical distribution. The motif-specific character of the
enrichment is further assessed by computing score distribution with
randomized matrices generated by permuting the columns of the
discovered motif. A "good" motif should show a clear enrichment (more
high-scoring hits than expected theoretically), whereas the
column-permuted matrices should fit the theoretical distribution.


=item B<all_oligos>

Run I<oligo-analysis> without any threshold in order to produce a plot of
observed versus expected occurrences for all the
oligonucleotides. This analysis is performed with the option
-two_tails, which detects both under- and over-represented patterns.


=item B<in the full HTML report>

=over

=item link to position-analysis

Add a link from the result page to the link table returned by
position-analysis (file *_graph_index.html).

=item sig in report per motif

The significance of the motif should be added to the report per motif.

=back

=back


=cut

__END__
