#!/usr/bin/perl -w

############################################################
#
# $Id: peak-motifs,v 1.176 2011/02/27 06:16:52 jvanheld Exp $
#
############################################################


## use strict;

=pod

=head1 NAME

peak-motifs

=head1 VERSION

$program_version

=head1 DESCRIPTION

Pipeline for discovering motifs from ChIP-seq (or ChIP-chip, or
ChIP-PET) peak sequences.

=head1 AUTHORS

=over

=item Jacques van Helden <Jacques.van.Helden@ulb.ac.be>

Conception and implementation of the work flow + testing.

=item Morgane Thomas-Chollier <thomas-c@molgen.mpg.de>

Conception of the work flow + output layout + Web interface + testing.

=item Matthieu Defrance <defrance@ccg.unam.mx>

Implementation of the efficient algorithms used in the work flow
(I<count-words>, I<matrix-scan-quick>, I<local-word-analysis>).

=item Olivier Sand <oly@bigre.ulb.ac.be>

Web services.

=item Carl Herrmann <carl.herrmann@univmed.fr> and Denis Thieffry
<thieffry@tagc.univ-mrs.fr>

Analysis of the case studies. Definition of optimal conditions of
utilization. Motif comparisons and clustering.

=back


=head1 CATEGORY

Motif discovery

=head1 USAGE

peak-motifs [-i inputfile] [-o outputfile] [-v #] [...]

=head1 INPUT FORMAT

The program takes as input either one (test) or two sequence files
(test versus control).

All sequence formats supported by I<convert-sequences> are accepted
as input.

=head1 OUTPUT FORMAT

The pipeline runs a series of programs, each generating one or
several result files. An HTML index is generated in order to
synthesize the results and give access to the individual result files.

The index file is formed from the output directory (option -outdir)
and the file prefix (option -prefix).

  [output_dir]/[prefix]_synthesis.html

=cut


BEGIN {
    if ($0 =~ /([^(\/)]+)$/) {
	push (@INC, "$`lib/");
    }
}
require "RSA.lib";
require "RSA.disco.lib";
require "footprint.lib.pl";
use RSAT::util;
use RSAT::MatrixReader;
use Data::Dumper;
use File::Basename;

################################################################
## Main package
package main;
{
  ## Main workflow of the script: initializes the global state, sets
  ## the default parameters, reads and checks the arguments, defines
  ## the output file names, then runs each selected task in a fixed
  ## order and exits with status 0.

  ################################################################
  ## Initialize parameters
  local $start_time = &RSAT::util::StartScript();
  local $die_on_error = 1;

  ## Check that the RSAT paths of the programs required for the script are specified
  ## (each path is derived from the RSAT environment variable unless already set)
  $PYTHON =  $ENV{RSAT}."/python_scripts" unless ($PYTHON);
  $SCRIPTS = $ENV{RSAT}."/perl-scripts" unless ($SCRIPTS);
  $BIN = $ENV{RSAT}."/bin" unless ($BIN);

  ## Program version, built from the numbers found in the CVS Revision keyword
  $program_version = do { my @r = (q$Revision: 1.176 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r };
  #    $program_version = "0.00";

  %main::param = ();
  @main::param_list = ();

  %main::dir = ();
  @main::outdir = (); ## Directories to report in the log file
  %main::infile = ();
  %main::outfile = ();

  $main::verbose = 0;
  $main::out = STDOUT;

  ## Sequence types (can be either 'test' or 'test' + 'ctrl')
  @seq_types = ();

  ## Pattern types (oligos, positions, local-word-analysis,... with the oligo length suffix)
  @pattern_types = ();
  @patterns_to_merge = ();

  ## This is a bit tricky: I first called "timelog" the time
  ## measurements taken from RSAT programs. Then I implemented a
  ## systematic way to measure the execution time of a task with the
  ## Unix time command, which allows me to apply it to third-party
  ## algorithms as well. I should see later if I suppress all the
  ## timelog stuff.
  @timetable_keys = ();
  @timelog_keys = ();


  ################################################################
  ## Supported tasks
  @default_tasks = ("purge",
		    "seqlen",
		    "composition",
		    "ref_motifs",
		    "disco",
		    "merge_words",
		    "merge_motifs",
		    "split_motifs",
		    "collect_motifs",
#		      "cluster_motifs",
		    "motifs_vs_ref",
		    "motifs_vs_db",
#		      "motif_compa",

		    "scan",

		    "timelog",
		    "synthesis",
		    "archive",
		    );

  ## Extra tasks are only executed on explicit demand of the user
  @extra_tasks = ("all",
		  "clean_seq",
		  "meme_bg",
		  "meme",
		  "chipmunk",
		 );

  ## All the supported tasks (as a list, as a comma-separated string
  ## and as a lookup hash)
  @supported_tasks = (@default_tasks, @extra_tasks);
  $supported_tasks = join ",", @supported_tasks;
  %supported_task = ();
  foreach my $task (@supported_tasks) {
    $supported_task{$task} = 1;
  }
  %task = ();  ## List of tasks to be executed


  ################################################################
  ## Supported motif discovery algorithms
  @default_discos = ("oligos",
		     "positions",
		    );

  ## Extra algorithms are only run on explicit demand of the user
  @extra_discos = ("dyads",
		   "local_words",
		   "local_dyads",
		   "oligo_diff",
		   "merged_words",
## Still to be treated: include the meme and chipmunk results in the HTML synthesis ?
		   "meme",
		   "chipmunk",
		  );

  ## All the supported motif discovery algorithms (as a list, as a
  ## comma-separated string and as a lookup hash)
  @supported_discos = (@default_discos, @extra_discos);
  $supported_discos = join ",", @supported_discos;

  %supported_disco = ();
  foreach my $disco (@supported_discos) {
    $supported_disco{$disco} = 1;
  }
  %disco = (); ## List of motif discovery algorithms to run


  ################################################################
  ## Supported sequence sources
  local %supported_seq_source = ("galaxy"=>1);
  local @supported_seq_sources = sort (keys (%supported_seq_source));
  local $supported_seq_sources = join( ",", @supported_seq_sources);

  ## Motif databases
  @motif_databases = ();
  @motif_db_format = ();

  ## Options and suffix for MEME (must be global variables)
  local $meme_options = "";
  local $meme_suffix = "";

  ## Options and suffix for ChIPMunk  (must be global variables)
  local $chipmunk_options = "";
  local $chipmunk_suffix = "";

  ## Discovered motifs
  local @motifs; ## list of discovered motifs for the synthesis
  local $motifs_read = 0; ## flag indicating if the motifs have been read (since the method ReadDiscoveredMotifs may be called from several places)

  ## Those variables need to be global for motif post-processing and indexing
  local @motif_ids;
  local @motif_dirs;
  local @motif_prefixes;
  local @motif_files;
  local $motif_nb;

  ## When true, the HTML synthesis is written step by step by the
  ## Synthesis*() calls interleaved with the tasks below
  local $progressive_synthesis = 1;
  local $syn;
  local $synthesis_path;
  local $menu_nb = 100;


  ################################################################
  ## Set default parameters
  &DefaultParameters();

  ################################################################
  ## Read argument values
  &ReadArguments();

  &CheckArguments();

  &SetOutFileNames();

  ################################################################
  ## Open output stream (the log file)
  $main::out = &OpenOutputFile($main::outfile{log});

  &OpenSynthesis() if ($progressive_synthesis);

  ## Sequence composition (one pass per sequence type)
  foreach my $seq_type (@seq_types) {
    &PurgeSeq($seq_type) if ($task{purge});
    &SequenceLengths($seq_type) if ($task{seqlen});
    &CompositionProfiles($seq_type) if ($task{composition});
  }
  &SynthesisSequenceComposition() if ($progressive_synthesis);


  ## Control sequences
  ## NOTE(review): "oligo_diff" is declared above among the discovery
  ## algorithms (@extra_discos), not among the supported tasks, so
  ## $task{oligo_diff} may never be set -- confirm whether
  ## $disco{oligo_diff} was intended.
  if ($main::infile{ctrl_seq}) {
    &ComputeBgModels($main::infile{ctrl_seq});
    &OligoDiff() if ($task{oligo_diff});
  }

  ## Reference motifs
  if (defined($main::infile{ref_motifs})) {
    &RefMotif() if ($task{ref_motifs});
  }
  &SynthesisRefMotif() if ($progressive_synthesis);

  ## Run motif discovery algorithms
  if ($task{disco}) {
    &OligoAnalysis() if ($disco{oligos});
    &DyadAnalysis() if ($disco{dyads});
    &PositionAnalysis() if ($disco{positions});
    &LocalWords() if ($disco{local_words});
    &LocalDyads() if ($disco{local_dyads});
  }
  &MergeWords() if ($task{merge_words});

  ## MEME pattern discovery
  &CalcMemeBackground() if ($task{meme_bg});
  &RunMEME() if (($task{meme}) || (($task{disco}) && ($disco{meme}))); ## Not yet clear to me (JvH) whether meme should be a separate task or a disco algo that runs with task disco

  ## ChIPMunk pattern discovery
  &RunChIPMunk() if  (($task{chipmunk}) || (($task{disco}) && ($disco{chipmunk}))); ## Not yet clear to me (JvH) whether chipmunk should be a separate task or a disco algo that runs with task disco

  ## Merge all discovered motifs
  &MergeMotifs() if ($task{merge_motifs});

  ## HTML synthesis by algorithm
  &SynthesisMotifsByAlgo() if ($progressive_synthesis);

  ## Motif-wise analysis
  &SplitMotifs() if ($task{split_motifs});

#  &ClusterMotifs() if ($task{cluster_motifs});

  ## Comparison with the reference motifs (only if a reference file was given)
  &MotifsVersusReference() if ((defined($main::infile{ref_motifs}))
			       && ($task{motifs_vs_ref}));

  ## Comparison with motif databases (only if at least one database was specified)
  &MotifsVersusDatabase() if ((scalar(@motif_databases) > 0)
			      && ($task{motifs_vs_db}));

  &ScanSequences() if ($task{scan});

  &SynthesisByMotif() if ($progressive_synthesis);

  &SynthesisMotifCompa() if ($progressive_synthesis);

  ## Write the time spent by each program in a time table
  if ($task{timelog}) {
    &TimeLog();
    &TimeTable();
  }

  ## Report parameters and files
  &SynthesisParamsAndFiles() if ($progressive_synthesis);

  &CleanSequences() if ($task{clean_seq});

  ## Archiving has to be done before synthesis in order to enable
  ## linking the archive file in the HTML synthesis. However, the
  ## archive has to be updated after the synthesis, in order to
  ## include the HTML synthesis file.
  &Archive(1) if ($task{archive});

  &Synthesis() if ($task{synthesis});

  ## The log file name is only reported from verbosity level 2 (the
  ## outer test on level 1 is redundant with the inner one)
  if ($main::verbose >= 1) {
    &RSAT::message::TimeWarn("Log file", $main::outfile{log}) if ($main::verbose >= 2);
  }

  ################################################################
  ## Close output stream
  my $exec_time = &RSAT::util::ReportExecutionTime($start_time); ## This has to be executed by all scripts
  print $main::out $exec_time if ($main::verbose >= 1);	## only report exec time if verbosity is specified
  ## NOTE(review): the stream was opened above from
  ## $main::outfile{log}; $main::outfile{output} is not set anywhere
  ## in the visible code, so this close may never run -- confirm the
  ## intended key.
  close $main::out if ($main::outfile{output});
  exit(0);
}

################################################################
################### SUBROUTINE DEFINITION ######################
################################################################


################################################################
## Display full help message
sub PrintHelp {
  ## Renders the POD documentation embedded in this script with
  ## pod2text (option -c), then terminates the program.
  ##
  ## Takes no argument and never returns to the caller.
  ##
  ## The list form of system() passes the script path ($0) to pod2text
  ## as a single argument, without going through a shell: the help
  ## still works when the path contains spaces or shell
  ## metacharacters, and nothing in the path can be interpreted as a
  ## command.
  system("pod2text", "-c", $0);
  exit();
}

################################################################
## Display short help message
sub PrintOptions {
  ## There is no separate short help for this script: the request is
  ## simply forwarded to PrintHelp(), which prints the full help.
  return &PrintHelp();
}


################################################################
## Set parameter values for all the programs
sub DefaultParameters {
  ## Fills %main::param with the default value of the pipeline
  ## parameters, and registers in @main::param_list the names of the
  ## parameters that are pushed onto that list (in the order of the
  ## push statements below).
  ##
  ## Takes no argument. Side effects on global variables only:
  ## %main::param, @main::param_list, @main::profiles_oligo_lengths,
  ## @MEME_options and $chipmunk_suffix.
  ##
  ## The main block calls this routine before reading the arguments,
  ## at a point where $main::verbose has just been set to 0, so the
  ## message below is presumably never displayed on that call.
  &RSAT::message::TimeWarn("\n; Setting default parameter values") if ($main::verbose >= 2);

  ## Formats
  $main::param{seq_format} = "fasta"; push(@main::param_list, "seq_format");
#  $main::param{img_format} = "pdf"; push(@main::param_list, "img_format");
  $main::param{img_format} = "png"; push(@main::param_list, "img_format");
  $main::param{img_border} = 1; ## Border for the images in the synthetic table (generally blue because linked to the image file)
  $main::param{logo_table_height} = 50; ## Height for displaying logos in the synthetic tables
  $main::param{iupac_coloring} = 0; ## Color IUPAC degenerate letters in the consensus of HTML reports
  $main::param{archive_format} = "zip"; ## Supported: zip, tar, tgz

  ## Motif formats
  $main::param{ref_motifs_format} = "transfac"; push(@main::param_list, "ref_motifs_format");

  ## Purge-seq
  $main::param{purge_match_length} = 30; push(@main::param_list, "purge_match_length");
  $main::param{purge_mismatches} = 0; push(@main::param_list, "purge_mismatches");

  ################################################################
  ## Residue profiles (position-analysis)
  $main::param{profiles_ci} = 20; push(@main::param_list, "profiles_ci");
  $main::param{profiles_max_graphs} = 20; push(@main::param_list, "profiles_max_graphs");
  $main::param{profiles_strand} = "-1str"; push(@main::param_list, "profiles_strand");
  @main::profiles_oligo_lengths = (1,2);
  $main::param{profiles_oligo_lengths} = join(',', @main::profiles_oligo_lengths); push(@main::param_list, "profiles_oligo_lengths");

  ## Beware: compositional profiles are always computed with the
  ## option -ovlp (count all occurrences), to avoid weird effects:
  ## with the -noov mode, the transition tables are unbalanced even
  ## when the input sequences are generated with equiprobable
  ## residues. This is consistent with the fact that those profiles
  ## further serve to estimate the probability of larger words, which
  ## may include repeated residues. Using the option -noov here would
  ## induce an under-estimation of the expected frequency for words
  ## containing repeated residues.
  $main::param{profiles_noov} = "-ovlp"; push(@main::param_list, "profiles_noov");

  ################################################################
  ## Motif discovery options
  $main::param{strand} = "-2str"; push(@main::param_list, "strand");
  $main::param{disco_noov} = "-noov"; push(@main::param_list, "disco_noov");
  $main::param{oligo_min_len} = 6; push(@main::param_list, "oligo_min_len");
  $main::param{oligo_max_len} = 7; push(@main::param_list, "oligo_max_len");
  $main::param{patterns_max_rank} = 25; push(@main::param_list, "patterns_max_rank");

  ## Thresholds for oligo-analysis, dyad-analysis and oligo-diff.
  ## Each threshold is registered under its own name (min_zscore used
  ## to be registered as a second "min_ratio").
  $main::param{min_ratio} = 1; push(@main::param_list, "min_ratio");
  $main::param{min_zscore} = 6; push(@main::param_list, "min_zscore");
  $main::param{min_sig} = 0; push(@main::param_list, "min_sig");

  ## oligo-analysis
  ## (the output file naming code converts negative Markov orders
  ## relative to the oligo length)
  $main::param{oligo_min_mkv} = -2; push(@main::param_list, "oligo_min_mkv");
  $main::param{oligo_max_mkv} = -2; push(@main::param_list, "oligo_max_mkv");

  ## dyad-analysis

  ## position-analysis
  $main::param{positions_ci} = 50; ## Class interval
  $main::param{positions_min_occ} = 1; ## Min number of occurrences
  $main::param{positions_max_graphs} = 20; push(@main::param_list, "positions_max_graphs");

  ## local-word-analysis
#  $main::param{local_words_heuristic} = "slices"; ## Heuristic
  $main::param{local_words_window} = 50; ## Class interval
  $main::param{local_words_windowgroup} = 50; ## Class interval for centered windows of increasing sizes

  ## local-word-analysis with the option --dyad
  ## Other parameters are the same as for local-word-analysis

  ## matrix-from-patterns
  $main::param{asmb_toppat} = $main::param{patterns_max_rank}; push(@main::param_list, "asmb_toppat");
  $main::param{matrix_nmotifs} = 3; push(@main::param_list, "matrix_nmotifs");

  ## BG model building
  $main::param{scan_markov_order} = 1; ## Used for computing the background model in inclusive format (required by matrix-scan-quick).


  ################################################################
  ## matrix-scan options
  $main::param{scan_strands} = "-2str";
  $main::param{scan_pseudo} = 1;
  $main::param{scan_decimals} = 1;
  $main::param{scan_bg_pseudo} =  0.01;

  ## Concatenate scanning options for matrix-scan
  $main::param{scan_options} = " -pseudo ".$main::param{scan_pseudo};
  $main::param{scan_options} .= " ".$main::param{scan_strands};
  $main::param{scan_options} .= " -decimals ".$main::param{scan_decimals};
  $main::param{scan_options} .= " -bg_pseudo ".$main::param{scan_bg_pseudo};
  $main::param{scan_options} .= " -n score"; ##  Assign a score to the sequence fragments containing masked/undefined nucleotides (N)

  ## Options used for site scanning but not for enrichment analysis
  $main::param{scan_min_score} = 7.5;
  $main::param{scan_sites_options} = " -lth score ".$main::param{scan_min_score};
  $main::param{scan_sites_options} .= " -origin center -return sites";

  ## Options used for enrichment in matrix hits but not for site detection
  $main::param{scan_enrich_min_score} = 0;
  $main::param{scan_enrich_markov} = 2;
  $main::param{scan_enrich_options} = " -lth score ".$main::param{scan_enrich_min_score};
  $main::param{scan_enrich_options} .= " -bginput -markov ".$main::param{scan_enrich_markov};


  ## Default MEME options. Each value is stored under the key
  ## "meme_<option>", and the option name is appended to the global
  ## list @MEME_options.
  $main::param{meme_text} = ""; push @MEME_options, "text";	## Output format = text
  $main::param{meme_dna} = ""; push @MEME_options, "dna"; ## Sequence type == dna
  $main::param{meme_mod} = "anr"; push @MEME_options, "mod"; ## Accept any number of occurrences per sequence
  $main::param{meme_minw} = 12; push @MEME_options, "minw"; ## Minimal motif width
  $main::param{meme_maxw} = 12; push @MEME_options, "maxw"; ## Maximal motif width
  $main::param{meme_nmotifs} = 1; push @MEME_options, "nmotifs"; ## Number of motifs
  $main::param{meme_evt} = 1; push @MEME_options, "evt"; ## upper threshold on E-value
  $main::param{meme_maxsize} = 10000000; push @MEME_options, "maxsize"; ## maximum size for the sequence set

  ## Options for MEME background model
  $main::param{bg_meme_markov} = 3; ## Markov order for MEME

  ################################################################
  ## Default ChIPMunk options
  ##
  ## java -cp "some_dir" ru_genetika.ChIPMunk <start_motif_length>
  ## <stop_motif_length> <verbose>=(y)es|(n)o
  ## <mode>=oops|zoops_factor=0.0..1.0 <x:input_set1>..<x:input_setN>
  ## <try_limit> <step_limit> <iter_limit> <thread_count>
  ## <seeds>=random|filename.mfa <gc%>=0.
  $main::param{chipmunk_start_motif_length} = 12;
  $main::param{chipmunk_stop_motif_length} = 12;
  $main::param{chipmunk_verbose} = "yes";
  $main::param{chipmunk_mode} = "oops";

  ## Suffix built from the motif lengths and the mode; with the
  ## defaults above it evaluates to "_w_12_12_oops".
  $chipmunk_suffix = join("_", "_w",
			  $main::param{chipmunk_start_motif_length},
			  $main::param{chipmunk_stop_motif_length},
			  $main::param{chipmunk_mode},
			 );

  ## TO BE TESTED
#   ## Facultative ChIPMunk options - to evaluate
#   $main::param{chipmunk_try_limit} = 100;
#   $main::param{chipmunk_step_limit} = 10;
#   $main::param{chipmunk_iter_limit} = 1;
#   $main::param{chipmunk_thread_count} = 7;
#   $main::param{chipmunk_gc} = 0.5;

  ################################################################
  ## Options used for motif comparison
  $main::param{matrix_compa_min_w} = 5; ## min number of aligned columns
  $main::param{matrix_compa_min_Wr} = 0.3; ## min relative width
  $main::param{matrix_compa_min_cor} = 0.75; ## min correlation
  $main::param{matrix_compa_min_Ncor} = 0.4; ## min normalized correlation
  $main::param{matrix_compa_metrics} = "cor,Ncor,logoDP,NIcor,NsEucl,SSD,NSW";
  $main::param{matrix_compa_sort_field} = "Ncor"; ## sorting field

  ## Concatenate the motif comparison options in a single string
  my $motif_compa_options =  "";
  $motif_compa_options .= " -DR -mode matches";
  $motif_compa_options .= " -uth offset_rank 1"; ## Only report the best matching shift between a pair of matrices
  $motif_compa_options .= " -lth w ".$main::param{matrix_compa_min_w}; ## Min number of aligned columns
  $motif_compa_options .= " -lth Wr ".$main::param{matrix_compa_min_Wr}; ## Min relative width of the alignment
  $motif_compa_options .= " -lth cor ".$main::param{matrix_compa_min_cor}; ## Min correlation
  $motif_compa_options .= " -lth Ncor ".$main::param{matrix_compa_min_Ncor}; ## Min normalized correlation
  $motif_compa_options .= " -return matrix_name,matrix_id,".$main::param{matrix_compa_metrics}.",match_rank,width,strand,offset,consensus,alignments_1ton";
  $motif_compa_options .= " -sort ".$main::param{matrix_compa_sort_field};
  $main::param{motif_compa_options} = $motif_compa_options;

}

################################################################
## Define one output file name by concatenating arguments
sub OutFileName {
  ## Builds the path of an output file.
  ##
  ## Arguments:
  ##   $subdir         sub-directory of the main output directory
  ##                   (when empty, the main output directory is used)
  ##   $extension      string appended to the file name (can be empty)
  ##   @name_elements  elements joined with "_" after the file prefix
  ##
  ## Returns the file path in scalar context, and the list
  ## (file path, directory) in list context. Without name elements,
  ## the path is the directory itself and the extension is ignored.
  my ($subdir, $extension, @name_elements) = @_;

  ## Directory: either the main output directory, or the checked sub-directory
  my $dir = $subdir ? &CheckSubDir($subdir) : $main::dir{output};

  ## File name: prefix + name elements (+ extension)
  my $out_file_name = $dir;
  if (@name_elements) {
    $out_file_name .= "/".join("_", $main::param{prefix}, @name_elements);
    $out_file_name .= $extension if ($extension);
  }

  return wantarray ? ($out_file_name, $dir) : $out_file_name;
}

################################################################
## Define the name of an output directory by concatenating arguments
sub CheckSubDir {
  ## Builds the path of a sub-directory of the main output directory,
  ## passes it to RSAT::util::CheckOutDir() and returns it.
  ##
  ## Argument:
  ##   $subdir   path of the sub-directory, relative to the main
  ##             output directory ($main::dir{output})
  my ($subdir) = @_;

  my $subdir_path = join("/", $main::dir{output}, $subdir);
  &RSAT::util::CheckOutDir($subdir_path);

  return($subdir_path);
}

################################################################
## Set output file names
sub SetOutFileNames {
  ## Purged sequences
  foreach my $seq_type (@seq_types) {
    my @seq_suffix = ();
    if (defined($main::param{max_seq_len})) {
      push @seq_suffix, "maxlen".$main::param{max_seq_len};
    }
    if (defined($main::param{top_peaks})) {
      push @seq_suffix, "top".$main::param{top_peaks};
    }
#    if (defined($main::param{max_seq_len})) {
    $main::outfile{$seq_type."_converted"} = &OutFileName("data/sequences",
							  ".fasta",
							  $seq_type,
							  @seq_suffix);
    $main::outfile{$seq_type."_purged"} = &OutFileName("data/sequences",
						       ".fasta",
						       $seq_type,
						       @seq_suffix,
						       "purged",
						       "ml".$main::param{purge_match_length},
						       "mis".$main::param{purge_mismatches},
						      );
    #    $seqfile{$seq_type} = $main::outfile{$seq_type."_converted"};
    #  } else {
    #      $main::outfile{$seq_type."_purged"} = &OutFileName("data/sequences",
    # ".fasta",
    #							  #							 $seq_type,
    #							 "purged",
    #							 "ml".$main::param{purge_match_length},
    #							 "mis".$main::param{purge_mismatches});
    $seqfile{$seq_type} = $main::infile{$seq_type."_seq"};
    #    }

    ## Sequence lengths
    $main::outfile{$seq_type."_seqlen"} = &OutFileName("data/sequences", ".tab", $seq_type."_seqlen");
    $main::outfile{$seq_type."_seqlen_distrib"} = &OutFileName("data/sequences", ".tab", $seq_type."_seqlen_distrib");
    $main::outfile{$seq_type."_seqlen_distrib_graph"} = &OutFileName("data/sequences", ".".$param{img_format}, $seq_type."_seqlen_distrib");
    #    if ($infile{ctrl_seq}) {
    #	$main::outfile{"ctrl_seqlen"} = &OutFileName("data/sequences", ".tab", "ctrl_seqlen");
    #	$main::outfile{"ctrl_seqlen_distrib"} = &OutFileName("data/sequences", ".tab", "ctrl_seqlen_distrib");
    #	$main::outfile{"ctrl_seqlen_distrib_graph"} = &OutFileName("data/sequences", $param{img_format}, "ctrl_seqlen_distrib");
    #    }

    ## BED file from fasta header (e.g. Galaxy)
    $main::outfile{$seq_type."_seq_coord"} = &OutFileName("data/sequences", ".bed", $seq_type."_seqcoord");

    ## Compositional Profiles
    for my $ol (@profiles_oligo_lengths) {

      ################################################################
      ## Profiles of oligo frequencies as a function of the position
      ##
      $main::outfile{$ol."nt_".$seq_type."_profiles"} = &OutFileName("results/composition", ".tab", $seq_type."_profiles".$main::param{profiles_strand}.$main::param{profiles_noov},
								     $ol."nt","ci".$main::param{profiles_ci});
      push @timelog_keys, $ol."nt_".$seq_type."_profiles";

      ## oligo frequencies in the sequence set
      $main::outfile{$ol."nt_".$seq_type."_freq"} = &OutFileName("results/composition", ".tab", $seq_type."_freq"."-1str".$main::param{profiles_noov},$ol."nt");
      push @timelog_keys, $ol."nt_".$seq_type."_freq";
      $main::outfile{$ol."nt_".$seq_type."_transitions"} = &OutFileName("results/composition", ".tab", $seq_type."_transitions"."-1str".$main::param{profiles_noov},$ol."nt");
      $main::outfile{$ol."nt_".$seq_type."_inclusive"} = &OutFileName("results/composition", ".txt", $seq_type."_inclusive"."-1str".$main::param{profiles_noov},$ol."nt");
      $main::outfile{$ol."nt_".$seq_type."_heatmap"} = &OutFileName("results/composition", ".".$main::param{img_format}, $seq_type."_heatmap"."-1str".$main::param{profiles_noov},$ol."nt");
      ## HTML index to the individual oligonucleotide profiles
      $main::outfile{$ol."nt_".$seq_type."_profiles_index"} = $main::outfile{$ol."nt_".$seq_type."_profiles"};
      $main::outfile{$ol."nt_".$seq_type."_profiles_index"} =~ s/\.tab$//;
      $main::outfile{$ol."nt_".$seq_type."_profiles_index"} .= "_graph_index.html";

      ## Graph prefix for positional profiles
      $main::outfile{$ol."nt_".$seq_type."_profiles_graph"} = $main::outfile{$ol."nt_".$seq_type."_profiles"};
      $main::outfile{$ol."nt_".$seq_type."_profiles_graph"} =~ s/\.tab$//;
      $main::outfile{$ol."nt_".$seq_type."_profiles_graph"} .= ".".$main::param{img_format};
    }
  }



  ################################################################
  ## Background model for matrix scanning
  my $scan_bg_ol = $main::param{scan_markov_order}+1;
  $main::param{scan_bg_file} = $main::outfile{$scan_bg_ol."nt_test_inclusive"};
  &RSAT::message::Info("Background model file",
		       "order=".$main::param{scan_markov_order},
		       "bg_ol=".$scan_bg_ol,
		       "key=".$scan_bg_ol."nt_test_inclusive",
		       "file=".$main::param{scan_bg_file}
		      ) if ($main::verbose >= 2);


  ################################################################
  ## Motif discovery results
  my $pattern_type = "";
  if ($main::infile{ctrl_seq}) {
    for my $len ($main::param{oligo_min_len}..$main::param{oligo_max_len}) {
      ## oligo-diff
      if ($disco{oligo_diff}) {
	$pattern_type = 'oligo_diff_'.$len.'nt';
	$main::outfile{$pattern_type} = &OutFileName("results/".$pattern_type, ".tab", "oligo_diff".$main::param{strand}.$main::param{disco_noov},$len."nt");
	push @pattern_types, $pattern_type;
	push @patterns_to_merge, $pattern_type;
      }
    }
  } else {
    for my $len ($main::param{oligo_min_len}..$main::param{oligo_max_len}) {

      if ($main::infile{ctrl_seq}) {
	################################################################
	## Background models estimated from the control sequences
	## oligo-analysis
	if ($disco{oligos}) {
	  $pattern_type = 'oligos_'.$len.'nt_vs_ctrl';
	  $main::outfile{$pattern_type} = &OutFileName("results/".$pattern_type, ".tab", "oligos".$main::param{strand}.$main::param{disco_noov},
						       $len."nt", "vs_ctrl");
	  push @pattern_types, $pattern_type;
	  push @patterns_to_merge, $pattern_type;
	}

      } else {

	################################################################
	## Background models estimated from the test sequences
	## themselves (lower order Markov chain)
	for my $markov ($main::param{oligo_min_mkv}..$main::param{oligo_max_mkv}) {
	  ## Convert negative markov orders relative to the oligo length
	  $markov += $len if ($markov < 0);

	  ## oligo-analysis
	  if ($disco{oligos}) {
	    $pattern_type = 'oligos_'.$len.'nt_mkv'.$markov;
	    $main::outfile{$pattern_type} = &OutFileName("results/".$pattern_type, ".tab", "oligos".$main::param{strand}.$main::param{disco_noov},
							 $len."nt", "mkv".$markov);
	    push @pattern_types, $pattern_type;
	    push @patterns_to_merge, $pattern_type;


	    ## oligo-analysis with all the oligos (no threshold) + a two-tails test (in order to detect under-represented patterns)
	    #	  $main::outfile{$pattern_type."_all"} = &OutFileName("results/".$pattern_type."_all_2tails", ".tab", "oligos".$main::param{strand}.$main::param{disco_noov}."_all_2tails",
	    #						       $len."nt", "vs_ctrl");
	  }
	}


	################################################################
	## I suppress the Makov loop for local_words since it seems incompatible with the mode --windowgroup
##	for my $markov ($main::param{oligo_min_mkv}..$main::param{oligo_max_mkv}) {
	  ## local-word-analysis
	  if ($disco{local_words}) {
	    $pattern_type = 'local_words_'.$len.'nt';
	    #	    $pattern_type = 'local_words_'.$len.'nt_mkv'.$markov;
	    $main::outfile{$pattern_type} = &OutFileName("results/".$pattern_type, ".tab",
							 "local_words".$main::param{strand}.$main::param{disco_noov},
							 $len."nt", "windgroup".$main::param{local_words_windowgroup},
							 #$main::param{local_words_heuristic},
							 #"mkv".$markov
							);
	    push @pattern_types, $pattern_type;
	    push @patterns_to_merge, $pattern_type;
	  }

	}
##      }

      ## position-analysis
      if ($disco{positions}) {
	$pattern_type = 'positions_'.$len.'nt';
	$main::outfile{$pattern_type} = &OutFileName("results/".$pattern_type, ".tab", "positions".$main::param{strand}.$main::param{disco_noov},
						     $len."nt", "ci".$main::param{positions_ci});
	push @pattern_types, $pattern_type;
	push @patterns_to_merge, $pattern_type;
      }
    }

    ## dyad-analysis
    if ($disco{dyads}) {
      $pattern_type = "dyads";
      $main::outfile{$pattern_type} = &OutFileName("results/".$pattern_type, ".tab", "dyads".$main::param{strand}.$main::param{disco_noov},
						   "3nt_sp0-20_bg_monads");
      push @pattern_types, $pattern_type;
      push @patterns_to_merge, $pattern_type;
    }

    ################################################################
    ## local-word-analysis with the option --dyad
    if ($disco{local_dyads}) {
      $pattern_type = 'local_dyads';
      $main::outfile{$pattern_type} = &OutFileName("results/".$pattern_type, ".tab",
						   "local_dyads".$main::param{strand}.$main::param{disco_noov},
						   "3nt_sp0-20"."windgroup".$main::param{local_words_windowgroup},
						  );
      push @pattern_types, $pattern_type;
      push @patterns_to_merge, $pattern_type;
    }
  }



  ## Comparison between significance of the discovered patterns
  $pattern_type = "merged_words";
  $main::outfile{merged_words} = &OutFileName("results/".$pattern_type, ".tab", "merged_words");
  $main::outfile{merged_words_html} = &OutFileName("results/".$pattern_type, ".html", "merged_words");
  $main::outfile{merged_words_heatmap} = &OutFileName("results/".$pattern_type, ".png", "merged_words_heatmap");
  if ($disco{merged_words}) {
    push @pattern_types, $pattern_type;
  }

  &RSAT::message::Info("Pattern types", join (",", @pattern_types)) if ($main::verbose >= 2);
  &RSAT::message::Info("Sequence types", join (",", @seq_types)) if ($main::verbose >= 2);

  ## Conversion from patterns to matrices + logos
  foreach my $pattern_type (@pattern_types) {
    $main::outfile{$pattern_type.'_pssm'} = $main::outfile{$pattern_type};
    $main::outfile{$pattern_type.'_pssm'} =~ s/\.tab$//;
    $main::outfile{$pattern_type.'_pssm'} .= "_pssm";
    $main::outfile{$pattern_type.'_2pssm'} = $main::outfile{$pattern_type.'_pssm'}."_log.txt"; push @timelog_keys, $pattern_type.'_2pssm';
    $main::outfile{$pattern_type.'_asmb'} = $main::outfile{$pattern_type.'_pssm'}.".asmb";
    $main::outfile{$pattern_type.'_pssm_sig'} = $main::outfile{$pattern_type.'_pssm'}."_sig_matrices.txt";
    $main::outfile{$pattern_type.'_pssm_gibbs'} = $main::outfile{$pattern_type.'_pssm'}."_gibbs_matrices.txt";
    $main::outfile{$pattern_type.'_pssm_counts'} = $main::outfile{$pattern_type.'_pssm'}."_count_matrices.txt";
    $main::outfile{$pattern_type.'_pssm_tf'} = $main::outfile{$pattern_type.'_pssm'}."_count_matrices.tf";
#    $main::outfile{$pattern_type.'_pssm_sites'} = $main::outfile{$pattern_type.'_pssm'}."_sites.tab";
#    $main::outfile{$pattern_type.'_pssm_site_distrib'} = $main::outfile{$pattern_type.'_pssm'}."_site_distrib.tab";
#    $main::outfile{$pattern_type.'_pssm_site_distrib_graph'} = $main::outfile{$pattern_type.'_pssm'}."_site_distrib".$main::param{img_format};
    foreach my $logo_nb (1..$main::param{matrix_nmotifs}) {
      $main::outfile{$pattern_type.'_pssm_logo'.$logo_nb} =
	$main::outfile{$pattern_type.'_pssm'}."_count_matrices_logo_m".$logo_nb.".".$main::param{img_format};
      $main::outfile{$pattern_type.'_pssm_logo_rc'.$logo_nb} =
	$main::outfile{$pattern_type.'_pssm'}."_count_matrices_logo_m".$logo_nb."_rc.".$main::param{img_format};
    }
    push @timelog_keys, $pattern_type;
    push @timetable_keys, $pattern_type;
    push @timetable_keys, $pattern_type."_pssm";
  }

  ## Output files for MEME
  if (($disco{meme}) || ($task{meme})) {
    $main::outfile{meme_bg_file} = &OutFileName("results/meme", ".txt", "meme_bg_mkv".$main::param{bg_meme_markov});
    &CalcMemeOptions(); ## this also computes the suffix of the meme output file
    $main::outfile{meme} = &OutFileName("results/meme", ".txt", "meme_".$meme_suffix);
    $main::outfile{meme_tf} = &OutFileName("results/meme", ".tf", "meme_".$meme_suffix);
    $main::outfile{meme_logos} = &OutFileName("results/meme", "", "meme_".$meme_suffix."_logos");
    push @timetable_keys, "meme";
  }

  ## Output files and options for ChIPMunk
  if (($disco{chipmunk}) || ($task{chipmunk})) {
    $main::outfile{chipmunk} = &OutFileName("results/chipmunk", ".txt", "chipmunk_".$chipmunk_suffix);
    #    $main::outfile{chipmunk_tf} = &OutFileName("results/chipmunk", ".tf", "chipmunk_".$chipmunk_suffix);
    #    $main::outfile{chipmunk_logos} = &OutFileName("results/chipmunk", "", "chipmunk_".$chipmunk_suffix."_logos");
    push @timetable_keys, "chipmunk";
  }

  ## Clustering of the discovered motifs + comparison with reference motif(s)
  ## Merged motifs
  $main::outfile{motifs_discovered} = &OutFileName("results/discovered_motifs", ".tf", "motifs_discovered");

  ## Table with the list of separated motifs
  $main::outfile{motifs_discovered_table} = &OutFileName("results/discovered_motifs", ".tab", "motifs_discovered_table");

  ## Comparisons between discovered motifs
  $main::outfile{motifs_disco_compa} = &OutFileName("results/discovered_motifs", ".tab", "motifs_disco_compa");
  $main::outfile{motifs_disco_compa_html} = &OutFileName("results/discovered_motifs", ".html", "motifs_disco_compa");
  push @timelog_keys, "motifs_disco_compa";
  $main::outfile{motifs_disco_compa_gml} = &OutFileName("results/discovered_motifs", ".gml", "motifs_disco_compa");
  $main::outfile{motifs_disco_compa_png} = &OutFileName("results/discovered_motifs", ".png", "motifs_disco_compa");

  ## Motif clustering
  $main::outfile{motifs_disco_clusters_mcl} = &OutFileName("results/discovered_motifs", ".mcl", "motifs_disco_clusters");
  $main::outfile{motifs_disco_clusters_tab} = &OutFileName("results/discovered_motifs", ".tab", "motifs_disco_clusters");
  $main::outfile{motifs_disco_clusters_graph} = &OutFileName("results/discovered_motifs", ".tab", "motifs_disco_clusters_graph");
  $main::outfile{motifs_disco_clusters_graph_gml} = &OutFileName("results/discovered_motifs", ".gml", "motifs_disco_clusters_graph");
  $main::outfile{motifs_disco_clusters_graph_png} = &OutFileName("results/discovered_motifs", ".png", "motifs_disco_clusters_graph");
  $main::outfile{motifs_disco_compa_cluster_intra_degree} = &OutFileName("results/discovered_motifs", ".tab", "motifs_disco_compa_cluster_intra_degree");
  #    $main::outfile{motifs_disco_ref} = &OutFileName("results/discovered_motifs", ".tf", "motifs_disco_ref");


  if (defined($main::infile{ref_motifs})) {
    ## Motif(s) considered as reference for the testing set
    $main::outfile{"ref_motifs"} = &OutFileName("data/ref_motifs", ".".$main::param{ref_motifs_format}, "ref_motifs");
    $main::outfile{"ref_motifs_transfac"} = &OutFileName("data/ref_motifs", ".tf", "ref_motifs");
    $main::outfile{"ref_motifs_tab"} = &OutFileName("data/ref_motifs", ".tab", "ref_motifs_tab");
    $main::outfile{"ref_motifs_logo"} = &OutFileName("data/ref_motifs", "", "ref_motifs_logo");
    $main::outfile{"ref_motifs_enriched"} = &OutFileName("data/ref_motifs", "tab", "ref_motifs_enriched");

    ## Comparison betweeen discovered motifs and reference motif
    $main::prefix{"motifs_vs_ref_prefix"} = &OutFileName("results/discovered_vs_ref", "", "motifs_vs_ref");
    $main::outfile{"motifs_vs_ref"} = &OutFileName("results/discovered_vs_ref", ".tab", "motifs_vs_ref");
    $main::outfile{"motifs_vs_ref_html"} = &OutFileName("results/discovered_vs_ref", ".html", "motifs_vs_ref");
    push @timelog_keys, "motifs_vs_ref";
    $main::outfile{"motifs_vs_ref_gml"} = &OutFileName("results/discovered_vs_ref", ".gml", "motifs_vs_ref");
    $main::outfile{"motifs_vs_ref_png"} = &OutFileName("results/discovered_vs_ref", ".png", "motifs_vs_ref");
    $main::outfile{"motifs_vs_ref_alignments_1ton"} = &OutFileName("results/discovered_vs_ref", ".tab", "motifs_vs_ref_alignments_1ton");
    $main::outfile{"motifs_vs_ref_alignments_1ton_html"} = &OutFileName("results/discovered_vs_ref", ".html", "motifs_vs_ref_alignments_1ton");
  }

  ## Comparison between discovered motifs and database(s)
  if (scalar(@motif_databases) > 0) {
    foreach my $db_name (@motif_databases) {
      $main::prefix{"motifs_vs_db_".$db_name."_prefix"} = &OutFileName("results/discovered_vs_db", "", "motifs_vs_db_".$db_name);
      $main::outfile{"motifs_vs_db_".$db_name} = &OutFileName("results/discovered_vs_db", ".tab", "motifs_vs_db_".$db_name);
      $main::outfile{"motifs_vs_db_".$db_name."_html"} = &OutFileName("results/discovered_vs_db", ".html", "motifs_vs_db_".$db_name);
      push @timelog_keys, "motifs_vs_db_".$db_name;
      $main::outfile{"motifs_vs_db_".$db_name."_gml"} = &OutFileName("results/discovered_vs_db", ".gml", "motifs_vs_db_".$db_name);
      $main::outfile{"motifs_vs_db_".$db_name."_png"} = &OutFileName("results/discovered_vs_db", ".png", "motifs_vs_db_".$db_name);
      $main::outfile{"motifs_vs_db_".$db_name."_alignments_1ton"} = &OutFileName("results/discovered_vs_db", ".tab", "motifs_vs_db_".$db_name."_alignments_1ton");
      $main::outfile{"motifs_vs_db_".$db_name."_alignments_1ton_html"} = &OutFileName("results/discovered_vs_db", ".html", "motifs_vs_db_".$db_name."_alignments_1ton");
    }
  }
}

################################################################
## Convert, truncate, select top and purge sequences
sub PurgeSeq {
  ## Prepare one sequence set for motif discovery, in two shell
  ## commands run through one_command():
  ##  1. convert-seq (optionally piped into sub-sequence) writes
  ##     $main::outfile{<seq_type>_converted};
  ##  2. purge-sequence reads that file and writes
  ##     $main::outfile{<seq_type>_purged}.
  ##
  ## Argument:
  ##   $seq_type   key of the sequence set in %seqfile, also used as
  ##               prefix for the "_converted" and "_purged" outfile keys.
  my ($seq_type) = @_;
  my $raw_seq = $seqfile{$seq_type};
  my $top_peaks = $main::param{top_peaks};
  my $max_len = $main::param{max_seq_len};

  ## Step 1: conversion to fasta (called with -mask non-dna)
  if ($main::verbose >= 2) {
    &RSAT::message::TimeWarn("Converting sequences", $seq_type, $raw_seq);
  }
  my @convert_parts = ($SCRIPTS."/convert-seq",
                       " -i ".$raw_seq,
                       " -from ".$main::param{seq_format},
                       " -to fasta",
                       " -mask non-dna");

  ## Optional restriction to the top peaks
  if (defined($top_peaks)) {
    if ($main::verbose >= 2) {
      &RSAT::message::Info("\tSelecting top peaks", $top_peaks);
    }
    push @convert_parts, " -top ".$top_peaks;
  }

  ## Optional truncation: the converted sequences are piped into
  ## sub-sequence, which is asked for a window of max_seq_len
  ## positions expressed relative to the sequence center.
  if (defined($max_len)) {
    my $first_pos = -round($max_len/2);
    my $last_pos = $first_pos + $max_len -1;
    if ($main::verbose >= 2) {
      &RSAT::message::Info("\tTruncating to ", $max_len." bp max");
    }
    push @convert_parts, ("| ".$SCRIPTS."/sub-sequence",
                          " -origin center",
                          " -from ".$first_pos,
                          " -to ".$last_pos);
  }

  ## The -o option is appended last, so it applies to the final
  ## command of the (optional) pipe.
  push @convert_parts, " -o ".$main::outfile{$seq_type."_converted"};
  &one_command(join("", @convert_parts), 1);

  ## Step 2: purge sequences (mask redundant fragments)
  my $converted_seq = $main::outfile{$seq_type."_converted"};
  if ($main::verbose >= 2) {
    &RSAT::message::TimeWarn("Purging sequences", $seq_type, $converted_seq);
  }
  my $purge_cmd = join("",
                       "$SCRIPTS/purge-sequence -dna",
                       " -i ".$converted_seq,
                       " -ml ".$main::param{purge_match_length},
                       " -mis ".$main::param{purge_mismatches},
                       " -o ".$main::outfile{$seq_type."_purged"});
  &one_command($purge_cmd, 1);
}


################################################################
## Compute sequence lengths
sub SequenceLengths {
  ## Compute the lengths of the purged sequences of one set, the
  ## distribution of these lengths and an XY graph of the distribution.
  ## If $main::param{seq_source} is set, additionally convert the
  ## fasta headers of the input sequences into a BED file.
  ##
  ## Argument:
  ##   $seq_type   prefix of the %main::outfile keys to use
  ##               ("_purged", "_seqlen", "_seqlen_distrib",
  ##               "_seqlen_distrib_graph").
  my ($seq_type) = @_;
  if ($main::verbose >= 2) {
    &RSAT::message::TimeWarn("Computing sequence lengths", $seq_type);
  }

  my $length_file = $main::outfile{$seq_type.'_seqlen'};
  my $distrib_file = $main::outfile{$seq_type.'_seqlen_distrib'};

  ## Sequence lengths, then class frequencies of the second column
  my $cmd = join("",
                 $SCRIPTS."/sequence-lengths",
                 " -i ".$main::outfile{$seq_type."_purged"},
                 " -o ".$length_file,
                 " ; cut -f 2 ".$length_file,
                 " | ".$SCRIPTS."/classfreq -v 1 -ci ".$main::param{profiles_ci},
                 " -o ".$distrib_file);
  &one_command($cmd, 1);

  ## Graph of the length distribution
  my $graph_title = "Sequence lengths";
  if ($main::param{title}) {
    $graph_title .= "; ".$main::param{title};
  }
  $cmd = join("",
              $SCRIPTS."/XYgraph -lines -pointsize 0 -legend",
              " -format ".$main::param{img_format},
              " -title '".$graph_title."'",
              " -ysize 200 -ycol 4 -yleg1 'Number of peaks'",
              " -xsize 800 -xcol 3 -xleg1 'Peak length'",
              " -xmin 0 -ymin 0",
              " -i ".$distrib_file,
              " -o ".$main::outfile{$seq_type.'_seqlen_distrib_graph'});
  &one_command($cmd, 1);

  ################################################################
  ## Convert the genomic coordinates if these are embedded in the
  ## fasta headers.
  ## NOTE(review): this step always reads $infile{test_seq} and writes
  ## $main::outfile{test_seq_coord}, whatever the value of $seq_type --
  ## confirm that this is intended.
  if ($main::param{seq_source}) {
    if ($main::verbose >= 2) {
      &RSAT::message::TimeWarn("Converting input fasta headers to BED file");
    }
    my $track_line = "track name=\"peaks\" description=\"RSAT peak-motifs: peaks\" visibility=2 use_score=1 color=13,115,67";
    $cmd = join("",
                $SCRIPTS."/convert-features",
                " -from ".$main::param{seq_source}."_seq -i ".$infile{test_seq},
                " -to bed ",
                " | sed '1s/^.*/".$track_line."/' ", ## change first line
                " > ".$main::outfile{"test_seq_coord"});
    &one_command($cmd,1);
  }
}

################################################################
## Run position-analysis to compute composition profiles (residues,
## dinucleotides) and count-words to compute background models for
## sequence scanning.
sub CompositionProfiles {
  ## For each oligonucleotide length listed in @profiles_oligo_lengths:
  ##  - count words in the purged sequences and derive background model
  ##    files ("inclusive" and "transitions" formats) plus a heatmap of
  ##    the transition table;
  ##  - run position-analysis to compute positional profiles and draw
  ##    them with XYgraph.
  ##
  ## Argument:
  ##   $seq_type   prefix/infix of the %main::outfile keys to use
  ##               (e.g. <seq_type>_purged, <ol>nt_<seq_type>_freq).
  my ($seq_type) = @_;

  ################################################################
  ## Compute nucleotide and dinucleotide frequencies
  &RSAT::message::TimeWarn("Computing nucleotide and dinucleotide frequencies") if ($main::verbose >= 2);
  for my $ol (@profiles_oligo_lengths) {

    ## Compute background models of order 0 (Bernoulli) and 1 (Markov)
    ## from the input sequence.
    ## The command string is declared lexically: it was previously
    ## assigned without "my", which silently wrote to the package
    ## global $cmd.
    my $cmd = $BIN."/count-words -v 1";
    $cmd .= " -i ".$main::outfile{$seq_type."_purged"};
    $cmd .= " -l ".$ol;
    $cmd .= " -1str";
    $cmd .= " ".$main::param{profiles_noov} if ($main::param{profiles_noov} eq "-noov"); ## The option -ovlp is not supported by count-words
    $cmd .= " > ".$main::outfile{$ol."nt_".$seq_type."_freq"};
    &one_command($cmd, 1,$main::outfile{$ol."nt_".$seq_type."_freq"}."_time.txt");

    ## Convert background model in INCLUSIVE format for matrix-scan-quick
    $cmd = $SCRIPTS."/convert-background-model -from oligos -to inclusive ";
    $cmd .= " -i ".$main::outfile{$ol."nt_".$seq_type."_freq"};
    $cmd .= " -o ".$main::outfile{$ol."nt_".$seq_type."_inclusive"};

    ## Convert background model to transition table and draw the
    ## heatmap of transition probabilities. These steps are chained
    ## with ";" in the same shell command as the conversion above.
    $cmd .= "; ".$SCRIPTS."/convert-background-model -from oligos -to transitions ";
    $cmd .= " -i ".$main::outfile{$ol."nt_".$seq_type."_freq"};
    $cmd .= " -o ".$main::outfile{$ol."nt_".$seq_type."_transitions"};
    $cmd .= " ; cut -f 1-5,7 ".$main::outfile{$ol."nt_".$seq_type."_transitions"};
    $cmd .= " | ".$SCRIPTS."/draw-heatmap -min 0 -max 1  -out_format png -col_width 50";
    $cmd .= " -o ".$main::outfile{$ol."nt_".$seq_type."_heatmap"};
    &one_command($cmd, 1);
  }

  ################################################################
  ## Compute position profiles for each oligo length
  &RSAT::message::TimeWarn("Computing composition profiles") if ($main::verbose >= 2);
  for my $ol (@profiles_oligo_lengths) {

    ## Compute positional profiles
    &RSAT::message::TimeWarn("Computing composition profiles", $ol."nt") if ($main::verbose >= 2);
    my $cmd =$SCRIPTS."/position-analysis -v 1";
    $cmd .= " -i ".$main::outfile{$seq_type."_purged"};
    $cmd .= " -format fasta";
    $cmd .= " -sort ";
    $cmd .= " -return chi,sig,distrib,graphs,rank";
    $cmd .= " -max_graphs ".$main::param{profiles_max_graphs} if ($main::param{profiles_max_graphs}  > 0);
    $cmd .= " ".$main::param{profiles_strand};
    $cmd .= " ".$main::param{profiles_noov};
    $cmd .= " -seqtype dna";
    $cmd .= " -l ".$ol;
    $cmd .= " -ci ".$main::param{profiles_ci};
    $cmd .= " -img_format ".$main::param{img_format};
    $cmd .= " -title '".$main::param{title}."'";
    $cmd .= " -origin center ";
    $cmd .= " -o ".$main::outfile{$ol."nt_".$seq_type."_profiles"};
    &one_command($cmd, 1, $main::outfile{$ol."nt_".$seq_type."_profiles"}."_time.txt");

    ## Draw the XY graph with composition profiles.
    ## The profile table is stripped from its ";" comment lines, sorted
    ## and transposed; only the header line ("id") and the lines
    ## starting with a (possibly negative) number are kept.
    my $color_file = $ENV{RSAT}."/perl-scripts/lib/color_palettes/".$ol."nt".$main::param{profiles_strand}."_colors.tab";
    my $col_nb = 4**$ol + 1; ## Last Y column: one column per possible oligo, after the X column
    $cmd = 'grep -v ";" '.$main::outfile{$ol."nt_".$seq_type."_profiles"}.' | sort | '.$SCRIPTS.'/transpose-table | grep -P \'(^id)|(^\-?\d+)\'';
    $cmd .= " | ".$SCRIPTS."/XYgraph -xcol 1 -ycol 2-".$col_nb;
    $cmd .= " -format ".$main::param{img_format};
    $cmd .= " -lines -pointsize 0 -legend -header";
    if (-e $color_file) {
      $cmd .= " -colors ".$color_file;
    } else {
      &RSAT::message::Warning("Cannot find residue color specification file", $color_file);
    }
    #    $cmd .= " -symbols "; ## THIS OPTION IS NOT WORKING ANYMORE : THE LEGEND DISPLAYS SYMBOLS BUT NOT THE GRAPH. THIS HAS TO BE FIXED
    ## NOTE(review): the title below reads "... profiles : ; <seq_type>
    ## sequence" (colon directly followed by a semicolon). Left
    ## unchanged here; confirm whether this is the intended label.
    my $title = $ol."nt composition profiles : ";
    $title .= "; ".$seq_type." sequence";
    $title .= "; ".$main::param{title} if ($main::param{title});
    $cmd .= " -title '".$title."'";
    $cmd .= " -xleg1 'Position' -xsize 800";
    $cmd .= " -yleg1 'Occurrences' -ysize 300  -ymin 0";
    $cmd .= " -o ".$main::outfile{$ol."nt_".$seq_type."_profiles_graph"};
    &one_command($cmd, 1);
  }
}


################################################################
## Run oligo-analysis on the test set
sub OligoAnalysis {
  ## Run oligo-analysis on the purged test sequences for every
  ## combination of oligo length (oligo_min_len..oligo_max_len) and
  ## Markov order (oligo_min_mkv..oligo_max_mkv), then call
  ## MatrixFromPatterns() on each result file.
  my @oligo_lengths = ($main::param{oligo_min_len}..$main::param{oligo_max_len});
  my @markov_orders = ($main::param{oligo_min_mkv}..$main::param{oligo_max_mkv});

  foreach my $len (@oligo_lengths) {
    foreach my $requested_order (@markov_orders) {
      ## Negative Markov orders are expressed relative to the oligo length
      my $markov = ($requested_order < 0) ? $requested_order + $len : $requested_order;
      my $pattern_type = 'oligos_'.$len.'nt'.'_mkv'.$markov;
      my $result_file = $main::outfile{$pattern_type};

      if ($main::verbose >= 2) {
        &RSAT::message::TimeWarn("Running oligo-analysis", $len."nt", "markov=".$markov);
      }

      my $cmd = join("",
                     $SCRIPTS."/oligo-analysis -v 1",
                     " -quick",
                     " -i ".$main::outfile{"test_purged"},
                     " -format fasta",
                     " -sort -lth ratio ".$main::param{min_ratio},
                     " -sort -lth occ_sig ".$main::param{min_sig},
                     " -uth rank ".$main::param{patterns_max_rank},
                     " -return occ,proba,rank",
                     " ".$main::param{strand},
                     " ".$main::param{disco_noov},
                     " -seqtype dna",
                     " -l ".$len,
                     " -markov ".$markov,
                     " -pseudo 0.01",
                     " -o ".$result_file);
      &one_command($cmd, 1, $result_file."_time.txt");

      ## Convert the significant oligos into matrices
      &MatrixFromPatterns($main::outfile{$pattern_type}, $main::outfile{$pattern_type.'_pssm'}, $pattern_type, 0);
    }
  }
}

################################################################
## Run oligo-diff to compare the test set to the control set
sub OligoDiff {
  ## Run oligo-diff (purged test sequences versus purged control
  ## sequences) for each oligo length between oligo_min_len and
  ## oligo_max_len, then call MatrixFromPatterns() on each result file.
  my @oligo_lengths = ($main::param{oligo_min_len}..$main::param{oligo_max_len});

  foreach my $len (@oligo_lengths) {
    my $pattern_type = 'oligo_diff_'.$len.'nt';
    my $result_file = $main::outfile{$pattern_type};

    if ($main::verbose >= 2) {
      &RSAT::message::TimeWarn("Running oligo-diff", $len."nt");
    }

    my $cmd = join("",
                   $SCRIPTS."/oligo-diff -v 2",
                   " -test ".$main::outfile{"test_purged"},
                   " -ctrl ".$main::outfile{"ctrl_purged"},
                   " -nopurge", ## The input sequences have already been purged
                   " -l ".$len,
                   " ".$main::param{strand},
                   " ".$main::param{disco_noov},
                   " -side test",
                   " -lth ratio ".$main::param{min_ratio},
                   " -lth occ_sig ".$main::param{min_sig},
                   " -uth rank ".$main::param{patterns_max_rank},
                   " -o ".$result_file);
    &one_command($cmd, 1, $result_file."_time.txt");

    ## Convert the discriminant oligos into matrices
    &MatrixFromPatterns($main::outfile{$pattern_type}, $main::outfile{$pattern_type.'_pssm'}, $pattern_type, 0);
  }
}


################################################################
## Run dyad-analysis on the test set
sub DyadAnalysis {
  ## Run dyad-analysis on the purged test sequences, write the result
  ## to $main::outfile{dyads}, then call MatrixFromPatterns() on it.
  if ($main::verbose >= 2) {
    &RSAT::message::TimeWarn("Running dyad-analysis");
  }

  my $dyad_file = $main::outfile{dyads};
  my @cmd_parts = (
    $SCRIPTS."/dyad-analysis -v 1",
    " -i ".$main::outfile{"test_purged"},
    " -quick",
    " -format fasta",
    " -sort -lth ratio ".$main::param{min_ratio},
    " -sort -lth occ_sig ".$main::param{min_sig},
    " -uth rank ".$main::param{patterns_max_rank}." -return occ,proba,ratio,zscore,rank",
    " ".$main::param{strand},
    " ".$main::param{disco_noov},
    " -seqtype dna",
    " -l 3 -sp 0-20 ",
    " -bg monads",
    " -pseudo 0.01",
    ## Suppress zero spacings (n{0}) for merging with oligos
    " | perl -pe 's/n\\{0\\}//g'",
    " >".$dyad_file,
  );
  &one_command(join("", @cmd_parts), 1, $dyad_file."_time.txt");

  ## Convert the significant dyads into matrices
  &MatrixFromPatterns($main::outfile{dyads}, $main::outfile{dyads_pssm}, "dyads", 0);
}

################################################################
## Run position-analysis to discover oligonucleotides with positional
## biases
sub PositionAnalysis {
  ## Run position-analysis on the purged test sequences for each oligo
  ## length between oligo_min_len and oligo_max_len, then call
  ## MatrixFromPatterns() on each result file.
  my @oligo_lengths = ($main::param{oligo_min_len}..$main::param{oligo_max_len});

  foreach my $len (@oligo_lengths) {
    my $pattern_type = 'positions_'.$len.'nt';
    my $result_file = $main::outfile{$pattern_type};

    if ($main::verbose >= 2) {
      &RSAT::message::TimeWarn("\n Running position-analysis", $len."nt");
    }

    my @cmd_parts = (
      $SCRIPTS."/position-analysis -v 2",
      " -i ".$main::outfile{"test_purged"},
      " -format fasta",
      " -sort ",
      " -return chi,sig,distrib,graphs,rank",
      " -max_graphs ".$main::param{patterns_max_rank},
      " ".$main::param{strand},
      " ".$main::param{disco_noov},
      " -seqtype dna",
      " -l ".$len,
      " -ci ".$main::param{positions_ci},
      " -lth_occ ".$main::param{positions_min_occ},
      " -lth_sig ".$main::param{min_sig},
      " -uth_rank ".$main::param{patterns_max_rank},
      " -img_format ".$main::param{img_format},
      " -title '".$main::param{title}."'",
      " -origin center ",
    );

    ## NOTE(review): when positions_max_graphs is positive, -max_graphs
    ## appears twice on the command line (the first occurrence uses
    ## patterns_max_rank). Which one position-analysis retains is not
    ## visible from here -- presumably the last one; confirm.
    if ($main::param{positions_max_graphs}  > 0) {
      push @cmd_parts, " -max_graphs ".$main::param{positions_max_graphs};
    }
    push @cmd_parts, " -o ".$result_file;

    &one_command(join("", @cmd_parts), 1, $result_file."_time.txt");

    ## Convert the positionally biased oligos into matrices
    &MatrixFromPatterns($main::outfile{$pattern_type}, $main::outfile{$pattern_type."_pssm"}, $pattern_type, 0);
  }
}


################################################################
## Run local-word-analysis to discover locally over-represented dyads
## on the test set
sub LocalDyads {
  ## Run local-word-analysis in dyad mode (--dyad, trinucleotide pairs
  ## with spacings 0 to 20) on the purged test sequences, write the
  ## result to $main::outfile{local_dyads}, then call
  ## MatrixFromPatterns() on it.
  my $center_pos = 0;
  my $pattern_type = 'local_dyads';

  &RSAT::message::TimeWarn("Running local-word-analysis --dyad") if ($main::verbose >= 2);
  my $cmd = $PYTHON."/local-word-analysis -v 3";
  $cmd .= " --dyad  --length=3 --spacing=0:20";
  $cmd .= " -i ".$main::outfile{"test_purged"};
  $cmd .= " --min=occ_sig ".$main::param{min_sig};
  $cmd .= " --sort=-occ_sig";
  $cmd .= " --max=rank ".$main::param{patterns_max_rank};
  $cmd .= " --max=w_rank 1"; ## Only return the most significant window for each word

  ## Strands: "+" for single-strand analysis, "+-" otherwise
  if ($main::param{strand} eq "-1str") {
    $cmd .= " +";
  } else {
    $cmd .= " +-";
  }

  ## Overlapping occurrences. The same parameter is passed verbatim as
  ## a command-line option to the other discovery programs, so it
  ## carries its leading dash ("-ovlp"); the previous comparison with
  ## the bare string "ovlp" could thus never succeed. Both spellings
  ## are accepted for backward compatibility.
  $cmd .= " --overlap" if ($main::param{disco_noov} =~ /^-?ovlp$/);

  $cmd .= " --center=".$center_pos;
  $cmd .= " --windowgroup=".$main::param{local_words_windowgroup};
  ## The option --markov is not used: it crashes the program and seems
  ## incompatible with the windowgroup mode.
  $cmd .= " > ".$main::outfile{$pattern_type};
  &one_command($cmd, 1, $main::outfile{$pattern_type}."_time.txt");

  ## Convert the locally over-represented dyads into matrices
  &MatrixFromPatterns($main::outfile{$pattern_type}, $main::outfile{$pattern_type.'_pssm'}, $pattern_type, 0);
}


################################################################
## Run local-word-analysis on the test set
sub LocalWords {
  ## Run local-word-analysis on the purged test sequences for each
  ## oligo length between oligo_min_len and oligo_max_len, write each
  ## result to $main::outfile{local_words_<len>nt}, then call
  ## MatrixFromPatterns() on it.
  ##
  ## There is no loop on Markov orders: --markov crashes the program
  ## and seems incompatible with the --windowgroup mode.
  my $center_pos = 0;

  for my $len ($main::param{oligo_min_len}..$main::param{oligo_max_len}) {
    my $pattern_type = 'local_words_'.$len.'nt';
    &RSAT::message::TimeWarn("Running local-word-analysis", $len."nt") if ($main::verbose >= 2);
    my $cmd = $PYTHON."/local-word-analysis -v 3";
    $cmd .= " -i ".$main::outfile{"test_purged"};
    $cmd .= " --min=occ_sig ".$main::param{min_sig};
    $cmd .= " --sort=-occ_sig";
    $cmd .= " --max=rank ".$main::param{patterns_max_rank};
    $cmd .= " --max=w_rank 1"; ## Only return the most significant window for each word

    ## Strands: "+" for single-strand analysis, "+-" otherwise
    if ($main::param{strand} eq "-1str") {
      $cmd .= " +";
    } else {
      $cmd .= " +-";
    }

    ## Overlapping occurrences. The same parameter is passed verbatim
    ## as a command-line option to the other discovery programs, so it
    ## carries its leading dash ("-ovlp"); the previous comparison with
    ## the bare string "ovlp" could thus never succeed. Both spellings
    ## are accepted for backward compatibility.
    $cmd .= " --overlap" if ($main::param{disco_noov} =~ /^-?ovlp$/);

    $cmd .= " --center=".$center_pos;
    $cmd .= " --windowgroup=".$main::param{local_words_windowgroup};
    $cmd .= " --len ".$len;
    $cmd .= " > ".$main::outfile{$pattern_type};
    &one_command($cmd, 1, $main::outfile{$pattern_type}."_time.txt");

    ## Convert the locally over-represented words into matrices
    &MatrixFromPatterns($main::outfile{$pattern_type}, $main::outfile{$pattern_type.'_pssm'}, $pattern_type, 0);
  }
}


################################################################
## Concatenate the options for MEME and compute the suffix
sub CalcMemeOptions {
  ## Build two global strings (both are reset at each call):
  ##   $meme_options  arguments to append to the MEME command line;
  ##   $meme_suffix   suffix used to name the MEME output files.
  ## Inputs are read from %main::outfile, %main::param and the global
  ## list @MEME_options.
  my $strand = $main::param{strand};
  my $bg_file = $main::outfile{meme_bg_file};

  ## The option string starts with the input file, the suffix with the
  ## strand parameter.
  $meme_options = " ".$main::outfile{"test_purged"};
  $meme_suffix = "".$strand;

  ## Both strands are analyzed unless single-strand mode is requested
  if ($strand ne "-1str") {
    $meme_options .= " -revcomp";
  }

  ## Pass the background model file if its name has been defined
  ## (the existence of the file itself is not tested here).
  if ($bg_file) {
    $meme_options .= " -bfile ".$bg_file;
  }

  ## Each MEME-specific option is appended with its value; all of them
  ## except "bfile" also contribute to the file suffix.
  foreach my $option (@MEME_options) {
    my $value = $main::param{"meme_".$option};
    $meme_options .= " -".$option." ".$value;
    next if ($option eq "bfile");
    $meme_suffix .= "_".$option.$value;
  }
}

################################################################
## Compute background for MEME
sub CalcMemeBackground {
  ## Compute the background model file for MEME: the purged test
  ## sequences are piped into the program "fasta-get-markov" (order
  ## given by $main::param{bg_meme_markov}) and the output is written
  ## to $main::outfile{meme_bg_file}.

  ## Declared locally, as in RunMEME. It was previously read without
  ## any declaration in this sub, i.e. taken from an undeclared
  ## package global.
  my $die_on_error = 0;

  my $cmd = "cat ".$main::outfile{"test_purged"}." | ";
  $cmd .= &RSAT::server::GetProgramPath("fasta-get-markov", $die_on_error);
  $cmd .= " -m ".$main::param{bg_meme_markov};
  $cmd .= " > ".$main::outfile{meme_bg_file};
  &one_command($cmd, 1, $main::outfile{meme_bg_file}."_time.txt");
}


################################################################
## Run MEME
sub RunMEME {
  ## Run MEME with the options stored in the global $meme_options
  ## (see CalcMemeOptions), then convert its output to TRANSFAC format
  ## with convert-matrix, which is also asked to export logos.
  my $die_on_error = 0;
  my $meme_path = &RSAT::server::GetProgramPath("meme", $die_on_error);

  ## A preliminary conversion discarding short sequences (MEME is
  ## reported not to accept sequences shorter than 8) was drafted here
  ## but is disabled: the purged test sequences are passed as is.

  ## Run the MEME command
  my $meme_result = $main::outfile{meme};
  my $run_cmd = "".$meme_path." ".$meme_options." > ".$meme_result;
  &one_command($run_cmd, 1, $meme_result."_time.txt");

  ## Convert the matrices
  my $convert_cmd = join(" ",
                         $SCRIPTS."/convert-matrix -i ".$main::outfile{meme},
                         "-from meme",
                         "-to transfac",
                         "-return counts,parameters,logo",
                         "-logo_file ".$main::outfile{meme_logos},
                         "-o ".$main::outfile{meme_tf});
  &one_command($convert_cmd);
}

################################################################
## Run ChIPMunk
sub RunChIPMunk {
  ## Run ChIPMunk (java class ru_genetika.ChIPMunk) on the purged test
  ## sequences and redirect its output to $main::outfile{chipmunk}.
  ## The installation directory is read from the environment variable
  ## chipmunk_dir; a fatal error is raised if it is not set.
  my $die_on_error = 0; ## not used below
  my $chipmunk_dir = $ENV{chipmunk_dir};
  if (!$chipmunk_dir) {
   &RSAT::error::FatalError("ChIPMunk directory must be specified in the file RSAT_config.props (chipmunk_dir=[ChIPMunk installation path]). ");
  }

  ## Positional arguments, in the order expected on the ChIPMunk
  ## command line. They are appended to the global $chipmunk_options,
  ## which is not reset here.
  foreach my $param_key (qw(chipmunk_start_motif_length
                            chipmunk_stop_motif_length
                            chipmunk_verbose
                            chipmunk_mode)) {
    $chipmunk_options .= " ".$main::param{$param_key};
  }
  $chipmunk_options .= " s:".$main::outfile{"test_purged"};

  ## Run the ChIPMunk command
  my $result_file = $main::outfile{chipmunk};
  my $cmd = "java -cp '".$chipmunk_dir."'"
    ." ru_genetika.ChIPMunk"
    ." ".$chipmunk_options
    ." > ".$result_file;
  &one_command($cmd, 1, $result_file."_time.txt");

  ## The conversion of the ChIPMunk result to TRANSFAC format (with
  ## logos) is not done here.
}


################################################################
## Convert reference motif and generate logos
sub RefMotif {
  ## Install the reference motif(s) in the output directory:
  ##  1. copy the input file ($main::infile{ref_motifs}) to
  ##     $main::outfile{ref_motifs};
  ##  2. produce a TRANSFAC version ($main::outfile{ref_motifs_transfac});
  ##  3. produce a tab-delimited version ($main::outfile{ref_motifs_tab})
  ##     together with sequence logos.

  ## Copy the reference motif in the output directory.
  ## Both output streams are discarded with the POSIX redirection
  ## "> /dev/null 2>&1": the former ">& /dev/null" is a csh/bash
  ## extension, rejected as a syntax error by strict POSIX shells
  ## (e.g. dash) that can be installed as /bin/sh.
  my $cmd = "rsync -ruptL ";
  $cmd .= " ".$main::infile{ref_motifs};
  $cmd .= " ".$main::outfile{ref_motifs};
  $cmd .= " > /dev/null 2>&1";
  &one_command($cmd, 1);

  ## Convert the reference motif to TRANSFAC format, because this
  ## format holds information about the motif name, ID etc.
  ## A file already in TRANSFAC format is simply copied.
  if ((lc($main::param{ref_motifs_format}) eq "transfac") ||
      (lc($main::param{ref_motifs_format}) eq "tf")) {
    $cmd = "cp ";
    $cmd .= " ".$main::infile{ref_motifs};
    $cmd .= " ".$main::outfile{ref_motifs_transfac};
  } else {
    $cmd = $SCRIPTS."/convert-matrix -v 0";
    $cmd .= " -i ".$main::infile{ref_motifs};
    $cmd .= " -from ".$main::param{ref_motifs_format};
    $cmd .= " -to transfac -decimals 1";
    $cmd .= " -return counts,consensus,parameters";
    $cmd .= " -o ".$main::outfile{ref_motifs_transfac};
  }
  &one_command($cmd, 1);

  ## Also export the matrices in tab-delimited format and export sequence logos
  $cmd = $SCRIPTS."/convert-matrix -v 0";
  $cmd .= " -i ".$main::infile{ref_motifs};
  $cmd .= " -from ".$main::param{ref_motifs_format};
  $cmd .= " -to tab";
  $cmd .= " -return counts,logo";
  $cmd .= " -logo_file ".$main::outfile{ref_motifs_logo};
  $cmd .= " -o ".$main::outfile{ref_motifs_tab};
  &one_command($cmd, 1);
}


################################################################
## Compare the significance of words (oligos, dyads) discovered by
## the different approaches
sub MergeWords {
  ## Build a table comparing the significance of the words found by
  ## the different pattern discovery approaches listed in
  ## @patterns_to_merge (compare-scores piped into row-stats), then
  ## export it as HTML and as a heatmap. If $disco{merged_words} is
  ## set, matrices are also built from the merged words.
  &RSAT::message::Debug("Merging words", join(",", @patterns_to_merge)) if ($main::verbose >= 2);
  my $cmd = $SCRIPTS."/compare-scores -v 1 ";
  my $file_nb = 0;
  foreach my $pattern_type (@patterns_to_merge) {
    &RSAT::message::Debug("Merging words of type", $pattern_type) if ($main::verbose >= 5);
    next if ($pattern_type eq "merged_words"); ## avoid recycling the merged words from a previous run in the compilation

    my $pattern_file = $main::outfile{$pattern_type};
    unless (-e $pattern_file) {
      &RSAT::message::Warning("Missing pattern file", $pattern_type, $pattern_file) if ($main::verbose >= 2);
      next;
    }

    ## Identify the score column BEFORE adding the file to the command.
    ## Previously the file was added (and the file counter incremented)
    ## first, so that a file of unknown type stayed on the command line
    ## without any -sc option, shifting the column specifications.
    ## NOTE(review): "local_dyads" matches /dyads/ and thus gets
    ## column 8, not the column 9 used for local_words -- confirm
    ## against the local-word-analysis output format.
    my $sig_col;
    if ($pattern_type =~ /oligos_/) {
      $sig_col = 8;
    } elsif ($pattern_type =~ /oligo_diff/) {
      $sig_col = 11;
    } elsif ($pattern_type =~ /dyads/) {
      $sig_col = 8;
    } elsif ($pattern_type =~ /local_words/) {
      $sig_col = 9;
    } elsif ($pattern_type =~ /positions/) {
      $sig_col = 9;
    } else {
      &RSAT::message::Warning("Unknown score column for pattern type", $pattern_type);
      next;
    }

    ## Add the file, its score column, and the path prefix to strip
    $file_nb++;
    $cmd .= " -i ".$pattern_file;
    $cmd .= " -sc".$file_nb." ".$sig_col;
    $cmd .= " -suppress ".$main::dir{output}."/"."results/".$pattern_type."/";
  }
  $cmd .= " -ic 1";
  $cmd .= " -lc";
  $cmd .= " -null .";
  $cmd .= " -suppress ".$main::param{prefix}."_";
  ## In this double-quoted Perl string "\." is reduced to ".", so the
  ## shell command contains -suppress '.tab'.
  $cmd .= " -suppress '\.tab'";
  $cmd .= " | ".$SCRIPTS."/row-stats -after 1 -sort ";
  $cmd .= " -o ".$main::outfile{merged_words};
  &one_command($cmd, 1, $main::outfile{merged_words}."_time.txt");

  ## Generate a HTML format of the table (convenient for sorting the
  ## words according to different columns)
  $cmd = $SCRIPTS."/text-to-html -i ".$main::outfile{merged_words};
  $cmd .= " -o ".$main::outfile{merged_words_html};
  &one_command($cmd, 1);

  ## Draw a heat map of the word significance table
  $cmd = $SCRIPTS."/draw-heatmap -min 0 -max 10  -out_format png";
  $cmd .= " -col_width 40 -rownames -gradient fire";
  $cmd .= " -row_height 16";
  $cmd .= " -i ".$main::outfile{merged_words};
  $cmd .= " -o ".$main::outfile{merged_words_heatmap};
  &one_command($cmd, 1);

  &RSAT::message::TimeWarn("Word comparison table", $main::outfile{merged_words}) if ($main::verbose >= 2);

  ## Extract position-specific scoring matrices from the merged words
  if ($disco{merged_words}) {
    my $pattern_type = "merged_words";
    &MatrixFromPatterns($main::outfile{$pattern_type}, $main::outfile{$pattern_type.'_pssm'}, $pattern_type, 4);
  }
}


################################################################
## Merge all discovered motifs in a single file
sub MergeMotifs {
  ## Concatenate the TRANSFAC matrix files of all pattern types listed
  ## in @pattern_types into $main::outfile{motifs_discovered}.
  ## TRANSFAC format is used because it allows to associate a name to
  ## each matrix. The reference motif is not included in the merged
  ## file.
  if ($main::verbose >= 2) {
    &RSAT::message::TimeWarn("Merging discovered motifs", join(",", @pattern_types));
  }

  my $merged_file = $main::outfile{motifs_discovered};

  ## All steps are chained in a single shell command; the first one
  ## removes any previous version of the merged motif file.
  my @shell_steps = ("rm -f ".$merged_file);

  foreach my $pattern_type (@pattern_types) {
    my $matrix_file = $main::outfile{$pattern_type.'_pssm_tf'};

    ## Only the matrix files that exist at this point are appended
    unless (-e $matrix_file) {
      &RSAT::message::Warning("Missing matrix file", $pattern_type, $matrix_file) if ($main::verbose >= 2);
      next;
    }
    push @shell_steps, "cat ".$matrix_file." >> ".$merged_file;
  }
  &one_command(join("; ", @shell_steps), 1);

  &RSAT::message::TimeWarn("Merged discovered motifs", $main::outfile{motifs_discovered}) if ($main::verbose >= 2);
}


################################################################
## Load the discovered motifs from the merged TRANSFAC file
## ($main::outfile{motifs_discovered}) into the global list @motifs.
##
## The result is cached: once the global flag $motifs_read is set,
## the current content of @motifs is returned without re-reading.
##
## For each motif, the associated file names are indexed and the
## consensus is computed with the background model loaded from
## $main::param{scan_bg_file} (the same model as used for sequence
## scanning).
##
## Return value: the list of motifs. If the background model file does
## not exist, a warning is issued and the routine returns nothing
## (@motifs has been emptied and the cache flag is not set).
sub ReadDiscoveredMotifs {

  ## Cached result: avoid re-reading the motifs
  return (@motifs) if ($motifs_read);

  ## The list is a global variable: make sure it starts empty
  @motifs = ();

  ## Background model for computing the consensus and logo of each motif
  my $bg_file = $main::param{scan_bg_file};
  &RSAT::message::Info("Loading background model fril file", $bg_file) if ($main::verbose >= 3);
  local $bg_model = RSAT::MarkovModel->new();

  unless (-e $bg_file) {
    &RSAT::message::Warning("Cannot generate synthesis by motifs because the background model has not been computed yet")
      if ($main::verbose >= 1);
    return;
  }
  $bg_model->load_from_file($bg_file, "motifsampler");

  ## Read the motifs and compute their consensus
  my $motif_file = $main::outfile{motifs_discovered};
  if (-e $motif_file) {
    @motifs = &RSAT::MatrixReader::readFromFile($motif_file, "transfac");
    my $motif_nb = scalar(@motifs);

    for my $matrix (@motifs) {
      &SetFileNamesForMotif($matrix->get_attribute("id"));

      ## Consensus computed with the same background model as used
      ## for sequence scanning
      $matrix->setMarkovModel($bg_model);
      $matrix->calcConsensus();
    }

    if ($main::verbose >= 3) {
      &RSAT::message::Info($motif_nb." discovered motifs read from file", $main::outfile{motifs_discovered});
    }
  } else {
    &RSAT::message::Warning("Discovered motif file does not exist",  $main::outfile{motifs_discovered});
  }

  $motifs_read = 1;
  return @motifs;
}


################################################################
## Specify the names of all files associated to one discovered motif
## and index those file names for post-processing and synthesis.
##
## Argument:
##   $motif_id  identifier of the motif
##
## Side effects:
##   - fills %main::prefix and %main::outfile with keys built from
##     $motif_id (matrix files, logos, predicted sites, enrichment,
##     comparisons with reference motifs and motif databases);
##   - sets the global $subdir (read by &SplitMotifs() after the call);
##   - on the first call for a given motif, indexes its directory in
##     %main::dir and appends the motif ID to @outdir.
sub SetFileNamesForMotif {
  my ($motif_id) = @_;

  ## Motif-specific directory, passed as sub-directory to &OutFileName().
  ## NOTE(review): presumably relative to the output directory, which
  ## &OutFileName() is expected to prepend - confirm.
  my $motif_dir = "results/discovered_motifs/".$motif_id;

  ## Prefix for motif files
  $main::prefix{$motif_id."_prefix"} = &OutFileName($motif_dir, "", $motif_id);

  ## Output file for the motif (transfac format). $subdir is
  ## deliberately a global variable.
  ($main::outfile{$motif_id.'_tf'}, $subdir) = &OutFileName($motif_dir, ".tf", $motif_id);

  ## Only index once the output directories because this routine may be
  ## called several times (by &SplitMotifs() and &ReadMotifTable()).
  unless (defined($main::dir{$motif_id})) {
    $main::dir{$motif_id} = $motif_dir;
    push @outdir, $motif_id;
    $main::dir{$motif_id.'_subdir'} = $subdir;
  }

  my $motif_prefix = $main::prefix{$motif_id."_prefix"};

  ## Tab-delimited matrix file
  $main::outfile{$motif_id."_tab"} = $motif_prefix.".tab";

  ## Sequence logos (direct and reverse complement)
  $main::prefix{$motif_id."_logo"} = $motif_prefix."_logo";
  $main::outfile{$motif_id."_logo"} = $main::prefix{$motif_id."_logo"}.".".$main::param{img_format};
  $main::prefix{$motif_id."_logo_rc"} = $motif_prefix."_logo_rc";
  $main::outfile{$motif_id."_logo_rc"} = $main::prefix{$motif_id."_logo_rc"}.".".$main::param{img_format};

  ## Predicted sites (sequence scanning)
  $main::outfile{$motif_id.'_pssm_sites'} = $motif_prefix."_sites.tab";
  $main::outfile{$motif_id.'_pssm_sites_genomic'} = $motif_prefix."_sites_genomic.bed";
  $main::outfile{$motif_id.'_pssm_site_distrib'} = $motif_prefix."_site_distrib.tab";
  $main::outfile{$motif_id.'_pssm_site_distrib_graph'} = $motif_prefix."_site_distrib.".$main::param{img_format};

  ## Enrichment in binding sites
  my $enrich_prefix = $motif_prefix."_scan_mkv".$main::param{scan_enrich_markov}."_enrichment";
  $main::outfile{$motif_id.'_pssm_enrichment'} = $enrich_prefix.".tab";
  $main::outfile{$motif_id.'_pssm_enrichment_png'} = $enrich_prefix.".png";

  ## Comparisons between discovered and reference motifs
  $main::prefix{$motif_id."_vs_ref_prefix"} = &OutFileName($motif_dir, "", $motif_id."_vs_ref");
  $main::outfile{$motif_id."_vs_ref"} = &OutFileName($motif_dir, ".tab", $motif_id."_vs_ref");
  $main::outfile{$motif_id."_vs_ref_html"} = &OutFileName($motif_dir, ".html", $motif_id."_vs_ref");

  ## Comparisons between discovered motifs and motif databases
  foreach my $db_name (@motif_databases) {
    my $vs_db = $motif_id.'_vs_db_'.$db_name;
    $main::prefix{$vs_db."_prefix"} = &OutFileName($motif_dir, "", $vs_db);
    $main::outfile{$vs_db} = &OutFileName($motif_dir, ".tab", $vs_db);
    $main::outfile{$vs_db."_html"} = &OutFileName($motif_dir, ".html", $vs_db);
  }
}

################################################################
## Split the discovered motifs in separate files in order to analyze
## them separately. One separate directory is created for each motif.
##
## The motifs are obtained from &ReadDiscoveredMotifs(). For each of
## them, the routine
##   - defines the motif-specific file names (&SetFileNamesForMotif());
##   - exports the matrix in TRANSFAC format;
##   - converts it to a tab-delimited count matrix (convert-matrix);
##   - adds one row to the motif table
##     ($main::outfile{motifs_discovered_table}), which is read back
##     by &ReadMotifTable().
##
## Motifs without identifier receive an ID of the form
## discomotif_<number>, where the number is zero-padded to the width
## of the total number of motifs.
sub SplitMotifs {
  &RSAT::message::TimeWarn("Splitting discovered motifs in separate files") if ($main::verbose >= 2);

  @motifs = &ReadDiscoveredMotifs();

  ## Create a table with the list of motifs
  my $motif_table = &OpenOutputFile($main::outfile{motifs_discovered_table});

  ## Print the header of the motif table
  print $motif_table join("\t", "#nb", "identifier", "directory", "prefix", "transfac_format_file"), "\n";

  if (scalar(@motifs) == 0) {
    print $motif_table "; NO MOTIF FOUND\n";
    &RSAT::message::Warning("No motif found") if ($main::verbose >= 2);
  } else {
    ## Number of digits for the counter of split matrix files: the
    ## number of characters of the motif count. A logarithm-based
    ## estimate is one digit short for exact powers of ten (10
    ## motifs require 2 digits) and is exposed to rounding errors.
    my $id_digits = length(scalar(@motifs));

    ## Store each motif in a separate directory and file
    my $m = 0;
    foreach my $motif (@motifs) {
      $m++;

      ## Get the motif ID or, if not defined, create an ID
      my $motif_id = $motif->get_attribute("id");
      unless ($motif_id) {
        ## Use the same number of digits for all matrices so that the
        ## alphabetical order of the split files corresponds to the
        ## numerical order of the matrices.
        $motif_id = "discomotif_".sprintf("%0${id_digits}d", $m);
      }

      ## Define all the file names for the current motif. This also
      ## sets the global $subdir used in the motif table below.
      &SetFileNamesForMotif($motif_id);

      ## Print the motif in transfac format
      my $motif_out = &OpenOutputFile($main::outfile{$motif_id.'_tf'});
      print $motif_out $motif->toString(sep=>"\t",
                                        type=>"counts",
                                        format=>"transfac",
                                       );
      close($motif_out);

      print $motif_table join("\t", $m, $motif_id, $subdir, $main::prefix{$motif_id."_prefix"}, $main::outfile{$motif_id.'_tf'}), "\n";

      ## Convert the motif to a tab-delimited file
      my $cmd = $SCRIPTS."/convert-matrix -v 0";
      $cmd .= " -i ".$main::outfile{$motif_id.'_tf'};
      $cmd .= " -from transfac -to tab";
      $cmd .= " -return counts";
      $cmd .= " -o ".$main::outfile{$motif_id."_tab"};
      &one_command($cmd, 1);
    }
  }
  close $motif_table;

  &RSAT::message::Info("Motif table",$main::outfile{motifs_discovered_table}) if ($main::verbose >= 2);
}

################################################################
## Compare discovered motifs to each other, identify clusters of
## similar motifs, and export the comparison results as graphs.
##
## Successive steps (each submitted with &one_command()):
##   1. compare-matrices on the merged motif file + HTML version of
##      the comparison table;
##   2. GML graph and PNG figure of the comparison table;
##   3. partitioning of the comparison table with MCL (requires
##      $ENV{mcl_dir}, fatal error otherwise) + conversion of the MCL
##      clusters to a tab-delimited file;
##   4. extraction of the intra-cluster graph, then connected
##      components with the intra-component degree of each node
##      (most connected nodes will serve as seeds for motif
##      clustering);
##   5. GML graph and PNG figure of the intra-cluster graph.
sub ClusterMotifs {

  ## Convert a tab-delimited comparison table to a GML graph (can be
  ## opened with CytoScape or Yed), then draw a figure of that graph.
  ## Arguments are keys of %main::outfile.
  my $draw_graph = sub {
    my ($table_key, $gml_key, $png_key) = @_;

    my $gml_cmd = join("",
                       $SCRIPTS."/convert-graph -i ".$main::outfile{$table_key},
                       " -ewidth -ecolors fire",
                       " -layout spring",
                       " -from tab -to gml -scol 3 -tcol 4 -wcol 5",
                       " -o ".$main::outfile{$gml_key});
    &one_command($gml_cmd, 1);

    my $png_cmd = join("",
                       $SCRIPTS."/display-graph",
                       " -in_format gml -i ".$main::outfile{$gml_key},
                       " -ewidth",
                       " -layout none",
                       " -out_format png -o ".$main::outfile{$png_key});
    &one_command($png_cmd, 1);
  };

  ## Comparison between discovered matrices
  my $compa_cmd = join("",
                       $SCRIPTS."/compare-matrices -v ".$main::verbose." -mode matches",
                       " -format transfac -file ".$main::outfile{motifs_discovered},
                       " -DR -distinct",
                       " -sort cor",
                       " -uth rank 1", ## Only report the best matching shift between a pair of matrices
                       " -lth w ".$main::param{matrix_compa_min_w}, ## Min number of aligned columns
                       " -lth cor ".$main::param{matrix_compa_min_cor}, ## Min correlation
                       " -lth Ncor ".$main::param{matrix_compa_min_Ncor}, ## Min normalized correlation
                       " -return matrix_name,strand,offset,".$main::param{matrix_compa_metrics}.",width,consensus",
                       " -o ".$main::outfile{motifs_disco_compa},
                       "; ".$SCRIPTS."/text-to-html -i ".$main::outfile{motifs_disco_compa},
                       " -o ".$main::outfile{motifs_disco_compa_html});
  &one_command($compa_cmd, 1);

  ## Graph and figure of the motif comparison table
  $draw_graph->("motifs_disco_compa", "motifs_disco_compa_gml", "motifs_disco_compa_png");

  ## Use MCL to partition the motif graph into clusters
  my $mcl_dir = $ENV{mcl_dir};
  &RSAT::error::FatalError("Motif comparison requires to install MCL and indicate its path in the file $ENV{RSAT}/RSAT_config.props")
    unless ($mcl_dir);

  my $mcl_input = $main::outfile{motifs_disco_compa}.".mcl";
  my $mcl_cmd = join("",
                     "grep -v '^;' ".$main::outfile{motifs_disco_compa}.">".$mcl_input,
                     "; ".$mcl_dir."/mcl ".$mcl_input,
                     " -I 1.8 --abc -V all ",
                     " -o ".$main::outfile{motifs_disco_clusters_mcl},
                     " ; ${SCRIPTS}/convert-classes -i ".$main::outfile{motifs_disco_clusters_mcl},
                     " -from mcl -to tab ",
                     " -o ".$main::outfile{motifs_disco_clusters_tab});
  &one_command($mcl_cmd, 1);

  ## Split the motif graph into clusters as defined by MCL, then
  ## identify graph components and count the intra-component degree
  ## of each node.
  my $clusters_cmd = join("",
                          $SCRIPTS."/graph-get-clusters -i ".$main::outfile{motifs_disco_compa},
                          " -in_format tab -scol 3 -tcol 4 -wcol 5 -return clusters ",
                          " -clusters ".$main::outfile{motifs_disco_clusters_tab},
                          " -out_format tab -o ".$main::outfile{motifs_disco_clusters_graph},
                          " ; ".$SCRIPTS."/graph-connex-components -v 1",
                          " -i ".$main::outfile{motifs_disco_clusters_graph},
                          " -wcol 5",
                          " -o ".$main::outfile{motifs_disco_compa_cluster_intra_degree});
  &one_command($clusters_cmd, 1);

  ## Graph and figure of the intra-cluster graph
  $draw_graph->("motifs_disco_clusters_graph", "motifs_disco_clusters_graph_gml", "motifs_disco_clusters_graph_png");

}


################################################################
## Read the list of discovered motifs (produced by the method
## &SplitMotifs()) and index them. This is required for several
## post-processing methods.
##
## Each data row of the table ($main::outfile{motifs_discovered_table})
## provides, in tab-separated columns: motif number, identifier,
## directory, prefix and TRANSFAC file. These values are collected in
## the global lists @motif_ids, @motif_dirs, @motif_prefixes and
## @motif_files, and &SetFileNamesForMotif() is called for each motif.
##
## The global lists are emptied before reading, so that calling this
## routine several times does not index the same motifs repeatedly.
##
## If the table does not exist, a warning is issued and the global
## lists are left untouched.
sub ReadMotifTable {
  unless (-e $main::outfile{motifs_discovered_table}) {
    &RSAT::message::Warning("Discovered motif table does not exist", $main::outfile{motifs_discovered_table});
    return;
  }

  ## Reset the index: without this, a second call would append a
  ## duplicate entry for each motif.
  @motif_ids = ();
  @motif_dirs = ();
  @motif_prefixes = ();
  @motif_files = ();

  my ($in) = &OpenInputFile($main::outfile{motifs_discovered_table});

  ## NOTE(review): these variables (as well as $motif_nb and
  ## $motif_prefix below) are package variables; they are kept as such
  ## in case other parts of the file rely on them - confirm and
  ## convert to lexical variables if not.
  local $motif_id;
  local $motif_dir;
  local $motif_file;

  ## Read into a lexical variable rather than the global $_
  while (my $line = <$in>) {
    next unless ($line =~ /\S/); # Skip empty lines
    next if ($line =~ /^;/);     # skip comment lines
    next if ($line =~ /^#/);     # skip header line
    chomp($line);
    ($motif_nb, $motif_id, $motif_dir, $motif_prefix, $motif_file) = split(/\t/, $line);
    push @motif_ids, $motif_id;
    push @motif_dirs, $motif_dir;
    push @motif_prefixes, $motif_prefix;
    push @motif_files, $motif_file;

    &SetFileNamesForMotif($motif_id);
  }
  close ($in);
}

################################################################
## Compare discovered motifs to the reference motif(s).
##
## Two series of comparisons are run with compare-matrices:
##   - each discovered motif separately against the reference
##     motif(s) (output prefix: <motif_id>_vs_ref_prefix);
##   - all discovered motifs together against the reference motif(s)
##     (output prefix: motifs_vs_ref_prefix). The resulting table is
##     then converted to a GML graph and drawn as a PNG figure.
##
## TO DO: the all-motifs-against-reference comparison is redundant
## (comparisons are performed twice), but the result is used for
## drawing the ref versus discovered graph. The comparison table and
## graph should be recollected from the individual motif comparisons.
sub MotifsVersusReference {
  ################################################################
  ## Compare separately each discovered motif to reference motif(s)

  ## Read the motif table only if no motif has been indexed yet. The
  ## test must succeed as soon as one motif is indexed: re-reading the
  ## table for a single-motif result would index that motif again.
  &ReadMotifTable() unless (scalar(@motif_ids) > 0);
  my $motif_nb = scalar(@motif_ids);

  &RSAT::message::TimeWarn("Comparing ".$motif_nb." discovered motifs with reference motif(s)") if ($main::verbose >= 2);

  foreach my $i (0..$#motif_ids) {
    my $m = $i+1;
    my $motif_id = $motif_ids[$i];
    my $motif_dir = $motif_dirs[$i];
    &RSAT::message::Info("comparing motif", $m."/".$motif_nb, $motif_id, $motif_dir) if ($main::verbose >= 4);

    ## Compare one discovered motif with the reference motif
    my $cmd = $SCRIPTS."/compare-matrices -v ".$main::verbose." -mode matches";
    $cmd .= " -format1 transfac -file1 ".$main::outfile{$motif_id.'_tf'};
    $cmd .= " -format2 transfac -file2 ".$main::outfile{ref_motifs_transfac};
    $cmd .= $main::param{motif_compa_options};
    $cmd .= " -o ".$main::prefix{$motif_id."_vs_ref_prefix"};
    &one_command($cmd, 1);
  }

  ################################################################
  ## Compare all matrices to the reference motif. The HTML version of
  ## the table is not generated here (according to the original
  ## comments, compare-matrices does it automatically).
  my $cmd = $SCRIPTS."/compare-matrices -v ".$main::verbose." -mode matches";
  $cmd .= $main::param{motif_compa_options};
  $cmd .= " -format1 transfac -file1 ".$main::outfile{ref_motifs_transfac};
  $cmd .= " -format2 transfac -file2 ".$main::outfile{motifs_discovered};
  $cmd .= " -o ".$main::prefix{"motifs_vs_ref_prefix"};
  &one_command($cmd, 1);

  &RSAT::message::TimeWarn("Discovered versus reference motif", $main::outfile{motifs_vs_ref}) if ($main::verbose >= 2);

  ## Generate a GML graph with the matrix comparison result (can be opened with CytoScape or Yed)
  $cmd = $SCRIPTS."/convert-graph -i ".$main::outfile{"motifs_vs_ref"};
  $cmd .= " -ewidth -ecolors fire";
  $cmd .= " -layout spring";
  $cmd .= " -from tab -to gml -scol 3 -tcol 4 -wcol 5";
  $cmd .= " -o ".$main::outfile{"motifs_vs_ref_gml"};
  &one_command($cmd, 1);

  ## Generate a figure of the motif comparison graph
  $cmd = $SCRIPTS."/display-graph";
  $cmd .= " -in_format gml -i ".$main::outfile{"motifs_vs_ref_gml"};
  $cmd .= " -ewidth";
  $cmd .= " -layout none";
  $cmd .= " -out_format png -o ".$main::outfile{"motifs_vs_ref_png"};
  &one_command($cmd, 1);
}

################################################################
## Compare discovered motifs to the motif database(s).
##
## For each database listed in @motif_databases, two series of
## comparisons are run with compare-matrices:
##   - each discovered motif separately against the database
##     (output prefix: <motif_id>_vs_db_<db_name>_prefix);
##   - all discovered motifs together against the database (output:
##     motifs_vs_db_<db_name>; the execution time is saved in a
##     separate "_time.txt" file). The resulting table is then
##     converted to a GML graph and drawn as a PNG figure.
##
## TO DO: the comparison of all motifs against db is redundant
## (comparisons are performed twice), but the result is used for
## drawing the db versus discovered graph. The comparison table and
## graph should be recollected from the individual motif comparisons.
sub MotifsVersusDatabase {
  foreach my $db_name (@motif_databases) {
    ################################################################
    ## Compare separately each discovered motif to the database

    ## Read the motif table only if no motif has been indexed yet. The
    ## test must succeed as soon as one motif is indexed: re-reading
    ## the table (here, once per database) for a single-motif result
    ## would index that motif again.
    &ReadMotifTable() unless (scalar(@motif_ids) > 0);
    my $motif_nb = scalar(@motif_ids);

    &RSAT::message::TimeWarn("Comparing ".$motif_nb." discovered motifs with database", $db_name) if ($main::verbose >= 2);
    foreach my $i (0..$#motif_ids) {
      my $m = $i+1;
      my $motif_id = $motif_ids[$i];
      my $motif_dir = $motif_dirs[$i];
      &RSAT::message::Debug("comparing motif", $m."/".$motif_nb, $motif_id, $motif_dir) if ($main::verbose >= 2);

      ## Compare one discovered motif with the database
      my $cmd = $SCRIPTS."/compare-matrices -v ".$main::verbose." -mode matches";
      $cmd .= " -format1 transfac -file1 ".$main::outfile{$motif_id.'_tf'};
      $cmd .= " -format2 ".$main::motif_db_format{$db_name};
      $cmd .= " -file2 ".$main::infile{"motif_db_".$db_name};
      $cmd .= $main::param{motif_compa_options};
      $cmd .= " -o ".$main::prefix{$motif_id.'_vs_db_'.$db_name."_prefix"};
      &one_command($cmd, 1);
    }

    ################################################################
    ## Comparison between all discovered matrices and the database.
    ## The HTML version of the table is not generated here (according
    ## to the original comments, compare-matrices does it
    ## automatically).
    my $cmd = $SCRIPTS."/compare-matrices -v ".$main::verbose." -mode matches";
    $cmd .= " -format1 transfac -file1 ".$main::outfile{motifs_discovered};
    $cmd .= " -format2 ".$main::motif_db_format{$db_name};
    $cmd .= " -file2 ".$main::infile{"motif_db_".$db_name};
    $cmd .= $main::param{motif_compa_options};
    $cmd .= " -o ".$main::outfile{"motifs_vs_db_".$db_name};
    &one_command($cmd, 1, $main::outfile{"motifs_vs_db_".$db_name}."_time.txt");

    &RSAT::message::TimeWarn("Discovered versus DB", $db_name, $main::outfile{"motifs_vs_db_".$db_name}) if ($main::verbose >= 2);

    ## Generate a GML graph with the matrix comparison result (can be opened with CytoScape or Yed)
    $cmd = $SCRIPTS."/convert-graph -i ".$main::outfile{"motifs_vs_db_".$db_name};
    $cmd .= " -ewidth -ecolors fire";
    $cmd .= " -layout spring";
    $cmd .= " -from tab -to gml -scol 3 -tcol 4 -wcol 5";
    $cmd .= " -o ".$main::outfile{"motifs_vs_db_".$db_name."_gml"};
    &one_command($cmd, 1);

    ## Generate a figure of the motif comparison graph
    $cmd = $SCRIPTS."/display-graph";
    $cmd .= " -in_format gml -i ".$main::outfile{"motifs_vs_db_".$db_name."_gml"};
    $cmd .= " -ewidth";
    $cmd .= " -layout none";
    $cmd .= " -out_format png -o ".$main::outfile{"motifs_vs_db_".$db_name."_png"};
    &one_command($cmd, 1);
  }
}

################################################################
## Convert patterns (word assemblies) into position-specific scoring
## matrices by running matrix-from-patterns on the test sequences.
##
## Arguments:
##   $pattern_file  file containing the patterns (option -pl)
##   $pssm_file     output file (option -o); the execution time is
##                  saved in the file $pssm_file."_time.txt"
##   $pattern_type  used as matrix prefix; the value "dyads" selects
##                  -subst 0, any other value -subst 1
##   $score_column  score column (option -sc), only transmitted if
##                  strictly positive
sub MatrixFromPatterns {
  my ($pattern_file, $pssm_file, $pattern_type, $score_column) = @_;

  if ($verbose >= 3) {
    &RSAT::message::TimeWarn("Matrix from patterns", $pattern_type);
  }

  ## Collect the pieces of the command line, in order
  my @parts = ($SCRIPTS."/matrix-from-patterns -v 1 ",
               " -seq ".$infile{test_seq},
               " -pl ".$pattern_file,
               " -bgfile ".$main::param{scan_bg_file},
               " -toppat ".$main::param{asmb_toppat},
               " -max_asmb_nb ".$main::param{matrix_nmotifs});
  push @parts, " -sc ".$score_column if ($score_column > 0);
  push @parts, (($pattern_type eq "dyads") ? " -subst 0" : " -subst 1");
  push @parts, (" -prefix ".$pattern_type,
                " -flanks 2",
                " -collect_method matrix-scan-quick",
                " -logo",
                " -o ".$pssm_file);

  &one_command(join("", @parts), 1, $pssm_file."_time.txt");
}



################################################################
## Compute the profile of enrichment of the input peaks in hits
## for a given matrix (reference or discovered motif)
##
## Arguments:
##   $seq_file     sequence file (fasta format)
##   $matrix_file  matrix file (transfac format)
##   $out_file     output table of matrix-scan (distribution and
##                 occurrence probabilities); the execution time is
##                 saved in the file $out_file."_time.txt"
##   $graph_file   output file for the XY plot drawn from $out_file
sub MotifEnrichment {
  my ($seq_file, $matrix_file, $out_file, $graph_file) = @_;

  ## Compute the significance of enrichment in matrix hits.
  ## The command is stored in a lexical variable, in order to avoid
  ## overwriting a package variable $cmd used elsewhere.
  my $cmd = $SCRIPTS."/matrix-scan -v 3";
  $cmd .= " -quick";	   ## TO DO : CHECK IF THIS AFFECTS THE RESULT
  $cmd .= " -seq_format fasta -i ".$seq_file;
  $cmd .= " -matrix_format transfac -m ".$matrix_file;
  $cmd .= " ".$main::param{scan_options};
  $cmd .= " ".$main::param{scan_enrich_options};
  $cmd .= " -return distrib -return occ_proba";
  $cmd .= " -o ".$out_file;
  &one_command($cmd, 1, $out_file."_time.txt");

  ## Draw a XY plot of enrichment in matrix hits
  $cmd = $SCRIPTS."/XYgraph ";
  $cmd .= " -i ".$out_file;
  $cmd .= " -xcol 2 -xleg1 'Weight score'";
  $cmd .= " -xmin ".$main::param{scan_enrich_min_score};
  $cmd .= " -xsize 800 -xgstep1 5 -xgstep2 1";
  $cmd .= " -hline red 100";
  $cmd .= " -hline violet 0";
  $cmd .= " -vline violet 5";
  $cmd .= " -ycol 11 -yleg1 'Binomial significance of hit number'";
  $cmd .= " -ysize 400";
  $cmd .= " -lines";
  $cmd .= " -o ".$graph_file;
  &one_command($cmd, 1);
}

################################################################
## Scan peak sequences with each discovered motif. Draw a positional
## profile of motif instances + assess the significance of the number
## of hits for each possible weight threshold value.
##
## For each motif indexed in @motif_ids, the routine
##   1. predicts sites in the test sequences (matrix-scan -quick);
##   2. if coordinates are available ($main::infile{coord} or
##      $main::param{seq_source}), converts the sites to a bed file
##      (convert-features) whose first line is replaced by a
##      motif-specific track header;
##   3. computes the distribution of site positions (classfreq) and
##      draws it (XYgraph);
##   4. analyzes the enrichment of the input sequences for the motif
##      (&MotifEnrichment()).
sub ScanSequences {

  &RSAT::message::TimeWarn("Scanning sequences") if ($main::verbose >= 2);

  ## Read the motif table only if no motif has been indexed yet. The
  ## test must succeed as soon as one motif is indexed: re-reading the
  ## table for a single-motif result would index that motif again.
  &ReadMotifTable() unless (scalar(@motif_ids) > 0);
  my $motif_nb = scalar(@motif_ids);
  foreach my $i (0..$#motif_ids) {
    my $m = $i+1;
    my $motif_id = $motif_ids[$i];
    my $motif_dir = $motif_dirs[$i];

    &RSAT::message::Debug("Scanning sequences with motif", $m."/".$motif_nb, $motif_id, $motif_dir) if ($main::verbose >= 2);

    ################################################################
    ## Predict site positions

    ## Site prediction with matrix-scan (option -quick)
    my $cmd = $SCRIPTS."/matrix-scan -quick -v 1";
    $cmd .= " -seq_format fasta -i ".$infile{test_seq};
    $cmd .= " -matrix_format transfac -m ".$main::outfile{$motif_id."_tf"};
    $cmd .= " -bg_format inclusive -bgfile ".$main::param{scan_bg_file};
    $cmd .= " ".$main::param{scan_options};
    $cmd .= " ".$main::param{scan_sites_options};
    $cmd .= " -o ".$main::outfile{$motif_id.'_pssm_sites'};
    &one_command($cmd, 1, $main::outfile{$motif_id.'_pssm_sites'}."_time.txt");

    ## Compute the genomic coordinates
    if (($main::infile{coord})||($main::param{seq_source})) {
      &RSAT::message::TimeWarn("Computing genomic coordinates of predicted sites") if ($main::verbose >= 2);
      $cmd = $SCRIPTS."/convert-features";
      $cmd .= " -from ft -i ".$main::outfile{$motif_id.'_pssm_sites'};
      $cmd .= " -coord ".$main::infile{coord} if ($main::infile{coord});
      $cmd .= " -coord ".$main::outfile{"test_seq_coord"} if ($main::param{seq_source});
      $cmd .= " -origin center ";
      $cmd .= " -to bed ";
      my $new_bed_header= "track name=\"".$motif_id."\" description=\"RSAT peak-motifs: ".$motif_id."\" visibility=2 itemRgb=\"On\" use_score=1 color=247,73,2";
      $cmd .= " | sed '1s/^.*/".$new_bed_header."/' ";## change first line
      $cmd .= " > ".$main::outfile{$motif_id.'_pssm_sites_genomic'};
      &one_command($cmd,1);
    }

    ################################################################
    ## Compute the positional distribution of sites: discard comment
    ## and header lines, then feed the mean of columns 5 and 6 of each
    ## site to classfreq.
    &RSAT::message::Debug("Computing positional distribution of motif", $m."/".$motif_nb, $motif_id, $motif_dir) if ($main::verbose >= 2);
    $cmd = "grep -v '^;'";
    $cmd .= " ".$main::outfile{$motif_id.'_pssm_sites'};
    $cmd .= " | grep -v '^#'";
    $cmd .= " | awk '{print \(\$6\+\$5\)/2}'";
    $cmd .= " | ".$SCRIPTS."/classfreq -v 1";
    $cmd .= " -ci ".$main::param{profiles_ci};
    $cmd .= " -o ".$main::outfile{$motif_id.'_pssm_site_distrib'};
    &one_command($cmd, 1);

    ## Draw the graph of predicted site positions
    &RSAT::message::Debug("Drawing positional distribution of motif", $m."/".$motif_nb, $motif_id, $motif_dir) if ($main::verbose >= 2);
    $cmd = $SCRIPTS."/XYgraph";
    $cmd .= " -format ".$main::param{img_format};
    $cmd .= " -i ".$main::outfile{$motif_id.'_pssm_site_distrib'};
    $cmd .= " -lines -xcol 3 -ycol 4";
    $cmd .= " -ysize 200 -ycol 4 -yleg1 'Number of sites'";
    $cmd .= " -xsize 800 -xcol 3 -xleg1 'Sequence position relative to peak center'";
    $cmd .= " -title1 'Predicted sites : $motif_id'";
    $cmd .= " -o ".$main::outfile{$motif_id.'_pssm_site_distrib_graph'};
    &one_command($cmd, 1);

    ################################################################
    ## Analyze enrichment of input sequences for the discovered motif
    &RSAT::message::Debug("Estimating enrichment of input sequences for motif", $m."/".$motif_nb, $motif_id, $motif_dir) if ($main::verbose >= 2);
    &MotifEnrichment($infile{test_seq}, $main::outfile{$motif_id."_tf"},
                     $main::outfile{$motif_id.'_pssm_enrichment'},
                     $main::outfile{$motif_id.'_pssm_enrichment_png'});
  }
}


# ################################################################
# ## Scan peak sequences with the discovered motif
# ##
# ## BEWARE: THIS IS NOT YET WORKING, BECAUSE matrix-scan-quick ONLY
# ## USES THE FIRST MATRIX OF EACH FILE.
# sub old_ScanSequences {
#   $main::param{scan_min_score} = 7.5;
#   &RSAT::message::TimeWarn("Scanning sequences") if ($main::verbose >= 2);
# #  foreach my $seq_type (@seq_types) {
#     foreach my $pattern_type (@pattern_types) {
#       my $cmd = $BIN."/matrix-scan-quick -v 1";
#       $cmd .= " -i ".$infile{test_seq};
#       $cmd .= " -m ".$main::outfile{$pattern_type.'_pssm_counts'};
#       $cmd .= " -bgfile ".$main::param{scan_bg_file};
#       $cmd .= " ".$main::param{scan_strands};
#       $cmd .= " -origin center -return sites";
#       $cmd .= " -t ".$main::param{scan_min_score};
#       $cmd .= " >".$main::outfile{$pattern_type.'_pssm_sites'};
#       &one_command($cmd, 1, $main::outfile{$pattern_type.'_pssm_sites'}."_time.txt");

#       ## Compute the positional distribution of sites
#       $cmd = "awk '{print \(\$6\+\$5\)/2}'";
#       $cmd .= " ".$main::outfile{$pattern_type.'_pssm_sites'};
#       $cmd .= " | ".$SCRIPTS."/classfreq -v 1";
#       $cmd .= " -ci ".$main::param{profiles_ci};
#       $cmd .= " -o ".$main::outfile{$pattern_type.'_pssm_site_distrib'};
#       &one_command($cmd, 1);

#       ## Draw the graph of predicted site positions
#       $cmd = $SCRIPTS."/XYgraph";
#       $cmd .= " -format ".$main::param{img_format};
#       $cmd .= " -i ".$main::outfile{$pattern_type.'_pssm_site_distrib'};
#       $cmd .= " -lines -xcol 3 -ycol 4";
#       $cmd .= " -ysize 200 -ycol 4 -yleg1 'Number of sites'";
#       $cmd .= " -xsize 800 -xcol 3 -xleg1 'Sequence position relative to peak center'";
#       $cmd .= " -title1 'Predicted sites : $pattern_type'";
#       $cmd .= " -o ".$main::outfile{$pattern_type.'_pssm_site_distrib_graph'};
#       &one_command($cmd, 1);
#     }
# #  }
# }


################################################################
## Generate a table summarizing the time spent in the different tasks
## on the basis of the unix "time" command.
##
## For each key of @timetable_keys, the file
## $main::outfile{$key}."_time.txt" is searched for lines containing
## "real" followed by a time value. The last value found is reported;
## values of the form <minutes>m<seconds>.<fraction>s are converted to
## seconds. Tasks without time file are reported as "NA".
##
## Output: one row per task (key, time) in $main::outfile{timetable}.
sub TimeTable {
  my $timetable = &OpenOutputFile($main::outfile{timetable});
  foreach my $key (@timetable_keys) {
    my $file = $main::outfile{$key}."_time.txt";
    my $time = "NA";
    if (-e $file) {
      my ($in) = &OpenInputFile($file);
      while (my $line = <$in>) {
        next unless ($line =~ /real\s+(\S+)/);
        $time = $1;
        if ($time =~ /^(\d+)m(\d+)\.(\d+)s$/) {
          my ($minutes, $seconds, $fraction) = ($1, $2, $3);
          ## Treat the digits after the dot as a decimal fraction,
          ## whatever their number.
          $time = $minutes*60 + "$seconds.$fraction";
        }
      }
      close($in);
    } else {
      &RSAT::message::Warning("Missing time file", $file) if ($main::verbose >= 2);
    }
    print $timetable $key, "\t", $time, "\n";
    &RSAT::message::Info("TimeTable", $key, $time) if ($main::verbose >= 4);
  }

  ## Close the table before reporting it, to make sure its content is
  ## written to the file.
  close($timetable);
  &RSAT::message::TimeWarn("Time table file", $main::outfile{timetable}) if ($main::verbose >= 2);
}

################################################################
## Generate a table summarizing the time spent in the different tasks
## on the basis of the time report in RSAT programs.
##
## For each key of @timelog_keys, the result file $main::outfile{$key}
## is searched for the comment lines "; Job started", "; Job done" and
## "; Seconds". The elapsed time (in seconds) is computed from the
## start and done time stamps (format YYYY_MM_DD.hhmmss, any single
## character being accepted between date and time). Missing values
## are reported as "NA".
##
## Output: a tab-delimited table ($main::outfile{timelog}) and its
## HTML version ($main::outfile{timelog_html}).
sub TimeLog {
  my $timelog = &OpenOutputFile($main::outfile{timelog});

  my $prefix = "NA";
  if (defined($main::param{prefix})) {
    $prefix = $main::param{prefix};
  }

  print $timelog join("\t", "#start_time       ", "done_time        ", "elapsed", "seconds", "task", "prefix", "file"), "\n";

  ## Convert a time stamp to a number of seconds since the epoch.
  ## Returns undef if the stamp has not the expected format or does
  ## not denote a valid date. Both stamps are interpreted on the same
  ## time scale (timegm), so that their difference is the elapsed
  ## time, including for tasks overlapping several days, months or
  ## years.
  require Time::Local;
  my $stamp_to_epoch = sub {
    my ($stamp) = @_;
    return unless ($stamp =~ /(\d{4})_(\d{2})_(\d{2}).(\d{2})(\d{2})(\d{2})/);
    my ($year, $month, $day, $hour, $min, $sec) = ($1, $2, $3, $4, $5, $6);
    local $@;
    ## timegm() dies on out-of-range values
    my $epoch = eval { Time::Local::timegm($sec, $min, $hour, $day, $month - 1, $year) };
    return $epoch;
  };

  foreach my $key (@timelog_keys) {
    my $file = $main::outfile{$key};
    my $start = "NA";
    my $done = "NA";
    my $elapsed = "NA";
    my $seconds = "NA";
    if (-e $file) {
      my ($in) = &OpenInputFile($file);
      while (my $line = <$in>) {
        if ($line =~ /^;\s*Job started\s+(\S+)/i) {
          $start = $1;
        } elsif ($line =~ /^;\s*Job done\s+(\S+)/i) {
          $done = $1;
        } elsif ($line =~ /^;\s*Seconds\s+(\S+)/i) {
          $seconds = $1;
        }
      }
      close $in;
    }

    my $start_epoch = $stamp_to_epoch->($start);
    my $done_epoch = $stamp_to_epoch->($done);
    if (defined($start_epoch) && defined($done_epoch)) {
      $elapsed = $done_epoch - $start_epoch;
    }

    print $timelog join("\t", $start, $done, $elapsed, $seconds, $key, $prefix, $file), "\n";
  }
  close $timelog;
  &RSAT::message::TimeWarn("Time log file", $main::outfile{timelog}) if ($main::verbose >= 2);

  ## Create a HTML version of the timelog table
  my $cmd = $SCRIPTS."/text-to-html";
  $cmd .= " -font variable";
  $cmd .= " -i ".$main::outfile{timelog};
  $cmd .= " -o ".$main::outfile{timelog_html};
  &one_command($cmd, 1);
  &RSAT::message::TimeWarn("Time log html", $main::outfile{timelog_html}) if ($main::verbose >= 2);
}



################################################################
## Generate a HTML table of content of the results with links to the
## different parts.
##
## Returns a string containing a <div id='Menu'> element with:
## - download links (archive and discovered matrices) when the archive
##   file exists or the archive task is active;
## - links to the sections of the synthesis;
## - the first discovered motifs (at most 10, displayed in two columns
##   of 5) with their colored consensus, followed by a "N more motifs"
##   link when there are more than 10 motifs.
##
## Side effect: the global array @motifs is reloaded with
## &ReadDiscoveredMotifs().
sub ResultTOC {
  my $column_size = 5; ## Number of motifs per column of the menu
  my $max_listed = 10; ## Max number of motifs listed in the menu

  my $result_toc = "";

  $result_toc .=  "<div id='Menu'>\n";
  $result_toc .=  "<h3>Results</h3>\n";

  ## Download links
  if ((-e $main::outfile{archive}) || ($task{archive})) {
    my $archive_short = &RSAT::util::ShortFileName($main::outfile{archive});
    my $link = &RSAT::util::RelativePath($main::outfile{synthesis}, $main::outfile{archive});
    $result_toc .=  "<span class='menulink'>";
    $result_toc .=  "[<a href='".$link."'> Download all results (".$archive_short.")</a> ]</span> \n";
    $link = &RSAT::util::RelativePath($main::outfile{synthesis}, $main::outfile{motifs_discovered});
    $result_toc .=  "<span class='menulink'>";
    ## No </p> here: no paragraph was opened in this menu
    $result_toc .=  "[<a href='".$link."'> Download all matrices (transfac format)</a> ]</span>\n";
  }

  ## Links to the sections (the ampersands are escaped for HTML)
  $result_toc .=  "<a href='#seq_composition'>Sequence composition &amp; statistics </a><br/>";
  $result_toc .=  "<a href='#ref_motifs'>Reference motifs</a><br/>" if (defined($main::infile{ref_motifs}));
  @motifs = &ReadDiscoveredMotifs();
  my $motif_nb = scalar(@motifs);

  $result_toc .=  "<a href='#motifs_by_algo'>Discovered motifs (by algorithm) - </a>".$motif_nb."<br/>";
  $result_toc .=  "<a href='#discovered_motifs'>Discovered motifs (with motif comparison) - </a>".$motif_nb."<br/>";

  $result_toc .=  " <table> ";
  $result_toc .=  " <tr> ";
  $result_toc .=  " <td> ";

  ## Display the motifs
  for my $mot (1..$motif_nb) {
    my $matrix = $motifs[$mot-1];
    my $motif_id= $matrix->get_attribute("id");
    my $link = &RSAT::util::RelativePath($main::outfile{synthesis}, $main::outfile{$motif_id."_tf"});
    my $consensus = $matrix->get_attribute("consensus.IUPAC");
    my $colored_consensus = &ColorConsensus($consensus, bold=>1, iupac=>$main::param{iupac_coloring});
    $result_toc .=  "<span class='menulink'><a href='#discovered_motifs_".$mot."'>Motif ".$mot."</a></span>: <a href='".$link."'>[matrix]</a> ".$colored_consensus."<br/> ";

    ## Start the second column
    if ($mot == $column_size) {
      $result_toc .=  " </td> ";
      $result_toc .=  " <td> ";
    }

    ## Stop the list, and point to the first motif that is not listed
    if ($mot == $max_listed) {
      my $remaining = $motif_nb - $mot;
      if ($remaining > 0) {
        $result_toc .=  "<a href='#discovered_motifs_".($mot+1)."'>".$remaining." more motifs</a>";
      }
      last;
    }
  }
  $result_toc .=  " </td> ";
  $result_toc .=  " </tr></table> ";

  $result_toc .=  "<a href='#motif_compa'>Motif comparisons</a><br/>";
  $result_toc .=  "<a href='#logs'>Logs &amp; Commands</a><br/>";
  $result_toc .=  "</div>";

  return($result_toc);
}

################################################################
## Colorize a consensus with same colors as sequence logos.
##
## Usage:
##   my $html = &ColorConsensus($consensus, bold=>1, iupac=>1);
##
## Arguments:
##   $consensus  consensus string (an undefined value is treated as
##               an empty string)
##   bold=>1     surround the result with <b>...</b>
##   iupac=>1    use specific colors for the two-nucleotide IUPAC
##               codes K, W, R, S and Y
##
## Returns a HTML string with one colored <span> per letter of the
## consensus. The coloring is case-insensitive. Letters without a
## specific color are displayed in grey.
sub ColorConsensus {
  my ($consensus, %args) = @_;
  $consensus = "" unless (defined($consensus));

  my %color = (
               ## Nucleotides
               A=>'#00CC00',
               C=>'#0000DD',
               G=>'#FFBB00',
               T=>'#DD0000',

               other=>'#888888',
              );

  if ($args{iupac}) {
    ## Two-nucleotide IUPAC
    $color{K} = '#CC5500';
    $color{W} = '#884400';
    $color{R} = '#666600';
    $color{S} = '#4466BB';
    $color{Y} = '#660066';
  }

  ## Case-insensitive coloring
  foreach my $letter (keys %color) {
    $color{lc($letter)} = $color{$letter};
  }

  my $span_open = "<span style='font-family: courier, sans-serif; font-weight:bold ; font-size: 14px; color:";

  my $color_consensus = "";
  $color_consensus .= "<b>" if ($args{bold});

  ## One span per letter. Iterating over the letters themselves (rather
  ## than over the positions 0..length) avoids producing a trailing
  ## empty span.
  foreach my $letter (split(//, $consensus)) {
    my $color = defined($color{$letter}) ? $color{$letter} : $color{other};
    $color_consensus .= $span_open.$color."'>";
    $color_consensus .= $letter;
    $color_consensus .= "</span>";
  }
  $color_consensus .= "</b>" if ($args{bold});
  return $color_consensus;
}


################################################################
## Generate a HTML table with the first matches between one motif and
## a set of reference motifs or a database.
##
## Arguments:
##   $db_name            name of the motif collection (only used for
##                       tracing)
##   $file               tab-delimited comparison file. The line
##                       starting with '#' gives the column names,
##                       the lines starting with ';' are comments.
##   $colored_consensus  HTML-formatted consensus of the query motif,
##                       displayed in the header of the table
##
## Returns a list with two elements: the HTML table (string) and the
## total number of matches found in the file. Only the first matches
## ($max_match_nb) are displayed in the table.
sub SynthesisOneMotifComparison {
  my ($db_name, $file,$colored_consensus) = @_;
  my @match_fields = qw(name2 id2 strand w Wr cor Ncor consensus1 consensus2);
  my @match_fields_names = ("name" ,"id" ,"strand", "Nb overlap", "%", "Pearson", "Normalized");
  my @match_fields_names2 = ("&nbsp;" ,"&nbsp;", "&nbsp;", "columns" ,"aligned" ,"correlation", " cor");
  my %col_index = (); ## Column index per field name, filled from the header line

  ################################################################
  ## Print the header of the DB match table (two header rows)
  my $db_match_table ="<table class='whitebg'>\n";
  $db_match_table .= "<tr style='font-size:90%;'>\n";
  foreach my $field (@match_fields_names) {
    $db_match_table .= "<th style='border-bottom-style:none;'>".$field."</th>\n";
  }
  $db_match_table .= "<th style='border-bottom-style:none;'>aligned col. motif</th>\n";
  $db_match_table .= "<th style='border-bottom-style:none;'>aligned col. match</th>\n";
  $db_match_table .= "</tr>\n";
  $db_match_table .= "<tr style='font-size:90%;'>\n";
  foreach my $field (@match_fields_names2) {
    $db_match_table .= "<th>".$field."</th>\n";
  }
  $db_match_table .= "<th>".$colored_consensus."</th>\n";
  $db_match_table .= "<th></th>\n";
  $db_match_table .= "</tr>\n";

  my $match_nb = 0;
  my $max_match_nb = 3;	## Max number of matches to display in the synthetic table.

  ## Read the comparison file
  my ($db_compa) = &OpenInputFile($file);
  while (my $line = <$db_compa>) {
    chomp($line);
    next unless ($line =~ /\S/); ## Skip empty lines
    next if ($line =~ /^;/); ## Skip comment lines

    ## Header line: index the columns by field name
    if ($line =~ /^#/) {
      $line =~ s/^#//;
      my @header_fields = split("\t", $line);
      for my $h (0..$#header_fields) {
        my $field = &RSAT::util::trim($header_fields[$h]);
        $col_index{$field} = $h;
      }
      next;
    }

    ################################################################
    ## All the matches are counted, but only the first ones are
    ## reported in the table
    $match_nb++;
    next if ($match_nb > $max_match_nb);

    ## Add a row to the DB match table
    $db_match_table .= "<tr>\n";
    my @fields = split("\t", $line);
    foreach my $field (@match_fields) {
      my $value = "NA";
      ## The value stays "NA" if the column is absent from the header
      ## or if the row is shorter than the header
      if ((defined($col_index{$field})) && (defined($fields[$col_index{$field}]))) {
        $value = $fields[$col_index{$field}];
        if ($field =~ /^consensus/) {
          $value = &ColorConsensus($value, bold=>1, iupac=>$main::param{iupac_coloring});
        }
      }
      $db_match_table .= "<td>".$value."</td>\n";
    }
    $db_match_table .= "</tr>\n";
  }
  close $db_compa;

  ## Report number of matches, with a link to the complete file
  my $link = &RSAT::util::RelativePath($main::outfile{synthesis}, $file);
  $db_match_table .= "<tr>\n";
  $db_match_table .= "<td colspan=9><a href='".$link."'>Total matches= ".$match_nb."</a>";
  if ($match_nb > $max_match_nb) {
    my $more_matches = $match_nb -$max_match_nb;
    $db_match_table .= " (".$more_matches." more)";
  }
  $db_match_table .= "</td>\n";
  $db_match_table .= "</tr>\n";
  $db_match_table .="</table>\n";
  return ($db_match_table, $match_nb);
}


################################################################
## Generate a HTML table summarizing the results for each discovered
## motif. This table is inserted in the synthetic report.
##
## For each motif returned by &ReadDiscoveredMotifs(), the following
## rows are printed on the synthesis handle ($syn):
## - logos (direct and reverse complement) with the colored consensus
##   and links to the matrix files;
## - top matches against the reference motifs and against each motif
##   database (@motif_databases);
## - predicted sites (distribution and enrichment graphs), with a
##   link to the UCSC genome browser when a genomic BED file exists.
##
## Side effect: the global array @motifs is reloaded.
sub SynthesisByMotif {
  print $syn &open_menu_heading($menu_nb++, "<a name='discovered_motifs'></a><h3>Discovered motifs (with motif comparison)</h3>\n", 0);
  print $syn "<table class='whitebg'>\n";

  ## Header line
  &SyntheticTableAddHeaderRow("Motif discovery");

  &ReadMotifTable();

  @motifs = &ReadDiscoveredMotifs();

  ## Note: the consensus strings are read from the attributes of the
  ## matrices (consensus.IUPAC). No background model is loaded here.

  ## Options for generating logos
  my $logo_opt = "";

  ## Treat each motif
  my $motif_nb = scalar(@motifs);
  for my $m (1..$motif_nb) {
    my $matrix = $motifs[$m-1];
    my $motif_id = $matrix->get_attribute("id");
    &RSAT::message::TimeWarn("Synthesis for discovered motif", $m."/".$motif_nb, $motif_id) if ($main::verbose >= 3);

    ## Colored consensus, direct and reverse complement
    my $consensus = $matrix->get_attribute("consensus.IUPAC");
    my $colored_consensus = &ColorConsensus($consensus, bold=>1, iupac=>$main::param{iupac_coloring});
    my $consensus_rc = $matrix->get_attribute("consensus.IUPAC.rc");
    my $colored_consensus_rc = &ColorConsensus($consensus_rc,bold=>1, iupac=>$main::param{iupac_coloring});

    ## Compute matrix logos (last argument: 0 = direct, 1 = reverse complement)
    $matrix->makeLogo($main::prefix{$motif_id."_logo"},$main::param{img_format},$logo_opt, 0);
    $matrix->makeLogo($main::prefix{$motif_id."_logo_rc"},$main::param{img_format},$logo_opt, 1);

    print $syn "<tr>\n";
    print $syn "<td><a name='discovered_motifs_".$m."'></a><b> Motif ", $m,"</b> \n";
    print $syn "&nbsp;"x2, $matrix->get_attribute("name"),"</td>\n";
    print $syn "<td><table align='center' ><tr>";

    ## Sequence logo
    my $file = $main::outfile{$motif_id."_logo"};
    if (-e $file) {
      my $logo_link = &RSAT::util::RelativePath($main::outfile{synthesis}, $file);
      print $syn ("<td align='center' style='border-bottom-style:none;' >", $colored_consensus, "<br>\n",
                  "<a href='", $logo_link, "'>",
                  "<img border='".$main::param{img_border}."' height='",$main::param{logo_table_height},
                  "' src='",$logo_link,"'>","</a>","</td>\n");
    } else {
      print $syn "<td><font color='red'>No logo</font>","</td>\n";
    }

    ## Sequence logo, reverse complementary
    $file = $main::outfile{$motif_id."_logo_rc"};
    if (-e $file) {
      my $logo_link = &RSAT::util::RelativePath($main::outfile{synthesis}, $file);
      print $syn ("<td align='center' style='border-bottom-style:none;border-right-style:none;' >", $colored_consensus_rc, "<br>\n",
                  "<a href='", $logo_link, "'>",
                  "<img border='".$main::param{img_border}."' height='",$main::param{logo_table_height},
                  "' src='",$logo_link,"'>","</a>","</td>\n");
    } else {
      ## The cell has to be opened before being closed
      print $syn "<td><font color='red'>No logo</font>","</td>\n";
    }
    print $syn "</tr></table></td>\n";
    &PrintFileLinks_New(
                        "matrix", ["tab format",$main::outfile{$motif_id."_tab"},
                                   "transfac format",$main::outfile{$motif_id."_tf"}
                                  ]
                       );
    print $syn "</tr>\n";

    ## Motif comparison results: collect the comparisons to report
    ## (reference motifs first, then each motif database)
    my @compa_names = ();
    my @compa_files = ();
    my @compa_prefixes = ();

    if (defined($main::infile{ref_motifs})) {
      push @compa_names, "Reference motifs";
      push @compa_files, $main::outfile{$motif_id.'_vs_ref'};
      push @compa_prefixes, $main::prefix{$motif_id.'_vs_ref_prefix'};
    }
    foreach my $db_name (@motif_databases) {
      push @compa_names, $db_name;
      push @compa_files, $main::outfile{$motif_id.'_vs_db_'.$db_name};
      push @compa_prefixes, $main::prefix{$motif_id.'_vs_db_'.$db_name.'_prefix'};
    }
    for my $f (0..$#compa_names) {
      my $db_name = $compa_names[$f];
      my $file = $compa_files[$f];
      my $prefix = $compa_prefixes[$f];
      if (-e $file) {
        my ($db_match_table, $match_nb) = &SynthesisOneMotifComparison($db_name, $file,$colored_consensus);

        ## Print the top matches + link to the complete file
        print $syn "<tr><td colspan='2' style='border-bottom-style:none;'>\n";
        print $syn "<h2 class='motif'>",$db_name,"\n";
        print $syn $db_match_table, "\n";
        if ($match_nb > 0) {
          &PrintFileLinks_New(
                              "match table", ["html",$prefix.".html",
                                              "text",$prefix.".tab"
                                             ],
                              "alignments (logos)", ["html", $prefix."_alignments_1ton.html",
                                                     "text",$prefix."_alignments_1ton.tab"
                                                    ],
                             );
        } else {
          print $syn "<td>No match</td>\n";
        }
        print $syn "</tr>\n";
      }  else {
        print $syn "<tr>\n";
        print $syn "<td>", $db_name, "</td>\n";
        print $syn "<td>", "<font color='red'>File not found</font>", "</td>\n";
        print $syn "</tr>\n";
      }

    }
    print $syn "</td></tr>\n";

    ## Scan results
    my $scan_file = $main::outfile{$motif_id.'_pssm_sites'};
    if (-e $scan_file) {
      my $table = "<table><tr>";
      my $img = &RSAT::util::RelativePath($main::outfile{synthesis}, $main::outfile{$motif_id.'_pssm_site_distrib_graph'});
      $table .= "<td style='border-bottom-style:none;border-right-style:none;'>Distribution of sites<br><a  href='".$img."'><img border='".$param{img_border}."' width=300 src='".$img."'></a></td>";
      $img = &RSAT::util::RelativePath($main::outfile{synthesis}, $main::outfile{$motif_id.'_pssm_enrichment_png'});
      $table .= "<td style='border-bottom-style:none;border-right-style:none;'>Enrichment in binding sites<br><a  href='".$img."'><img border='".$param{img_border}."' width=200 src='".$img."'></a></td>";
      $table .= "</tr></table>";

      ## Display UCSC track
      ## If from fasta sequence: get the assembly from the first fasta header
      my $genome_assembly ="";
      if ($main::param{seq_source}) {
        $genome_assembly = ` grep '^>' $infile{test_seq} | head -n 1 ` ;
        chomp($genome_assembly);
        if ($main::param{seq_source} eq "galaxy") {
          ## Keep the part of the header between '>' and the first '_'
          $genome_assembly =~ s/>([^_]+)_.*$/$1/;
        }
        &RSAT::message::Info("Assembly from fasta header", $main::param{seq_source},$genome_assembly) if ($main::verbose >= 3);
        ## If from coord file
      } elsif ($main::param{coord_assembly}) {
        $genome_assembly = $main::param{coord_assembly};
      }

      if (-e $main::outfile{$motif_id.'_pssm_sites_genomic'}) {
        ## UCSC link
        my $BED_URL = $ENV{rsat_www}."/tmp/".&RSAT::util::RelativePath($main::dir{output}, $main::outfile{$motif_id.'_pssm_sites_genomic'});
        my $browser_url = "<a target='_blank' href='";
        $browser_url .= "http://genome.ucsc.edu/cgi-bin/hgTracks?";
        $browser_url .= "db=".$genome_assembly;
        $browser_url .= "&hgt.customText=".$BED_URL;
        $browser_url .= "'><img border=0 height='20' src='images_html/UCSC_icon.jpg' style='vertical-align:text-bottom;' alt='UCSC'></a>";

        ## The browser link is passed as a key without file: it is
        ## printed as is by &PrintFileLinks_New()
        &SyntheticTableAddRow_New("Predicted sites on input peaks",
                                  $table,
                                  "view in genome browser ", [$browser_url],
                                  "sites", ["text",$main::outfile{$motif_id.'_pssm_sites'},
                                            "BED (UCSC track)",$main::outfile{$motif_id.'_pssm_sites_genomic'}
                                           ],
                                  "distribution", ["text",$main::outfile{$motif_id.'_pssm_site_distrib'},
                                                  ],
                                  "enrichment", [ "text",$main::outfile{$motif_id.'_pssm_enrichment'},
                                                ]
                                 );
      } else {			## without UCSC track
        &SyntheticTableAddRow_New("Predicted sites on input peaks",
                                  $table,
                                  "sites", ["text",$main::outfile{$motif_id.'_pssm_sites'},
                                           ],
                                  "distribution", ["text",$main::outfile{$motif_id.'_pssm_site_distrib'},
                                                  ],
                                  "enrichment", [ "text",$main::outfile{$motif_id.'_pssm_enrichment'},
                                                ]
                                 );
      }
    }

    ## add a black line
    print $syn "<tr><td colspan=5 style='border-top-style:none;border-bottom-color:black;border-bottom-width:3px;'><pre/></td></tr>";
  }

  ## Close the table opened at the beginning of this section (only
  ## once: the nested tables are closed where they are printed)
  print $syn "</table><p>\n";
  print $syn &close_menu_heading();
}


################################################################
## Open the HTML page for the synthesis.
##
## - Creates the directory images_html in the output directory and
##   copies the icons (arrows, UCSC) from the RSAT distribution.
## - Opens the synthesis file ($main::outfile{synthesis}) for writing;
##   the handle is stored in the global variable $syn.
## - Stores the directory of the synthesis file in the global variable
##   $synthesis_path.
## - Prints the HTML header returned by &PrintHtmlResultHeader().
sub OpenSynthesis {
  ## Create a directory to store the arrow icons
  $main::dir{images_html} = $dir{output}."/images_html";
  &RSAT::util::CheckOutDir($main::dir{images_html});
  my $cmd = "rsync -ruptl ";
  $cmd .= $ENV{RSAT}."/perl-scripts/lib/images/arrow*.gif ";
  $cmd .= $ENV{RSAT}."/perl-scripts/lib/images/UCSC_icon.jpg ";
  $cmd .= " ".$main::dir{images_html};
  &one_command($cmd, 1);

  ## Open the HTML file
  $syn = &OpenOutputFile($main::outfile{synthesis});

  ## Directory of the synthesis file. File::Basename (core module) is
  ## used rather than the shell command dirname, which fails on paths
  ## containing spaces or shell metacharacters and costs a subprocess.
  require File::Basename;
  $synthesis_path = File::Basename::dirname($main::outfile{synthesis});

  print $syn &PrintHtmlResultHeader(program=>"peak-motifs", "title"=>$main::param{title}, "result_toc"=>1);
}


################################################################
## Report results of motif comparisons.
##
## Prints the "Motif comparisons" section on the synthesis handle
## ($syn): a heading, then a table filled by the sub-reports that are
## relevant for this analysis (word comparisons, discovered motifs
## versus reference motifs, discovered motifs versus databases).
sub SynthesisMotifCompa {
  my $section_title = "<a name='motif_compa'></a><h3>Motif comparisons</h3>";
  print $syn &open_menu_heading($menu_nb++, $section_title, 0);
  print $syn "<table class='whitebg'>\n";

  ## Word comparisons
  if ($task{merge_words}) {
    &SynthesisWordCompa();
  }

  ## Comparison between discovered motifs and reference motif
  if (defined($main::infile{ref_motifs})) {
    &SynthesisMotifsVersusReference();
  }

  ## Comparison between discovered motifs and database(s)
  if (scalar(@motif_databases) > 0) {
    &SynthesisMotifsVersusDatabase();
  }

  print $syn "</table><p>\n";
  print $syn &close_menu_heading();
}


################################################################
## Report the list of input/output files. This has to be done at the
## end of the process, since it includes the list of discovered
## motifs, which is not known before having run the motif discovery
## algorithms.
sub SynthesisParamsAndFiles {

  ## Write the HTML file with the list of input/output files and the parameters
  &ReportParamsAndFiles();

  ## Rows of the log table: row title followed by (label, file) pairs
  my @log_rows = (
                  ["Parameters and files",
                   "txt", $main::outfile{log},
                   "html", $main::outfile{links},
                  ],
                  ["Time log",
                   "time table", $main::outfile{timetable},
                   "time log", $main::outfile{timelog},
                   "html", $main::outfile{timelog_html},
                  ],
                 );

  ## Log files
  print $syn &open_menu_heading($menu_nb++, "<a name='logs'></a><h3>Logs and parameter files</h3>", 1);
  print $syn "<table class='whitebg'>\n";
  &SyntheticTableAddHeaderRow("Log files");
  foreach my $log_row (@log_rows) {
    my ($row_title, @file_pairs) = @{$log_row};
    &SyntheticTableAddRow($row_title, "", @file_pairs);
  }

  ## Close the synthetic table
  print $syn "</table></p>\n";
  print $syn &close_menu_heading();

}


################################################################
## Generate a compressed archive with all the results.
##
## Usage:
##   &Archive($remove_first, $to_archive);
##
## Arguments:
##   $remove_first  if true, the previous version of the archive
##                  ($main::outfile{archive}) is deleted before
##                  archiving
##   $to_archive    file or directory to archive (default: the whole
##                  output directory, $main::dir{output})
##
## The archive format is taken from $main::param{archive_format}
## (zip, tar or tgz); any other value is a fatal error.
##
## NOTE(review): with the tar and tgz formats the command uses
## "tar -c", which creates a new archive. When this function is called
## a second time with a single file (as done for the synthesis file),
## the archive presumably ends up containing that file only - to be
## confirmed.
sub Archive {
  my ($remove_first, $to_archive) = @_;

  ## By default, archive the whole output directory
  $to_archive = $main::dir{output} unless $to_archive;

  &RSAT::message::TimeWarn("\n; Archiving data and results") if ($main::verbose >= 2);

  ## The command has to be declared before its first use. Otherwise
  ## the deletion below would be appended to a global variable, and
  ## would never be executed.
  my $cmd = "";

  ## Delete previous version of the archive to avoid including the old archive in the new one
  $cmd .= "rm -f ".$main::outfile{archive}."; " if ($remove_first);

  ## Paths relative to the parent of the archive directory, in order
  ## to avoid including the whole path in the archive
  my ($archive_dir, $archive) = &SplitFileName($main::outfile{archive});
  my ($archive_dir_dir, $archive_dir_base) = &SplitFileName($archive_dir);
  my $to_archive_rel_path = &RSAT::util::RelativePath($archive_dir_dir, $to_archive);
  my $archive_rel_path = &RSAT::util::RelativePath($archive_dir_dir, $main::outfile{archive});

  ## Archive all data and results
  if ($main::param{archive_format} eq "zip") {
    $cmd .= "(cd ".$archive_dir_dir." ; ";
    $cmd .= " zip -ryq ".$archive_rel_path." ".$to_archive_rel_path;
    $cmd .= " -x ".$archive; ## Do not include the archive in itself
    $cmd .= ")";
  } elsif (($main::param{archive_format} eq "tar") ||
           ($main::param{archive_format} eq "tgz")) {
    $cmd .= "tar -cpf ".$main::outfile{archive};
    $cmd .= " -z" if ($main::param{archive_format} eq "tgz");
    $cmd .= " -C ".$archive_dir_dir; ## Avoid including the whole path in the archive paths
    $cmd .= " --exclude ".$archive; ## Do not include the archive in itself
    $cmd .= " ".$to_archive_rel_path;
  } else {
    &RSAT::error::FatalError($main::param{archive_format}, "Invalid archive format. Supported: zip, tar, tgz.");
  }

  &one_command($cmd, 1);

  &RSAT::message::TimeWarn("Archive", $main::outfile{archive}) if ($main::verbose >= 2);
}


################################################################
## Generate a HTML report summarizing the main results, with links to
## the original result files.
##
## Unless the synthesis is progressive ($progressive_synthesis), all
## the sections of the report are generated here, in their display
## order. In all cases the table of content is appended, the HTML
## page is closed, and the page is added to the archive if the
## archive task is active.
sub Synthesis {
  if ($main::verbose >= 2) {
    &RSAT::message::TimeWarn("\n; Generating HTML synthesis");
  }

  ## Sections of the report, in display order
  my @section_writers = (
                         \&OpenSynthesis,                ## Opening of the HTML page
                         \&SynthesisSequenceComposition, ## Header of the synthetic table
                         \&SynthesisRefMotif,            ## Reference motifs
                         \&SynthesisMotifsByAlgo,        ## Discovered motifs (by algorithm)
                         \&SynthesisByMotif,             ## Discovered motifs (by motif)
                         \&SynthesisMotifCompa,          ## Motif comparisons
                         \&SynthesisParamsAndFiles,      ## Parameters and files
                        );
  foreach my $section_writer (@section_writers) {
    ## The flag is checked before each section
    next if ($progressive_synthesis);
    &$section_writer();
  }

  ## End of the HTML file
  print $syn &ResultTOC();
  print $syn "</body>\n";
  print $syn "</html>\n";
  close $syn;

  ## Add the HTML synthesis report to the archive
  if ($task{archive}) {
    &Archive(0, $main::outfile{synthesis});
  }

  if ($main::verbose >= 2) {
    &RSAT::message::TimeWarn("Synthetic report", $main::outfile{synthesis});
  }
}

################################################################
## Add a row to the synthetic table.
##
## The row contains three cells: the type of result, a summary, and a
## list of links built by &PrintFileLinks() from the remaining
## arguments (label, file, label, file, ...).
sub SyntheticTableAddRow {
  my ($row_type, $row_summary, @link_pairs) = @_;

  ## Row opening + the two text cells
  my $first_cells = "<tr>\n";
  $first_cells .= "<td>".$row_type."</td>\n";
  $first_cells .= "<td>".$row_summary."</td>\n";
  print $syn $first_cells;

  ## Cell with the file links
  &PrintFileLinks(@link_pairs);

  print $syn "</tr>\n";
}
################################################################
## Add a row to the synthetic table (layout with a top border).
##
## The row contains three cells: the type of result, a summary, and
## the groups of links built by &PrintFileLinks_New() from the
## remaining arguments (title, [label, file, ...], title, [...], ...).
sub SyntheticTableAddRow_New {
  my ($row_type, $row_summary, @link_groups) = @_;
  my $cell_open = "<td style='border-top: 1px solid #cbcbb4;'>";

  ## Row opening + the two text cells
  my $first_cells = "<tr>\n";
  $first_cells .= $cell_open.$row_type."</td>\n";
  $first_cells .= $cell_open.$row_summary."</td>\n";
  print $syn $first_cells;

  ## Cell with the groups of file links
  &PrintFileLinks_New(@link_groups);

  print $syn "</tr>\n";
}

################################################################
## Taking as input a list of (title, links) pairs, print a HTML table
## cell with one group of links per title.
##
## Usage:
##   &PrintFileLinks_New("title 1", ["label", $file, "label", $file],
##                       "title 2", [...]);
##
## Each "links" element is a reference to an array of (label, file)
## pairs. A label is printed as a link if its file exists, and as is
## otherwise (callers use this to pass a ready-made HTML link as a
## label without file). The list of a group stops at the first empty
## label.
##
## The arrays passed by the caller are not modified.
sub PrintFileLinks_New {
  my (@files) = @_;
  print $syn "<td >\n";

  for (my $i=0; $i<=$#files; $i+=2){
    my $title = $files[$i];
    print $syn "[ ".$title.": \n";

    ## Treat each link. The loop works on a copy of the list, in order
    ## to leave the array of the caller untouched.
    my $links = $files[$i+1];
    my @pairs = (ref($links) eq "ARRAY") ? @{$links} : ();
    while (scalar(@pairs) > 0) {
      my $key = shift(@pairs);
      last unless $key;
      my $file = shift(@pairs);
      &RSAT::message::Debug("&PrintFileLinks_New()", $key, $file) if ($main::verbose >= 3);
      if (($file)&&(-e $file)) {
        my $link = &RSAT::util::RelativePath($main::outfile{synthesis}, $file);
        print $syn "<a href='".$link."'>".$key."</a>\n";
      } else {
        print $syn $key."\n";
      }
    }
    print $syn " ]<br/>";
  }
  print $syn "</td>\n";
}

################################################################
## Taking as input a key-file list, print a HTML table cell with a
## list of links to the files.
##
## Usage:
##   &PrintFileLinks("label 1", $file1, "label 2", $file2, ...);
##
## A label is printed as a link if its file exists, and in red
## otherwise (including when the file is not defined). The list stops
## at the first empty label.
sub PrintFileLinks {
  my (@files) = @_;
  print $syn "<td>\n";
  while (my $key = shift(@files)) {
    my $file = shift(@files);
    ## The file may be undefined (output file not specified, or odd
    ## number of arguments): treat it as a missing file rather than
    ## testing the existence of an undefined value.
    if ((defined($file)) && (-e $file)) {
      my $link = &RSAT::util::RelativePath($main::outfile{synthesis}, $file);
      print $syn "<a href='".$link."'>[".$key."]</a><br>\n";
    } else {
      print $syn "<font color='red'>[".$key."]</font><br>\n";
    }
  }
  print $syn "</td>\n";
}

################################################################
## Add a header row to the synthetic table.
##
## The header is printed in a single cell spanning the three columns
## of the table.
sub SyntheticTableAddHeaderRow {
  my ($header) = @_;
  my $header_row = "<tr>\n"."<th colspan=3>\n";
  $header_row .= $header;
  $header_row .= "</th>\n"."</tr>\n";
  print $syn $header_row;
}

################################################################
## Add sequence composition on the synthetic report.
##
## For each sequence type (@seq_types), the following rows are printed
## on the synthesis handle ($syn):
## - sequence statistics (number of peaks, total size, min/mean/max
##   length) collected from the sequence length distribution file,
##   with the length distribution graph and, when a coordinate file
##   exists, a link to the UCSC genome browser;
## - residue composition (transition frequencies and position
##   profiles) for each oligo length of @profiles_oligo_lengths.
sub SynthesisSequenceComposition {
  print $syn &open_menu_heading($menu_nb++, "<a name='seq_composition'></a><h3>Sequence composition</h3>", 0);
  print $syn "<p><table class='whitebg'>\n";

  foreach my $seq_type (@seq_types) {

    ## Header line
    &SyntheticTableAddHeaderRow("Sequence composition (peak sequences)");

    ## Statistics on sequence lengths. Each statistics is reported on
    ## a comment line ("; count ...", "; min ...") of the distribution file.
    my %stat = (count=>"NA", min=>"NA", mean=>"NA", max=>"NA", sum=>"NA");
    my $distrib_file = $main::outfile{$seq_type."_seqlen_distrib"};
    if (-e $distrib_file) {
      &RSAT::message::Info("Collecting sequence length statistics from file", $distrib_file) if ($main::verbose >= 3);

      foreach my $stat_name (qw(count min mean max sum)) {
        my $value = `grep '; $stat_name' $distrib_file`;
        chomp($value);
        $value =~ s/.*$stat_name\s*//;
        $stat{$stat_name} = $value;
      }

      ## Total sequence size is converted from bp to kb
      $stat{sum} = round($stat{sum}/1000);

      &RSAT::message::Debug("Nb of peaks=".$stat{count},
                            "sequence size=".$stat{sum}) if ($main::verbose >= 5);
    } else {
      &RSAT::message::Warning("Sequence length file does not exist. Cannot collect statistics for the synthesis.\n", $distrib_file);
    }

    ## Sequence lengths
    my $img = &RSAT::util::RelativePath($main::outfile{synthesis}, $main::outfile{$seq_type."_seqlen_distrib_graph"});
    my $seq_stats = "<u>Nb of peaks:</u> ".$stat{count};
    $seq_stats .= "<br>\n<u>Total seq. size:</u> ".$stat{sum}." kb";
    $seq_stats .= "<br>\n<u>Min length:</u> ".$stat{min}." bp";
    $seq_stats .= "<br>\n<u>Mean length:</u> ".$stat{mean}." bp";
    $seq_stats .= "<br>\n<u>Max length:</u> ".$stat{max}." bp";
    my $seqlen_graph = "<a  href='".$img."'><img border='".$param{img_border}."' height='150' src='".$img."'></a>";

    ## Display peaks UCSC track.
    ## If from fasta sequence: get the assembly from the first fasta header.
    my $genome_assembly ="";
    if ($main::param{seq_source}) {
      $genome_assembly = ` grep '^>' $infile{test_seq} | head -n 1 ` ;
      chomp($genome_assembly);
      if ($main::param{seq_source} eq "galaxy") {
        ## Keep the part of the header between '>' and the first '_'
        $genome_assembly =~ s/>([^_]+)_.*$/$1/;
      }
      &RSAT::message::Info("Assembly from fasta header", $main::param{seq_source},$genome_assembly) if ($main::verbose >= 3);
      ## If from coord file
    } elsif ($main::param{coord_assembly}) {
      $genome_assembly = $main::param{coord_assembly};
    }

    ## Select the coordinate file (BED): the coordinates of the test
    ## sequences if the file exists, else the input coordinate
    ## file. A single file is selected, so that the URL sent to the
    ## genome browser always contains one valid path.
    my $BED = "";
    foreach my $coord_file ($main::outfile{"test_seq_coord"}, $main::infile{coord}) {
      if (($coord_file) && (-e $coord_file)) {
        $BED = $coord_file;
        last;
      }
    }

    ## Links to the sequence files, shared by the two layouts below
    my @seq_file_links = (
                          "seq",["converted",$main::outfile{$seq_type."_converted"},
                                 "purged" , $main::outfile{$seq_type."_purged"}
                                ],
                          "lengths",['list', $main::outfile{$seq_type."_seqlen"},
                                     'distrib',$distrib_file,
                                     'graph',$main::outfile{$seq_type."_seqlen_distrib_graph"}
                                    ]
                         );

    if ($BED) {
      ## UCSC link
      my $BED_URL = $ENV{rsat_www}."/tmp/".&RSAT::util::RelativePath($main::dir{output}, $BED);
      my $browser_url = "<a target='_blank' href='";
      $browser_url .= "http://genome.ucsc.edu/cgi-bin/hgTracks?";
      $browser_url .= "db=".$genome_assembly;
      $browser_url .= "&hgt.customText=".$BED_URL;
      $browser_url .= "'><img border=0 height='20' src='images_html/UCSC_icon.jpg' style='vertical-align:text-bottom;' alt='UCSC'></a>";

      ## The browser link is passed as a label without file: it is
      ## printed as is by &PrintFileLinks_New()
      &SyntheticTableAddRow_New($seq_stats,
                                $seqlen_graph,
                                "view peaks in genome browser ", [$browser_url],
                                "coordinates", ["UCSC BED track",$BED],
                                @seq_file_links
                               );
    } else {			## without UCSC track
      &SyntheticTableAddRow_New($seq_stats,
                                $seqlen_graph,
                                @seq_file_links
                               );
    }

    ## Residue composition
    for my $ol (@profiles_oligo_lengths) {
      my $table = "<table><tr>";
      my $img = &RSAT::util::RelativePath($main::outfile{synthesis}, $main::outfile{$ol."nt_".$seq_type."_heatmap"});
      $table .= "<td style='border-bottom-style:none;border-right-style:none;'>Transition frequencies<br><a  href='".$img."'><img border='".$param{img_border}."' width=200 src='".$img."'></a></td>";
      $img = &RSAT::util::RelativePath($main::outfile{synthesis}, $main::outfile{$ol."nt_".$seq_type."_profiles_graph"});
      $table .= "<td style='border-bottom-style:none;border-right-style:none;'>Position profile<br><a  href='".$img."'><img border='".$param{img_border}."' height='150' src='".$img."'></a></td>";
      $table .= "</tr></table>";
      &SyntheticTableAddRow_New($ol."nt composition",
                                $table,
                                $ol."nt", ["freq",$main::outfile{$ol."nt_".$seq_type."_freq"},
                                           "transitions",$main::outfile{$ol."nt_".$seq_type."_transitions"}
                                          ],
                                "bg model", [ "Inclusive format",$main::outfile{$ol."nt_".$seq_type."_inclusive"}
                                            ],
                                "profile", [ "table",$main::outfile{$ol."nt_".$seq_type."_profiles"},
                                             "html(individual)",$main::outfile{$ol."nt_".$seq_type."_profiles_index"}
                                           ]
                               );
    }
  }
  print $syn "</table><p>\n";
  print $syn &close_menu_heading();
}

################################################################
## Add the reference motif to the report
sub SynthesisRefMotif {
  ## Usage:
  ##   &SynthesisRefMotif();
  ##
  ## Print to the synthesis file handle ($syn) a menu section with a
  ## table showing the logo (and reverse complementary logo) of each
  ## reference motif, followed by links to the reference motif files.
  ##
  ## Nothing is printed when no reference motif file was specified: the
  ## closing tags must only be written for a section that was opened.
  return unless (defined($main::infile{ref_motifs}));

  print $syn &open_menu_heading($menu_nb++, "<a name='ref_motifs'></a><h3>Reference motifs</h3>", 0);
  print $syn "<p><table class='whitebg'>\n";
  &SyntheticTableAddHeaderRow("Reference motif(s)");

  my $logo_table = "<table class='whitebg' align='center' style='border-style:none;'>";

  ## Get the names of all reference logo files. Reverse complementary
  ## logos are discarded, so that each motif is counted only once.
  my @logo_files = glob($main::outfile{ref_motifs_logo}."_m*.".$main::param{img_format});
  @logo_files = grep {!/_rc\./} @logo_files;
  #    my @logo_files = glob($main::outfile{ref_motifs_logo}."_m\d+.".$main::param{img_format});

  ## Prepare a table with the logos (one row per reference motif)
  for my $i (1..scalar(@logo_files)) {
    $logo_table .= "<tr>\n";
    $logo_table .= "<td style='border-right-style:none;'>Ref motif ".$i."</td>\n";
    my $logo_file = $main::outfile{ref_motifs_logo}."_m".$i.".".$main::param{img_format};
    my $logo_file_rc = $main::outfile{ref_motifs_logo}."_m".$i."_rc.".$main::param{img_format};

    ## Direct logo
    if (-e $logo_file) {
      my $img = &RSAT::util::RelativePath($main::outfile{synthesis}, $logo_file);
      $logo_table .= "<td align='right'><a  href='".$img."'><img border='".$param{img_border}."' height='".$main::param{logo_table_height}."' src='".$img."'></a></td>\n";
    } elsif ($main::verbose >= 2) {
      &RSAT::message::Warning("Cannot find reference logo", $logo_file);
    }

    ## Reverse complementary logo
    if (-e $logo_file_rc) {
      my $img = &RSAT::util::RelativePath($main::outfile{synthesis}, $logo_file_rc);
      $logo_table .= "<td align='left' style='border-right-style:none;'><a  href='".$img."'><img border='".$param{img_border}."' height='".$main::param{logo_table_height}."' src='".$img."'></a></td>\n";
    } elsif ($main::verbose >= 2) {
      &RSAT::message::Warning("Cannot find reverse complementary logo for the reference motif", $logo_file_rc);
    }
    $logo_table .= "</tr>\n";
  }
  $logo_table .= "</table>\n";

  &SyntheticTableAddRow("Reference motif",
			$logo_table,
			#			  $main::param{ref_motifs_format}, &RSAT::util::RelativePath($main::outfile{synthesis}, $main::infile{ref_motifs}),
			"transfac format",$main::outfile{ref_motifs_transfac},
			"tab format",$main::outfile{ref_motifs_tab},
		       );

  ## Close the table and the menu section opened above
  print $syn "</table><p>\n";
  print $syn &close_menu_heading();
}


################################################################
## Report discovered motifs by algorithms
sub SynthesisMotifsByAlgo {
  ## Usage:
  ##   &SynthesisMotifsByAlgo();
  ##
  ## Print to the synthesis file handle ($syn) the section "Discovered
  ## motifs (by algorithm)", with one synthesis entry per pattern type
  ## listed in @pattern_types.
  if ($main::verbose >= 2) {
    &RSAT::message::TimeWarn("Synthesis per motif discovery algorithm");
  }

  ## Open the menu section and its table
  print $syn &open_menu_heading($menu_nb++, "<a name='motifs_by_algo'></a><h3>Discovered motifs (by algorithm)</h3>", 1);
  print $syn "<table class='whitebg'>\n";
  &SyntheticTableAddHeaderRow("motif discovery");

  ## One synthesis per motif discovery algorithm. A named loop variable
  ## is used on purpose: the callee reads a file and must not be given
  ## an alias on $_.
  for my $algo (@pattern_types) {
    &SynthesisPatternDisco($algo);
  }

  ## Close the table and the menu section
  print $syn "</table><p>\n";
  print $syn &close_menu_heading();
}

################################################################
## Synthesis for one motif discovery algorithm
## Usage:
##   &SynthesisPatternDisco($pattern_type);
## Where pattern type can be oligos, dyads, local_words, oligo-diff
sub SynthesisPatternDisco {
  ## Add to the synthesis table one row for a given motif discovery
  ## algorithm: the logos of the discovered matrices, annotated with the
  ## consensus and significance read from the pattern assembly file, and
  ## links to the result files.
  ##
  ## Argument:
  ##   $pattern_type  key used to build the names of the output files
  ##                  (e.g. $pattern_type.'_asmb').
  my ($pattern_type) = @_;
  &RSAT::message::TimeWarn("Synthesis for pattern type", $pattern_type) if ($main::verbose >= 3);

  ## Without assembly file there is nothing to synthesize: only report
  ## the links to the expected files.
  unless (-e $main::outfile{$pattern_type.'_asmb'}) {
    &SyntheticTableAddRow($pattern_type,
			  "<font color='red'>Missing files</font>",
			  $pattern_type,$main::outfile{$pattern_type},
			  "assembly",$main::outfile{$pattern_type.'_asmb'},
			  "sig matrix",$main::outfile{$pattern_type.'_pssm_sig'},
			  "matrices",$main::outfile{$pattern_type.'_pssm_counts'},
			 );
    return;
  }

  ## Synthesize results of pattern assembly (assembly consensus + sig scores).
  ## The information is stored in a lexical array indexed by pattern
  ## number, so that entries collected for one pattern type can never be
  ## reported for another one.
  my ($asmb) = &OpenInputFile($main::outfile{$pattern_type.'_asmb'});
  my $pattern_nb = 0;
  my $asmb_or_isol = "";
  my @pattern_info = ();
  while (my $line = <$asmb>) {
    next if ($line =~ /^#/);		## Skip header line
    next unless ($line =~ /\S/);	## Skip empty lines
    if ($line =~ /assembly\s+\#\s+(\d+)/i) {
      ## Assembly number (must be tested before skipping comment lines)
      $pattern_nb = $1;
      $asmb_or_isol = "asmb";
      next;
    } elsif ($line =~ /Isolated patterns/) {
      ## Isolated patterns at the end of the assembly file
      $asmb_or_isol = "isol";
      next;
    }
    next if ($line =~ /^;/);		## Skip comments
    if ($asmb_or_isol eq "isol") {
      ## Each isolated pattern gets its own number
      $pattern_nb++;
    } elsif ($line !~ /consensus/) {
      ## Within an assembly, only the consensus line is reported
      next;
    }
    chomp($line); ## avoid a trailing newline in the last field
    my ($word, $rc_word, $score) = split("\t", $line);
    $pattern_info[$pattern_nb] = {type => $asmb_or_isol,
				  word => $word,
				  rc_word => $rc_word,
				  score => $score};
  }
  close $asmb;

  ## Synthesize matrix logos
  my $pattern_table = "<table class='whitebg' align='center' style='border-style:none;'>";

  ## The loop runs over the requested number of motifs, which can exceed
  ## the number of patterns found in the assembly file: the annotations
  ## of a logo may thus be undefined, and are then left empty.
  foreach my $logo_nb (1..$main::param{matrix_nmotifs}) {
    $pattern_table .= "<tr>\n";
    my $logo_file = $main::outfile{$pattern_type.'_pssm_logo'.$logo_nb};
    my $logo_file_rc = $main::outfile{$pattern_type.'_pssm_logo_rc'.$logo_nb};

    my $info = $pattern_info[$logo_nb] || {};
    my $type = defined($info->{type}) ? $info->{type} : "";
    my $word = defined($info->{word}) ? $info->{word} : "";
    my $rc_word = defined($info->{rc_word}) ? $info->{rc_word} : "";
    my $score = $info->{score};

    ## Highlight significant patterns: bold above 10, bold + red from 75
    my $start_format="";
    my $end_format="";
    if (defined($score) && &IsReal($score)) {
      if ($score >10) {
	$start_format .= '<b>';
	$end_format .= '</b>';
	if ($score >= 75) {
	  $start_format .= "<font color='red'>";
	  $end_format = "</font>".$end_format;
	}
      }
    }

    &RSAT::message::Debug("Logo", $logo_nb, $logo_file) if ($main::verbose >= 5);

    ## Direct logo, with pattern type, significance and consensus
    if (defined($logo_file) && (-e $logo_file)) {
      $pattern_table .= "<td style='border-right-style:none;'>".$pattern_type."_m".$logo_nb."</td>";
      my $img = &RSAT::util::RelativePath($main::outfile{synthesis}, $logo_file);
      $pattern_table .= "<td align='right'>";
      $pattern_table .= $start_format;
      $pattern_table .= $type;
      $pattern_table .= " (sig=".$score.")" if (defined($score));
      $pattern_table .= "&nbsp;"x5;
      $pattern_table .= $word;
      $pattern_table .= "<br><a  href='".$img."'><img  border='".$param{img_border}."' height='".$main::param{logo_table_height}."' src='".$img."'></a>";
      $pattern_table .= $end_format;
      $pattern_table .= "</td>\n";
    } elsif ($main::verbose >= 3) {
      &RSAT::message::Warning("Cannot find logo", $logo_file);
    }

    ## Reverse complementary logo, with reverse complementary consensus
    if (defined($logo_file_rc) && (-e $logo_file_rc)) {
      my $img = &RSAT::util::RelativePath($main::outfile{synthesis}, $logo_file_rc);
      $pattern_table .= "<td align='left' style='border-right-style:none;'>";
      $pattern_table .= $start_format;
      $pattern_table .= $rc_word;
      $pattern_table .= "<br><a  href='".$img."'><img border='".$param{img_border}."' height='".$main::param{logo_table_height}."' src='".$img."'></a>";
      $pattern_table .= $end_format;
      $pattern_table .= "</td>\n";
    } elsif ($main::verbose >= 3) {
      &RSAT::message::Warning("Cannot find reverse complementary logo", $logo_file_rc);
    }
    $pattern_table .= "</tr>\n";
  }
  $pattern_table .= "</table>\n";

  ## Add the row with the logo table and the links to the result files
  &SyntheticTableAddRow_New($pattern_type,
			    $pattern_table,
			    "discovered words",["text",$main::outfile{$pattern_type},
					       ],
			    "assembly",['text', $main::outfile{$pattern_type.'_asmb'},
					'sig matrices',$main::outfile{$pattern_type.'_pssm_sig'},
				       ],
			    "matrices",['tab format', $main::outfile{$pattern_type.'_pssm_counts'},
					'transfac format',$main::outfile{$pattern_type.'_pssm_tf'},
				       ],
			   );
}

################################################################
## Synthesis of word comparisons
sub SynthesisWordCompa {
  ## Usage:
  ##   &SynthesisWordCompa();
  ##
  ## Add to the synthesis table the row "Word comparisons": a clickable
  ## thumbnail of the merged words heatmap, and links to the merged
  ## words files (tab, html, heatmap).

  ## Path of the heatmap relative to the synthesis file
  my $heatmap = &RSAT::util::RelativePath($main::outfile{synthesis}, $main::outfile{merged_words_heatmap});

  ## Thumbnail linked to the full-size heatmap
  my $thumbnail = "<a  href='".$heatmap."'>";
  $thumbnail .= "<img border='".$param{img_border}."' height=150 src='".$heatmap."'>";
  $thumbnail .= "</a>";

  &SyntheticTableAddRow("Word comparisons", $thumbnail,
			"tab"     => $main::outfile{merged_words},
			"html"    => $main::outfile{merged_words_html},
			"heatmap" => $main::outfile{merged_words_heatmap},
		       );
}

################################################################
## Synthesis of motif comparisons
sub SynthesisMotifsVsMotifs {
  ## Usage:
  ##   &SynthesisMotifsVsMotifs();
  ##
  ## Add to the synthesis table the row "Motif comparisons": a clickable
  ## thumbnail of the comparison graph (png), and links to the
  ## discovered motifs, comparison tables, graphs and MCL cluster files.

  ## Path of the comparison graph relative to the synthesis file
  my $graph = &RSAT::util::RelativePath($main::outfile{synthesis}, $main::outfile{motifs_disco_compa_png});

  ## Thumbnail linked to the full-size graph
  my $thumbnail = "<a  href='".$graph."'>";
  $thumbnail .= "<img border='".$param{img_border}."' height=300 src='".$graph."'>";
  $thumbnail .= "</a>";

  &SyntheticTableAddRow("Motif comparisons", $thumbnail,
			"motifs"                  => $main::outfile{motifs_discovered},
			"table (txt)"             => $main::outfile{motifs_disco_compa},
			"table (html)"            => $main::outfile{motifs_disco_compa_html},
			"gml (for CytoScape)"     => $main::outfile{motifs_disco_compa_gml},
			"png (low resolution)"    => $main::outfile{motifs_disco_compa_png},
			"MCL clusters"            => $main::outfile{motifs_disco_clusters_tab},
			"intra-cluster degree"    => $main::outfile{motifs_disco_compa_cluster_intra_degree},
			"MCL cluster graph (tab)" => $main::outfile{motifs_disco_clusters_graph},
			"MCL cluster graph (gml)" => $main::outfile{motifs_disco_clusters_graph_gml},
			"MCL cluster graph (png)" => $main::outfile{motifs_disco_clusters_graph_png},
		       );
}

################################################################
## Synthesis of comparisons between discovered motifs and reference motif
sub SynthesisMotifsVersusReference {
  ## Usage:
  ##   &SynthesisMotifsVersusReference();
  ##
  ## Add to the synthesis table a header row followed by one row
  ## "Motifs versus reference motif": a clickable thumbnail of the
  ## comparison graph (png), and grouped links to the reference motif
  ## file, the comparison tables, the alignments and the graphs.
  &SyntheticTableAddHeaderRow("Discovered motifs versus reference motif");

  ## Path of the comparison graph relative to the synthesis file
  my $graph = &RSAT::util::RelativePath($main::outfile{synthesis}, $main::outfile{"motifs_vs_ref_png"});

  ## Thumbnail linked to the full-size graph
  my $thumbnail = "<a  href='".$graph."'>";
  $thumbnail .= "<img border='".$param{img_border}."' height=300 src='".$graph."'>";
  $thumbnail .= "</a>";

  ## Groups of links: each group is a list of (label, file) pairs
  my $ref_links = ["matrix", $main::infile{ref_motifs}];
  my $table_links = ["html", $main::outfile{"motifs_vs_ref_html"},
		     "txt", $main::outfile{"motifs_vs_ref"}];
  my $alignment_links = ["html", $main::outfile{"motifs_vs_ref_alignments_1ton_html"},
			 "text", $main::outfile{"motifs_vs_ref_alignments_1ton"}];
  my $graph_links = ["gml (for CytoScape)", $main::outfile{"motifs_vs_ref_gml"},
		     "png (low resolution)", $main::outfile{"motifs_vs_ref_png"}];

  &SyntheticTableAddRow_New("Motifs versus reference motif", $thumbnail,
			    "Reference motif"   => $ref_links,
			    "table"             => $table_links,
			    "alignments (logo)" => $alignment_links,
			    "graph"             => $graph_links
			   );
}

################################################################
## Synthesis of comparisons between discovered motifs and motif databases
sub SynthesisMotifsVersusDatabase {
  ## Usage:
  ##   &SynthesisMotifsVersusDatabase();
  ##
  ## Add to the synthesis table a header row followed by one row per
  ## motif database listed in @motif_databases: a clickable thumbnail of
  ## the comparison graph (png), and grouped links to the comparison
  ## tables, the alignments and the graphs.
  &SyntheticTableAddHeaderRow("Discovered motifs versus transcription factor databases");

  for my $db (@motif_databases) {
    ## All output file keys of this database share the same prefix
    my $key = "motifs_vs_db_".$db;

    ## Thumbnail linked to the full-size comparison graph
    my $graph = &RSAT::util::RelativePath($main::outfile{synthesis}, $main::outfile{$key."_png"});
    my $thumbnail = "<a  href='".$graph."'>";
    $thumbnail .= "<img border='".$param{img_border}."' height=300 src='".$graph."'>";
    $thumbnail .= "</a>";

    ## Groups of links: each group is a list of (label, file) pairs
    my $table_links = ["html", $main::outfile{$key."_html"},
		       "txt", $main::outfile{$key}];
    my $alignment_links = ["html", $main::outfile{$key."_alignments_1ton_html"},
			   "text", $main::outfile{$key."_alignments_1ton"}];
    my $graph_links = ["gml (for CytoScape)", $main::outfile{$key."_gml"},
		       "png (low resolution)", $main::outfile{$key."_png"}];

    &SyntheticTableAddRow_New("Motifs versus ".$db." database", $thumbnail,
			      "table"             => $table_links,
			      "alignments (logo)" => $alignment_links,
			      "graph"             => $graph_links
			     );
  }
}

################################################################
## Delete purged sequence files after analysis has been completed.
sub CleanSequences {
  ## Usage:
  ##   &CleanSequences();
  ##
  ## For each sequence type listed in @seq_types, issue (via
  ## &one_command) an "rm -f" command on the purged sequence file.
  for my $type (@seq_types) {
    if ($main::verbose >= 2) {
      &RSAT::message::TimeWarn("Cleaning sequences", $type);
    }
    my $rm_command = "rm -f ".$main::outfile{$type."_purged"};
    &one_command($rm_command, 1);
  }
}


################################################################
## Read arguments
sub ReadArguments {
  &RSAT::message::TimeWarn("Reading arguments") if ($main::verbose >= 2);
  my $arg;
  my @arguments = @ARGV; ## create a copy to shift, because we need ARGV to report command line in &Verbose()
  while (scalar(@arguments) >= 1) {
    $arg = shift (@arguments);
    ## Verbosity

=pod

=head1 OPTIONS

=over 4

=item B<-v #>

Level of verbosity (detail in the warning messages during execution)

=cut
    if ($arg eq "-v") {
      if (&IsNatural($arguments[0])) {
	$main::verbose = shift(@arguments);
      } else {
	$main::verbose = 1;
      }

=pod

=item B<-h>

Display full help message

=cut
    } elsif ($arg eq "-h") {
      &PrintHelp();

=pod

=item B<-help>

Same as -h

=cut
    } elsif ($arg eq "-help") {
      &PrintOptions();

=pod

=item B<-i test_seq_file>

Test peak sequence file (mandatory).

For single-set analysis, this file contains the peak sequences of the
unique set.  For test versus control analysis, it contains the test
sequences.

=cut
    } elsif ($arg eq "-i") {
      $main::infile{test_seq} = shift(@arguments);

=pod

=item B<-source sequence_source>

Enter the source of the fasta sequence file.

Supported source: galaxy

When the sequence file comes from Galaxy, peak coordinates embedded in
the fasta headers are extracted and used to convert predicted site
coordinates (relative to peak center) to genomic coordinates (in the
form of a bed file), which can then be uploaded to the UCSC genome
browser as an annotation track.

This option is incompatible with -coord.

=cut
    } elsif ($arg eq "-source") {
      $main::param{seq_source} = shift(@arguments);
      &RSAT::message::Warning($main::param{seq_source}, "invalid sequence source, will be ignored. Supported: ".$supported_seq_sources)
	unless ($supported_seq_source{$main::param{seq_source}});

=pod

=item B<-coord assembly peak_coordinate_file>

Specify a file in bed format indicating the coordinates of each peak.
The file name must be preceded by the genome assembly corresponding to
this file. Assemblies are specified with their UCSC identifiers
(e.g. mm9, hg19, ...).

Example: -coord hg19 path/to/bed/file.bed

When a coordinate file is provided, predicted site coordinates
(relative to peak center) are also converted to genomic coordinates
(in the form of a bed file), which can be uploaded to the UCSC genome
browser as an annotation track.

The 4th column of the BED file must correspond to the fasta
headers. See the documentation of the UCSC Genome Browser for the
specification of the bed format.

=cut
    } elsif ($arg eq "-coord") {
      $main::param{coord_assembly} = shift(@arguments);
      $main::infile{coord} = shift(@arguments);
       &RSAT::error::FatalError($main::param{coord_assembly}, $main::infile{coord}, "are not valid values for -coord option. Should be -coord assembly path/to/bed/file.bed")
	if ($main::infile{coord} =~ /^-/);


=pod

=item B<-ctrl control_seq_file>

Control peak sequence file (optional).

The control sequence file is used:
- as control sequence for oligo-diff
- to estimate the background models for oligo-analysis and
  dyad-analysis.

Control sequences are supposed to contain a large number of sequences
without particular enrichment for any motif. The choice of appropriate
background sequences is crucial to detect relevant motifs.

The file should be sufficiently large (several Mb) to provide a robust
estimate of prior probabilities (frequencies expected at random) for
oligonucleotides and dyads.

Typical examples of control sequences:

- random fragments of the genome of interest
  (e.g. obtained with I<random-genome-fragments>)

- sets of sequences pulled down in a mock experiment (without the
  antibody) and characterized by ChIP-seq or ChIP-chip.

- sets of peaks for a compendium of transcription factors different
  from the factor of interest.

=cut
    } elsif ($arg eq "-ctrl") {
      $main::infile{ctrl_seq} = shift(@arguments);


=pod

=item B<-max_seq_len msl>

Maximal sequence length. Larger sequences are truncated at the
specified length around the sequence center (from -msl/2 to +msl/2).

=cut
    } elsif ($arg eq "-max_seq_len") {
      my $max_seq_len = shift(@arguments);
      &RSAT::error::FatalError($max_seq_len, "is not a valid value for max sequence length. Should be a Natural number.")
	unless ((&IsNatural($max_seq_len)));
      if ($max_seq_len > 0) {
	$main::param{max_seq_len} = $max_seq_len;
      } else {
	&RSAT::message::Info("Max seq len = 0 interpreted as no limit") if ($main::verbose >= 2);
      }

=pod

=item B<-top_peaks N>

Restrict the analysis to the N peaks at the top of the input sequence
file. Some peak calling programs return the peaks sorted by score. In
such case, the -top_peaks option allows to restrict the analysis to
the highest scoring peaks. In some cases, the top-scoring peaks might
contain a higher density of binding sites, allowing to detect motifs
with a higher significance.

This option can also be convenient for performing quick tests,
parameter selection and debugging before running the full analysis of
large sequence sets.

=cut
    } elsif ($arg eq "-top_peaks") {
      my $top_peaks = shift(@arguments);
      &RSAT::error::FatalError($top_peaks, "is not a valid value for max sequence length. Should be a Natural number.")
	unless ((&IsNatural($top_peaks)));
      if ($top_peaks > 0) {
	$main::param{top_peaks} = $top_peaks;
      } else {
	&RSAT::message::Info("Top peak number = 0 interpreted as no limit") if ($main::verbose >= 2);
      }

=pod

=item B<-ref_motifs reference_motif>

Reference motif (optional).

In some cases, a reference motif is already available, for example
the motif annotated in some transcription factor database
(e.g. RegulonDB, Jaspar, TRANSFAC) for the transcription factor of
interest. These annotations may come from low-throughput experiments,
and rely on a small number of sites, but the reference motif may
nevertheless be informative, because it is based on several
independent studies.

Each discovered motif can be compared to the reference motif, in order
to evaluate its correspondence with the binding motif of the factor of
interest.

Reference motifs should be provided in TRANSFAC format (see
I<convert-matrix> for interconversions between matrix formats).

=cut
    } elsif ($arg eq "-ref_motifs") {
      $main::infile{ref_motifs} = shift(@arguments);


=pod

=item B<-motif_db db_name db_format db_file>

File containing a database of transcription factor binding motifs
(e.g. JASPAR, TRANSFAC, RegulonDB, ...) which will be compared to the
discovered motifs (task motifs_vs_db).

The option requires three arguments:

 - DB name

 - matrix format. same supported formats as convert-matrices, but we
   recommend to use a format that includes an ID and a name for each
   motif (e.g. TRANSFAC)

 - file containing the DB motifs

The option can be called iteratively on the same command line in order
to compare discovered motifs with several databases.

Examples:

 -motif_db TRANSFAC transfac transfac_download_dir/cgi-bin/data/matrix.dat

   will load a file containing all matrices from the TRANSFAC
   database.

 -motif_db JASPAR jaspar jaspar_file.tf

   will load a file containing motifs from the JASPAR database that
   have previously been converted to TRANSFAC format.

=cut

    } elsif ($arg eq "-motif_db") {
      my $db_name = shift(@arguments);
      $db_name =~ s/\s/_/g;
      push @main::motif_databases, $db_name;
      $main::motif_db_format{$db_name} = shift(@arguments);
      my $db_file = shift(@arguments);
      unless (-e $db_file) {
	&RSAT::error::FatalError("Motif DB file does not exist.", $db_file);
      }
      $main::infile{"motif_db_".$db_name} = $db_file;

=pod

=item	B<-outdir output_directory>

Output directory (mandatory).

The result files and index files produced by the different programs
will be stored in this directory.

=cut
    } elsif ($arg eq "-outdir") {
      $main::dir{output} = shift(@arguments);


=pod

=item	B<-prefix output_prefix>

Prefix for the output files.

=cut
    } elsif ($arg eq "-prefix") {
      $main::param{prefix} = shift(@arguments);

=pod

=item	B<-title graph_title>

Title displayed on top of the graphs.

=cut
    } elsif ($arg eq "-title") {
      $main::param{title} = shift(@arguments);

=pod

=item	B<-img_format img_format>

Image format.

All the formats supported by XYgraph can be used.

=cut
    } elsif ($arg eq "-img_format") {
      $main::param{img_format} = shift(@arguments);


=pod

=item B<-disco oligos|dyads|positions|local_words|merged_words|meme|chipmunk>

Specify the software tool(s) that will be used for motif discovery.

Several algorithms can be specified either by using the option
iteratively:

  -disco oligos -disco dyads

or by entering a comma-separated list of algorithms:

  -disco oligos,dyads

B<Default motif discovery algorithms>

=back

=item I<oligos>

Run I<oligo-analysis> to detect over-represented oligonucleotides of a
given length (k, specified with option -l) in the test set (van Helden
et al., 1998). Prior frequencies of oligonucleotides are taken from
Markov model of order m (see option -markov) estimated from the test
set sequences themselves.

=item I<dyads>

Run I<dyad-analysis> to detect over-represented dyads, i.e. pairs of
short oligonucleotides (monads) spaced by a region of fixed width but
variable content (van Helden et al., 2000). Spaced motifs are typical
of certain classes of transcription factors forming homo- or
heterodimers.

By default, peak-motifs analyzes pairs of trinucleotides with
any spacing between 0 and 20.

The expected frequency of each dyad is estimated as the product of its
monad frequencies in the input sequences (option -bg monads of
dyad-analysis).

=item I<positions>

Run I<position-analysis> to detect oligonucleotides showing a
positional bias, i.e. have a non-homogeneous distribution in the peak
sequence set.

This method was initially developed to analyze termination and
poly-adenylation signals in downstream sequences (van Helden et al.,
2001), and it turns out to be very efficient for detecting motifs
centred on the ChIP-seq peaks. For ChIP-seq analysis, the reference
position is the center of each sequence.

Note that I<peak-motifs> also uses I<position-analysis> for the
task B<composition>, in order to detect compositional biases (residues,
dinucleotides) in the test sequence set.

=item I<local_words>

Run I<local-word-analysis> to detect locally over-represented
oligonucleotides and dyads.

The program I<local-word-analysis> (Matthieu Defrance,unpublished)
tests the over-representation of each possible word (oligo, dyad)
in positional windows in the input sequence set.

Two types of background models are supported: (i) Markov model of
order m estimated locally (within the window under consideration); (ii)
the frequency observed for a word in the whole sequence set is used as
estimator of the prior probability of this word in the window.

In our first trials, this program gave excellent results with
ChIP-seq datasets, because its sensitivity increases with the
number of sequences (several hundreds/thousands), and its background
model is more stringent than for programs computing the global
over-representation (oligo-analysis, dyad-analysis).

=item I<merged_words>

Extract a position-specific scoring matrix (using
I<matrix-from-patterns>) from all the words discovered by the selected
string-based motif discovery algorithms (oligos, dyads, positions
and/or local_words).


=cut
    } elsif ($arg eq "-disco") {
      my @requested_discos = split ",", shift (@arguments);
      foreach my $disco (@requested_discos) {
	next unless $disco;
	if ($supported_disco{$disco}) {
	  $disco{$disco} = 1;
	} else {
	  &RSAT::error::FatalError("Motif discovery algorithm '$disco' is not supported. \n\tSupported: $supported_discos");
	}
      }




=pod

=item B<-task>

Specify a subset of tasks to be executed.

By default, the program runs all necessary tasks. However, in some
cases, it can be useful to select one or several tasks to be executed
separately.

Beware: task selection requires expertise, because most tasks depend
on the prior execution of some other tasks in the workflow. Selecting
tasks before their prerequisite tasks have been completed will provoke
fatal errors.

B<Default tasks>

=over

=item I<all> (default)

Run all the default tasks.

=item I<purge>

Purge input sequences (test set and, if specified, control set) to
mask redundant fragments before applying pattern discovery
algorithms. Sequence purging is necessary because redundant fragments
would violate the hypothesis of independence underlying the binomial
significance test, resulting in a large number of false positive
patterns.

=item I<seqlen>

Compute sequence lengths and their distribution.

Sequence lengths are useful for the negative control (selection of
random genome fragments).

Sequence length distribution is informative to get an idea about the
variability of peak lengths.

=item I<composition>

Compute compositional profiles, i.e. distributions of residues and
dinucleotide frequencies per position (using I<position-analysis>).

Residue profiles may reveal composition biases in the neighborhood of
the peak sequences. Dinucleotide profiles can reveal (for example) an
enrichment in CpG island.

Note that I<peak-motifs> also runs I<position-analysis> with
larger oligonucleotide length (see option -l) to detect motifs on the
basis of positionally biased oligonucleotides (see task B<positions>).

=item I<ref_motifs>

This task combines various operations.

=over

=item Formating of the reference motif

Perform various format conversion for the reference motif (compute
parameters, consensus, logo).

=item Motif enrichment

Generate an enriched motif by scanning the peak sequence set with the
reference motif.

=item Motif comparison

Compare all discovered motifs with the reference motif.

=back

=item I<disco>

Run the motif discovery algorithms. See option -disco for the
selection of motif discovery algorithm(s).

=item I<meme>

Run the motif discovery program MEME on the input sequences.

B<Beware>: the complexity of MEME is quadratic: the computing time
increases as the square of sequence size. It is thus not recommended
to use MEME for data sets exceeding 1Mb. If the input set contains
many peaks, the option -task meme can be combined with a restriction
on the number of top peaks to be considered (e.g. -top_peaks 500).

MEME reference: Bailey, T. L. and Elkan, C. (1994). Fitting a mixture
model by expectation maximization to discover motifs in
biopolymers. Proc Int Conf Intell Syst Mol Biol 2, 28-36.

=item I<chipmunk>

Run the motif discovery program ChIPMunk.

ChIPMunk reference: Kulakovskiy, I. V., Boeva, V. A., Favorov,
A. V. and Makeev, V. J. (2010). Deep and wide digging for binding
motifs in ChIP-Seq data. Bioinformatics 26, 2622-3.

=item I<merge_words>

Merge the words (oligos or dyads) discovered by the different
string-based motif discovery algorithms.

The table of merged words has one row per word (oligo or dyad) and one
column per motif discovery program. This table is convenient to
analyze the consistency between the words detected by different
approaches, e.g. show that a word is both over-represented
(oligo-analysis, dyad-analysis) and positionally biased
(position-analysis, local-words). A heatmap is also exported to
provide a graphical representation of the significance of each word
(row) for each algorithm (column).

The merged words can optionally be used as seeds for extracting
position-specific scoring matrices from the sequences, using the
program I<matrix-from-patterns> (see option -disco merged_words).

=item I<motifs_vs_ref>

Compare each discovered motif to the reference motifs.

=item I<motifs_vs_db>

Compare each discovered motif to a database of known motifs
(e.g. Jaspar, TRANSFAC, RegulonDB, UniProbe, ...)

=item I<timelog>

Generate a log file summarizing the time spent in the different tasks.

=item I<synthesis>

Generate the HTML file providing a synthesis of the results and
pointing towards the individual result files.

=back

B<Extra tasks>

A few extra tasks are available, which are not executed by default. Those
tasks are executed only when they are explicitly invoked with the option
I<-task>, they are not called with the option "-task all".

=over

=item I<clean_seq>

Delete the purged sequence files after the analysis, in order to save
space.


=item I<meme_bg>

Compute meme background model from the input sequences.

=back

=cut
    } elsif ($arg eq "-task") {
      my @requested_tasks = split ",", shift (@arguments);
      foreach my $task (@requested_tasks) {
	next unless $task;
	if ($supported_task{$task}) {
	  $task{$task} = 1;
	} else {
	  &RSAT::error::FatalError("Task '$task' is not supported. \n\tSupported: $supported_tasks");
	}
      }

=pod

=item B<-nmotifs max_motif_number>

Maximal number of motifs (matrices) to return for motif discovery
algorithms. Note the distinction between the maximal number of motifs
(matrices) and the maximal number of patterns (words, dyads): a motif
generally corresponds to several mutually overlapping patterns (dyads,
words).

=cut

    } elsif ($arg eq "-nmotifs") {
      $main::param{matrix_nmotifs} = shift (@arguments);

=pod

=item B<-l oligo_len>


Oligonucleotide length for word-counting approaches (oligo-analysis,
position-analysis, local-word-analysis, oligo-diff).

In our experience, optimal results are obtained with hexanucleotides
and heptanucleotides.

Note: the monad length used for dyad-analysis is not affected by those
options. Instead, it is fixed to 3. Indeed, dyad-analysis can detect
larger motifs by sampling various spacings between the two
trinucleotide monads.

=item B<-minol oligo_min_len>

=item B<-maxol oligo_max_len>

Minimal (-minol) and maximal (-maxol) oligonucleotide lengths. If
those options are used, the program iterates over the specified range
of oligonucleotide lengths.

=cut
    } elsif ($arg eq "-l") {
      my $oligo_len = shift (@arguments);
      $main::param{oligo_min_len} = $oligo_len;
      $main::param{oligo_max_len} = $oligo_len;
    } elsif ($arg eq "-minol") {
      $main::param{oligo_min_len} = shift (@arguments);
    } elsif ($arg eq "-maxol") {
      $main::param{oligo_max_len} = shift (@arguments);

=pod

=item B<-markov>

Order of the Markov model used to estimate expected oligonucleotide
frequencies for I<oligo-analysis> and I<local-word-analysis>.

Higher order Markov models are more stringent, lower order are more
sensitive, but tend to return a large number of false positives.

Markov models can be specified with either a positive or a negative
value. Positive values indicate the length of the prefix in the
transition matrix. Negative values indicate the order of the Markov
model relative to the oligonucleotide length. For example, the option
-markov -2 gives a model of order m=k-2 (thus, an order 5 for
heptanucleotides, an order 4 for hexanucleotides).

The optimal Markov order depends on the number of sequences in the
test set. Since ChIP-seq data typically contain hundreds to thousands
of peaks, high Markov orders are generally good, because they are
stringent and still sensitive enough.  In our experience, motifs are
well detected with the most stringent Markov order (-markov -2).

=item B<-min_markov min_markov_order>

=item B<-max_markov max_markov_order>

A minimal and a maximal value can be specified for the Markov
order. The program then iterates over all markov values between
min_markov_order and max_markov_order.


=cut
    } elsif ($arg eq "-markov") {
      $main::param{oligo_min_mkv} = $main::param{oligo_max_mkv} = shift (@arguments);
    } elsif ($arg eq "-min_markov") {
      $main::param{oligo_min_mkv} = shift (@arguments);
    } elsif ($arg eq "-max_markov") {
      $main::param{oligo_max_mkv} = shift (@arguments);

=pod

=item B<-1str | -2str>

Single-strand (-1str) or double-strand (-2str) analysis.

The default is double-strand analysis, since ChIP-seq results have no
particular strand orientation.

=cut
    } elsif ($arg eq "-1str") {
      $main::param{strand} = "-1str";
    } elsif ($arg eq "-2str") {
      $main::param{strand} = "-2str";

=pod

=item B<-noov | -ovlp>

Treatment of self-overlapping words for motif discovery: count (-ovlp)
or do not count (-noov) overlapping occurrences. In -noov mode, only
renewing occurrences are counted.

It is recommended to use the -noov mode (default) to avoid the effect
of self-overlap, which violates the hypothesis of independence of
successive occurrences underlying the binomial significance test
(oligo-analysis, dyad-analysis).

B<Beware>: the options -noov and -ovlp only apply to motif discovery,
and not to compositional profiles. Dinucleotide frequencies are always
computed with the option -ovlp (count all occurrences), to avoid weird
effect. Since those composition profiles further serve to estimate the
probability of larger words, which may include repeated residues, we
need to count all dinucleotide occurrences. Indeed with the -noov mode
(renewing occurrences only), the transition tables of the first order
Markov model would be unbalanced: the expected frequency of all the
repeated dinucleotides (AA, TT, CC, GG) would be under-estimated,
leading to an under-estimation of the expected frequency of
repeat-containing words (e.g. AAAAAA, AAAGGG, ...).

=cut
    } elsif ($arg eq "-noov") {
      $main::param{disco_noov} = "-noov";
    } elsif ($arg eq "-ovlp") {
      $main::param{disco_noov} = "-ovlp";

=pod

=item B<-ci class_interval>

Class interval for I<position-analysis>.

=cut

    } elsif ($arg eq "-ci") {
      $main::param{profiles_ci} = shift(@arguments);
      &RSAT::error::FatalError($main::param{profiles_ci}, "is not a valid value for class interval. Should be a strictly positive Natural number.")
	unless ((&IsNatural($main::param{profiles_ci})) && ($main::param{profiles_ci} > 0));

      ## Other parameters are not accepted
    } else {
      &FatalError(join("\t", "Invalid option", $arg));

    }
  }

=pod

=back

=cut

}

################################################################
## Check arguments
##
## Validate the values collected from the command line and derive the
## settings that depend on them:
##  - mandatory inputs (test sequences, output directory, prefix);
##  - names of the report, log, archive and synthesis files;
##  - the set of tasks (%task) and discovery algorithms (%disco) to
##    run, including the dependencies between tasks;
##  - the ordered list of requested tasks (@tasks).
##
## Takes no argument and returns nothing useful; every inconsistency is
## reported through a fatal error.
sub CheckArguments {
  &RSAT::message::TimeWarn("Checking arguments") if ($main::verbose >= 2);

  ## Input sequence file
  if ($infile{test_seq}) {
    unless (-e $infile{test_seq}) {
      &FatalError("Test sequence file does not exist", $infile{test_seq});
    }
    @main::seq_types = ("test");
  } else {
    &FatalError("You must define the test sequence set (option -i)");
  }

  ## Sequence source (option -from): "galaxy" is the only accepted value
  if ($main::param{seq_source}) {
    unless ($main::param{seq_source} eq "galaxy") {
      &FatalError("-from option: this sequence source is not supported", $main::param{seq_source});
    }
  }

  ## If a control file has been specified, check that it exists
  if ($infile{ctrl_seq}) {
    unless (-e $infile{ctrl_seq}) {
      &FatalError("Control sequence file does not exist", $infile{ctrl_seq});
    }
    push @main::seq_types, ("ctrl");
  }

  ## Output directory
  if ($main::dir{output}) {
    &RSAT::util::CheckOutDir($main::dir{output});
    push @outdir, "output";
  } else {
    &FatalError("You must define the output directory (option -outdir)");
  }

  ## Prefix
  unless ($main::param{prefix}) {
    &FatalError("You must define a prefix for the output files (option -prefix)");
  }

  ## Title: defaults to the prefix
  unless ($main::param{title}) {
    $main::param{title} = $main::param{prefix};
  }

  ## Log files
  $main::outfile{log} = &OutFileName("reports", ".txt", "log");
  $main::outfile{links} = &OutFileName("reports", ".html", "links");
  $main::outfile{timelog} = &OutFileName("reports", ".txt", "timelog");
  $main::outfile{timelog_html} = &OutFileName("reports", ".html", "timelog");
  $main::outfile{timetable} = &OutFileName("reports", ".txt", "timetable");
  $main::outfile{archive} = &OutFileName("", ".".$main::param{archive_format}, "archive");

  ## Synthesis file
  $main::outfile{synthesis} = &OutFileName("", ".html", "synthesis");
  ## NOTE(review): this test runs before the default tasks are filled
  ## in below, so when no task was requested $task{synthesis} is still
  ## unset here and the progressive synthesis is switched off -- confirm
  ## that this is intended.
  $progressive_synthesis = 0 unless ($task{synthesis});

  ## Modalities of motif comparisons
  ## NOTE(review): these dependencies are resolved before the default
  ## tasks are filled in below; they only apply to default tasks if
  ## @default_tasks already lists the derived tasks -- to be confirmed.
  if ($task{motif_compa}) {
    $task{merge_motifs} = 1;
    #   $task{cluster_motifs} = 1;
    $task{collect_motifs} = 1;
    $task{motifs_vs_ref} = 1 if (defined($main::infile{ref_motifs}));
    $task{motifs_vs_db} = 1 if (scalar(@motif_databases) > 0);
  }

  ## The task collect_motifs regroups merge_motifs and split_motifs
  if ($task{collect_motifs}) {
    $task{merge_motifs} = 1;
    $task{split_motifs} = 1;
  }

  ## Check that reference motif has been specified if required
  if ($task{motifs_vs_ref}) {
    &RSAT::error::FatalError("The task motifs_vs_ref requires to specify a file containing the reference motif (option -ref_motifs).")
      unless (defined($main::infile{ref_motifs}));
  }

  ## Check that motif DB has been specified if required
  if ($task{motifs_vs_db}) {
    &RSAT::error::FatalError("The task motifs_vs_db requires to specify at least one file containing database motifs (option -motif_db).")
      unless (scalar(@motif_databases) > 0);
  }


  ################################################################
  ## Check tasks

  ## If all tasks are requested or if no task is defined, execute all
  ## tasks.
  if ((scalar(keys(%task)) == 0) || ($task{all})) {
    foreach my $task (@default_tasks) {
      $task{$task} = 1;
    }

    ## Comparison tasks are dropped when their input was not provided
    delete($task{motifs_vs_ref}) unless (defined($main::infile{ref_motifs}));
    delete($task{motifs_vs_db}) unless (scalar(@motif_databases) > 0);
  }

  ## If no discovery algorithm has been specified, run the default ones
  if ((scalar(keys(%disco)) == 0) || ($disco{all})) {
    foreach my $disco (@default_discos) {
      $disco{$disco} = 1;
    }

    ## Adapt the selection of discovery algorithms depending on whether
    ## a control set has been provided
    if (defined($main::infile{ctrl_seq})) {
      delete($disco{oligos});
      delete($disco{dyads});
      delete($disco{positions});
      delete($disco{local_words});
    } else {
      delete($disco{oligo_diff});
    }
  }

  ################################################################
  ## Dependencies between tasks

  ## If motif discovery runs, the tasks that depend on it must re-run
## I hesitate to redo all the subsequent tasks, this would somewhat loose the advantage of the option -task
#  if ($task{disco}) {
#      $task{merge_words} = 1;
#      $task{collect_motifs} = 1;
#
#      $task{motifs_vs_ref} = 1;
#      $task{motifs_vs_db} = 1;
#      $task{scan} = 1;
#  }


  ## Dependency between task merge_words and pattern discovery type merged_words
  if (($task{disco}) && ($disco{merged_words})) {
      $task{merge_words} = 1;
  }

  ## Prepare a list of the requested tasks (by order of execution)
  foreach my $task (@supported_tasks) {
   push (@tasks, $task) if $task{$task};
  }
  &RSAT::message::Info("Tasks: ", join (",", @tasks)) if ($main::verbose >= 2);
}


################################################################
## Print one row of the HTML table of links.
##
## Arguments:
##   $label  text shown in the first cell (key of the file or directory)
##   $path   path shown in the second cell
##
## The row is written to the filehandle held in the global $links
## (localized by ReportParamsAndFiles). If the path exists, it is shown
## as a hyperlink relative to the links file; otherwise the path is
## printed in red.
sub PrintFileLink {
  my ($label, $path) = @_;

  ## Open the row and write the label cell
  print {$links} "<tr>\n";
  print {$links} "<td>", $label, "</td>\n";
  print {$links} "<td>\n";

  ## Content of the second cell: hyperlink or red path
  my $cell;
  if (-e $path) {
    &RSAT::message::Debug($label, $path) if ($main::verbose >= 5);
    my $href = &RSAT::util::RelativePath($main::outfile{links}, $path);
    $cell = "<a href='".$href."'>".$path."</a><br>\n";
  } else {
    $cell = "<font color='red'>".$path."</font><br>\n";
  }
  print {$links} $cell;

  ## Close the cell and the row
  print {$links} "</td>\n";
  print {$links} "</tr>\n";
}

################################################################
## Report parameters and input/output files.
##
## This has to be executed after the processing because the verbose
## includes a list of all result files, and the motif list depend on
## the number of motifs returned by the different algorithms.
##
## Output goes to three handles:
##   $syn       receives the command line in a <pre> block;
##   $main::out receives the text report (command, version, parameters,
##              thresholds, files);
##   $links     a separate HTML page (file $main::outfile{links}) with a
##              table of links, opened and closed here.
sub ReportParamsAndFiles {

  ## Command line, as a preformatted block
  print {$syn} "<pre>";
  print {$syn} "<b>Command:</b>  peak-motifs ";
  &PrintArguments($syn, 1);
  print {$syn} "</pre>";

  ## HTML file for the links. The handle is localized so that
  ## PrintFileLink, called below, writes to it.
  local $links = &OpenOutputFile($main::outfile{links});
  &PrintHtmlResultHeader($links);

  ## Read the list of directories and files for the discovered motifs
  &ReadMotifTable();

  print {$links} "<p><table class='sortable'>\n";

  ## Text report: command line and program version
  print {$main::out} "; peak-motifs ";
  &PrintArguments($main::out, 1);
  printf {$main::out} "; %-22s\t%s\n", "Program version", $program_version;

  ## Parameter values, in the order of @param_list
  print {$main::out} "; Parameter values\n";
  for my $name (@param_list) {
    print {$main::out} sprintf(";\t%-22s\t%s\n", $name, $param{$name});
  }

  print {$out} &PrintThresholdValues();

  ## The three sections below share the same layout: a title line in
  ## the text report, a header row in the links table, then one text
  ## line and one table row per entry.
  my $list_section = sub {
    my ($heading, $width, $names, $location) = @_;
    print {$main::out} "; ".$heading."\n";
    print {$links} "<tr><th colspan=2>", $heading, "</th></tr>";
    for my $name (@{$names}) {
      my $path = $location->{$name};
      printf {$main::out} ";\t%-".$width."s\t%s\n", $name, $path; ## Text output
      &PrintFileLink($name, $path); ## HTML output
    }
  };

  ## Input file(s), sorted by key
  $list_section->("Input files", 22, [sort keys %main::infile], \%main::infile)
    if (%main::infile);

  ## Directories, in the order in which they were registered in @outdir
  $list_section->("Directories", 30, \@outdir, \%main::dir)
    if (scalar(@outdir) > 0);

  ## Output files, sorted by key
  $list_section->("Output files", 30, [sort keys %main::outfile], \%main::outfile)
    if (%main::outfile);

  ## Close the links file
  print {$links} "</table>\n";
  print {$links} "</body>";
  print {$links} "</html>";
  close $links;
}

=pod

=head1 REFERENCES

The program I<peak-motifs> combines a series of tried-and-tested
programs which have been detailed in the following publications.

=over

=item I<oligo-analysis>

van Helden, J., Andre, B. and Collado-Vides, J. (1998). Extracting
regulatory sites from the upstream region of yeast genes by
computational analysis of oligonucleotide frequencies. J Mol Biol 281,
827-42.

=item I<dyad-analysis>

van Helden, J., Rios, A. F. and Collado-Vides, J. (2000). Discovering
regulatory elements in non-coding sequences by analysis of spaced
dyads. Nucleic Acids Res 28, 1808-18.

=item I<position-analysis>

van Helden, J., del Olmo, M. and Perez-Ortin,
J. E. (2000). Statistical analysis of yeast genomic downstream
sequences reveals putative polyadenylation signals. Nucleic Acids Res
28, 1000-10.

=item I<matrix-scan>

Turatsinze, J. V., Thomas-Chollier, M., Defrance, M. and van Helden,
J. (2008). Using RSAT to scan genome sequences for transcription
factor binding sites and cis-regulatory modules. Nat Protoc 3,
1578-88.

=back

=head1 SEE ALSO

=over

=item I<oligo-analysis>

=item I<dyad-analysis>

=item I<position-analysis>

=item I<local-word-analysis>

=item I<compare-matrices>

=item I<matrix-scan>


=back

=head1 WISH LIST

=over

=item B<-return explanation>

Print guidelines for explaining the motifs, comparison tables and
graphs at the bottom of the result page.

=item B<background models from ctrl sequences>

Estimate background models from control sequences, for oligo-analysis,
dyad-analysis, and local-word-analysis. This should in principle
reduce the rate of false positives.

=item B<motif_cluster>

Compare all discovered motifs (plus reference motif if specified) and
cluster them in order to extract a consensus motif.

=item B<weeder>

Add a task to run Weeder on the peak sequences.

 weederlauncher.out input organism large S M T5

=item B<all_oligos>

Run oligo-analysis without any threshold in order to produce a plot of
observed versus expected occurrences for all the
oligonucleotides. This analysis is performed with the option
-two_tails, which detects both under- and over-represented patterns.

=item B<in the full HTML report>

- link from the result page to the link table returned by
  position-analysis (file *_graph_index.html).

=back


=cut

__END__
