#!/usr/bin/perl

## Deprecation stub: this program was renamed gene-cluster-motifs, so
## execution stops here with a message pointing to the new name. The
## trailing newline in the message keeps perl from appending the script
## name and line number to it.
my $renamed_notice = "multiple-family-analysis has been renamed gene-cluster-motifs, please use the new program name from now on\n";
die $renamed_notice;

## This script is an over-engineered contraption (French idiom: "une usine a gaz")

# ############################################################
# #
# # $Id: multiple-family-analysis,v 1.294 2010/10/19 21:05:38 jvanheld Exp $
# #
# # Time-stamp: <2003-10-21 01:19:12 jvanheld>
# #
# ############################################################
# BEGIN {
#     if ($0 =~ /([^(\/)]+)$/) {
# 	push (@INC, "$`lib/");
#     }
#     require "RSA.lib";
#     require "RSA.seq.lib";
#     require "RSA.disco.lib";
#     require "RSA.help.pl";
#     require "RSA2.cgi.lib";
#     push @INC, "$ENV{RSAT}/perl-scripts/parsers/";
#     require "lib/load_classes.pl";
#     require "lib/util.pl";
#     require "lib/parsing_util.pl";
#     require "$ENV{RSAT}/perl-scripts/lib/RSA.classes";
# }

# #require RSAT::server; ## For program paths
# require RSAT::util;
# require RSAT::server;
# require RSAT::matrix;
# require RSAT::pattern;
# require RSAT::Analysis;
# require RSAT::Family;
# require RSAT::MatrixReader;

# package main;
# {

#   local $start_time = &RSAT::util::StartScript();
#   local $program_version = do { my @r = (q$Revision: 1.294 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r };

#   ## Paths of the programs called by multiple-family-analysis
#   local $count_words_cmd = &RSAT::server::GetProgramPath("count-words");
#   local $matrix_scan_cmd = $ENV{RSAT}."/perl-scripts/matrix-scan";

#   $size_names = 0;

#   ################################################################
#   ## initialize global variables
#   $toppat = 50;
#   $organism = "";
#   $org_fam = 0;
#   $taxon = "";
#   $force_calib=1;
#   $null="NA";			## Null character for the exports
#   $skip=0;			## Skip the first clusters
#   $last=-1;			## Stop after a few clusters
#   $batch = 0;		   ## Run task in batch mode (on a PC cluster)
#   @batch_commands = ();	       ## Set of commands to run in batch mode
#   #$batch_script = ""; ## Script collecting the commands to be run on the same node of the PC cluster
#   $min_genes = 1;		## Min number of genes per family
#   $max_genes = -1;		## Max number of genes per family
#   $rel_w = 1;	  ## Threshold on relative weight for compare-patterns
#   $sliding_window_size = 0;
#   $die_on_error = 1;
#   $analyze_purged_sequences = 1;
#   $calibN_repet=100;		### Repetitions for the calibN
#   $db_site_name="";
#   %dir = ();
#   %lth = ();
#   $lth{occ} = 1;
#   %uth = ();
#   $dir{main} = `pwd`;
#   chomp($dir{main});
#   %family = ();			## Index of clusters per name
#   @families = ();		## Families
#   @selected = ();	  ## Clusters selected with the option -select

#   $markov = 0; ## Default method for oligos/dyads background model is not Markov. Set to 1 with the option -markov
#   $markov_order = 2;

#   $dir{calib1} = "calibrations_1gene";
#   %max_score = ();

#   ## Supported tasks
#   @supported_tasks = qw (upstream
#   		         sequences
# 			 upstream_ensembl
#   		         sequences_ensembl
# 			 purge

# 			 compute_bg
# 			 calibrate
# 			 calibN
# 			 oligos
# 			 merge_oligos
# 			 assemble_oligos
# 			 oligo_maps
# 			 db_match_oligos
# 			 sig_distrib_oligos
# 			 validate_oligos

# 			 dyads
# 			 dyad_maps
# 			 db_match_dyads
# 			 sig_distrib_dyads
# 			 validate_dyads

# 			 pssm
# 			 oligos_pssm
# 			 dyads_pssm
# 			 orm_pssm

# 			 orm
# 			 assemble_orm
# 			 orm_maps
# 			 sig_distrib_orm
# 			 validate_orm

# 			 merge_patterns
# 			 assemble_patterns
# 			 maps

# 			 slide

# 			 consensus
# 			 consensus_maps

# 			 gibbs
# 			 gibbs_maps

# 			 AlignACE
# 			 AlignACE_maps

# 			 infogibbs
# 			 infogibbs_maps

# 			 MotifSampler

# 			 meme
# 			 meme_maps

# 			 motifs_vs_db

# 			 report
# 			 synthesis
# 			 sql
# 			 clean
# 			 all
# 			);
#   foreach my $task (@supported_tasks) {
#     $supported_task{$task} = 1;
#   }
#   $supported_tasks = join ",", @supported_tasks;


#   ## Keys for sorting the results in the summary table
#   %supported_sort_key = (
# 			 "score"=>1,
# 			 "name"=>1
# 			);
#   $supported_sort_keys  = join ",", sort keys %supported_sort_key;


#   ## Background models for string-based pattern discovery
#   %supported_background = (
# 			   "intergenic"=>1,
# 			   "upstream"=>1,
# 			   "upstreamL"=>1,
# 			   "upstream-noorf"=>1,
# 			   "calib1"=>1,
# 			   "calibN"=>1,
# 			   "upstream-rm"=>1,
# 			   "upstream-noorf-rm"=>1
# 			  );
#   $supported_background  = join ",", sort keys %supported_background;

#   ## Background models
#   local %exp_freq_file = (); ## specified with the options -oligo_exp_freq and -dyad_exp_freq
#   local %bg_model_file = ();

#   ## input files
#   $family_file = "";
#   $sequence_file_list = "";
#   $all_seq_file = "";
#   $all_seq_format = "fasta"; ## Sequence format for the input sequences (only useful in combination with the option -seq)
#   $bg_seq_file = "";

#   ## feature-map options
#   $htmaps=0;
#   $draw_maps=1;
#   $scalestep = 100;
#   $img_format = $ENV{rsat_img_format} || "png";
#   $map_origin = "-0";
#   $scan_origin = "end";
#   $quick_scan = "-quick";

#   ## retrieve-seq options
#   $noorf = "-noorf";
#   $repeat_masked = 0;
#   $seq_type = "upstream";
#   $seq_format = "fasta"; ## Sequence format for the output files (upstream and purged sequences)
#   $seq_dir = "";
#   $seq_ext = "fasta";

#   ## purge-seq options
#   $purge_ml = 40;
#   $purge_mis = 3;

#   ## pattern discovery options
#   $background = "upstream-noorf";
#   $noov = "-noov";
#   $sort_key = "score";
#   $two_tails = 0; ## two-tails test for oligo-analysis and dyad-analysis

#   ## oligo-analysis options
#   $min_oligo_len = 6;
#   $max_oligo_len = 6;

#   ## dyad-analysis options
#   $monad_length = 3;
#   $min_sp = 0;
#   $max_sp = 20;

#   ## Default matrix-based pattern discovery options
#   $matrix_width = 16;
#   $expected_sites_per_seq = 2;
#   $nmotifs = 3;
#   $seed = undef;
#   $matrix_pseudo = 1;

#   ## Program-specific keys for sorting matrices (PSSM)
#   ## Values are defined below for each matrix-based pattern discovery program
#   %pssm_sort_key = ();
#   %pssm_sort_order = ();

#   ## Default MotifSampler options
#   $MS_bg_order = 0;
#   if (defined($ENV{MOTIF_SAMPLER_DIR})) {
#     $MS_b =$ENV{MOTIF_SAMPLER_DIR}."/background_models/yeast_up800_nomit_noorf_o3.bg"; ### TEMPORARY
#   }
#   $MS_p = 0.2; ## Sets prior probability of 1 motif copy. (default 0.5).
#   $MS_M = 0; ## Maximal number of motif instances per sequence. (default unset = 0)
#   $MS_n = $nmotifs;	## Number of different motifs per sequence set
#   $MS_w = $matrix_width;
#   $MS_x = 1;			## allowed overlap between motifs
#   $MS_r = 1;	       ## Number of repetitions of the gibbs per motif
#   $pssm_sort_key{MotifSampler} = 'MS.ic';
#   $pssm_sort_order{MotifSampler} = 'desc';

#   ## Default MEME options
#   $MEME_options{text} = ""; push @MEME_options, "text";	## Output format = text
#   $MEME_options{dna} = ""; push @MEME_options, "dna"; ## Sequence type == dna
#   $MEME_options{mod} = "anr"; push @MEME_options, "mod"; ## Accept any number of occurrences per sequence
#   $MEME_options{minw} = 8; push @MEME_options, "minw"; ## Minimal motif width
#   $MEME_options{maxw}=20; push @MEME_options, "maxw"; ## Maximal motif width
#   $MEME_options{nmotifs}=$nmotifs; push @MEME_options, "nmotifs"; ## Number of motifs
#   $MEME_options{evt}=1; push @MEME_options, "evt"; ## upper threshold on E-value
#   $pssm_sort_key{meme} = 'meme.E-value';
#   $pssm_sort_order{meme} = 'asc';

#   #$MEME_options{bfile}="";

#   ## Default orm options
#   $orm_lth_width = 10;	## Minimal window width for variable window size
#   $orm_uth_wrank = 1;		     ## Max rank per word
#   $orm_uth_rank=50;		     ## max rank (all words)
#   $orm_lth_occ_sig= 0;	  ## lower threshold on occurrence significance
#   #$orm_fixedsizewindow=0; ## Fixed window size
#   $orm_overlap=0; ## Allow overlapping occurrences for self-overlapping words
#   $orm_strand="+-";		   ## Strands
#   $orm_word_length=6;		   ## Word length
#   $orm_window=100;	       ## Window size for the background model
#   $orm_markov_order=-1;	      ## markov order for the background model

#   ## Default consensus options
#   $pssm_sort_key{consensus} = 'cons.ln.Eval';
#   $pssm_sort_order{consensus} = 'asc';

#   ## Default gibbs options
#   $pssm_sort_key{gibbs} = 'MAP.per.site';
#   $pssm_sort_order{gibbs} = 'desc';

#   ## Default AlignACE options
#   $AlignACE_gcback=0.4332384392880;
#   $pssm_sort_key{AlignACE} = 'MAP.per.site';
#   $pssm_sort_order{AlignACE} = 'desc';

#   ## Default infogibbs options
#   $pssm_sort_key{infogibbs} = 'total.information';
#   $pssm_sort_order{infogibbs} = 'desc';
#   $infogibbs_g = 1; ## Matthieu: what is this option for?

#   ## Default infogibbs options
#   %infogibbs_options = ();
#   $infogibbs_options{v} = 1; push @infogibbs_options, "v"; ## Verbosity
# #  $infogibbs_options{l} = $matrix_width; push @infogibbs_options, "l"; ## Motif length is defined with option -width
# #  $infogibbs_options{m} = $nmotifs; push @infogibbs_options, "m"; ## Number of motifs to return is defined with option -nmotifs
# #  $infogibbs_options{e} = $expected_sites_per_seq; push @infogibbs_options, "e"; ## Expected number of sites per sequence is defined with option -sps
# #  $infogibbs_options{s} = $strands; push @infogibbs_options, "s"; ## Strand(s) is defined with option -1str or -2str
#   $infogibbs_options{n} = 1000; push @infogibbs_options, "n"; ## Number of iterations
#   $infogibbs_options{d} = 5; push @infogibbs_options, "d"; ## Minimal distance between 2 motif occurrences
#   $infogibbs_options{nrun} = 3; push @infogibbs_options, "nrun"; ## Number of runs per motif

#   ## Comparison between discovered and known motifs
#   $known_site_file = "";
#   $known_sites_provided = 0;
#   $min_matching_score = 4;
#   $known_site_max_len=50;	## ignore known sites larger than 50bp

#   ## miscellaneous
#   $verbose = 0;

#   #### data export options
#   $schema = "multifam";
#   $host="localhost";
#   $user = getlogin();
#   $password="";

#   ################################################################
#   ### MAIN

#   #### Read arguments
#   &ReadArguments();

#   if ($background eq 'upstream-noorf') {
#     unless ($noorf) {
#       $background = 'upstream';
#     }
#   }
#   if ($repeat_masked) {
#     $background .= '-rm';
#   }

#   #### Check parameters
#   &CheckParameters();

#   ################################################################
#   ## Create class factories

#   ## Unique prefix for each analysis, to allow merging them in a database
#   local $run_date = &AlphaDate();
#   local $run_prefix = `mktemp XXXXX`;
#   chomp($run_prefix);
#   system "rm $run_prefix" if (-f $run_prefix);

#   ## Class holder for the analysis
#   local $analysis_factory = classes::ClassFactory->new_class(object_type=>"RSAT::Analysis");
#   $current_analysis = $analysis_factory->new_object(id=>$run_prefix);

#   ## Class holder for clusters
#   local $family_factory = classes::ClassFactory->new_class(object_type=>"RSAT::Family",
# 							   prefix=>$run_prefix."_fam");
#   $family_factory->set_out_fields(qw(id analysis organism size name  genes));

#   ## Class holder for patterns (oligos and dyads)
#   local $pattern_factory = classes::ClassFactory->new_class(object_type=>"RSAT::pattern",
# 							    prefix=>$run_prefix."_pat");
#   @pattern_out_fields = qw(id
# 			   family_id
# 			   family
# 			   type
# 			   sequence
# 			   rev_compl
# 			   occ
# 			   exp_occ
# 			   occ_P
# 			   occ_E
# 			   occ_sig
# 			   rank
# 			   zscore
# 			   ratio
# 			  );
#   $pattern_factory->set_out_fields(@pattern_out_fields);

#   ## Class holder for the matrices (gibbs, AlignACE, meme, consensus, MotifSampler, infogibbs)
#   local $matrix_factory = classes::ClassFactory->new_class(object_type=>"RSAT::matrix",
# 							   prefix=>$run_prefix."_mat");
#   @matrix_scalar_out_fields = qw(id
# 				 family_id
# 				 family
# 				 program
# 				 sites
# 				 matrix.nb
# 				 ncol
# 				 nrow
# 				 type
# 				 alphabet.size

# 				 consensus.IUPAC
# 				 consensus.IUPAC.rc
# 				 consensus.strict
# 				 consensus.strict.rc
# 				 consensus.regexp
# 				 consensus.regexp.rc

# 				 total.information
# 				 info.log.base
# 				 information.per.column
# 				 max.bits
# 				 max.possible.info.per.col
# 				 min.prior

# 				 cons.Pval
# 				 cons.ln.Pval
# 				 cons.Eval
# 				 cons.ln.Eval
# 				 cons.adjusted.information
# 				 cons.unadjusted.information

# 				 MAP
# 				 MAP.per.site
# 				 gibbs.betaprior.map
# 				 gibbs.model.map
# 				 seed

# 				 meme.llr
# 				 meme.E-value

# 				 MS.ic
# 				 MS.ll
# 				 MS.cs
# 				 MS.consensus
# 				 MS.sequences

# 				 input_file
# 				 command
# 				);

#   @matrix_array_out_fields = qw(site_ids
# 				sequences
# 				alphabet
# 				prior
# 				parameters
# 				column.information
# 			       );
#   $matrix_factory->set_out_fields(@matrix_scalar_out_fields, @matrix_array_out_fields);

#   local %matrix_by_file = (); ## index for the matrices already treated

#   ################################################################
#   ## Read data

#   ## Known sites
#   if ($known_site_file) {
#     &ReadKnownSites($known_site_file);
#   }

#   ## List of sequence files
#   if ($sequence_file_list) {
#     $task{sequences} = 0;
#     &ReadSequenceList();

#   } else {
#     ## Read clusters

#     #### Check organism name
#     if ($organism_name) {
#       ### read gene name and identifiers
#       &RSAT::message::TimeWarn("Reading ORF information")
# 	if ($verbose >= 1);
#       ## initialize the organism, which will be used both for identifying features and for choosing  background models
#       $organism = new RSAT::organism();
#       $organism->check_name($organism_name);
#       $organism->set_attribute("name", $organism_name);

#       ## Load the features of the organism in order to identify the genes
#       unless ($all_seq_file) {
# 	$organism->DefineAcceptedFeatureTypes(sort keys %accepted_feature_types);
# 	$organism->LoadFeatures($annotation_table);
# 	$organism->LoadSynonyms();
#       }

#     } elsif ($org_fam) {
#       &RSAT::message::Info("Organisms will be read from the second column of the family file") if ($verbose >= 1);
#     } else {
#       &RSAT::error::FatalError("You must either specify an organism (-org), or an input sequence file (-seq)");
#     }

#     ### Read cluster file
#     &RSAT::message::TimeWarn("Reading gene clusters") if ($main::verbose >= 1);
#     if (($org_fam) || ($all_seq_file)) {
#       %family = &ReadClasses($family_file, 0);
#     } else {
#       %family = &ReadClasses($family_file, 1, $organism);
#     }
#     &RSAT::message::Info("Gene clusters read", scalar(keys(%family))) if ($main::verbose >= 1);
#   }


#   ################################################################
#   ## Delete some clusters depending on the options

#   my %family_to_delete = ();
#   @families = sort (keys (%family));


#   ## Check minimum and maximum number of genes
#   &RSAT::message::TimeWarn("Checking min and max number of genes") if ($main::verbose >= 1);
#   foreach my $family_name (@families) {
#     my $gene_nb = scalar(@{$family{$family_name}->{members}});
#     if ($gene_nb < $min_genes) {
#       &RSAT::message::Warning("Removing family",
# 			      $fam_count, $family_name,
# 			      $gene_nb." genes",
# 			      "< min = ".$min_genes), "\n" if ($verbose >= 2);
#       $family_to_delete{$family_name}++;
#     }
#     if (($max_genes >= 0) && ($gene_nb > $max_genes)) {
#       &RSAT::message::Warning("Removing family",
# 			      $fam_count, $family_name,
# 			      $gene_nb." genes",
# 			      "> max = ".$max_genes), "\n" if ($verbose >= 2);
#       $family_to_delete{$family_name}++;
#     }
#   }

#   ## Check selected clusters
#   if (scalar(@selected) > 0) {
#     &RSAT::message::Info("Selecting user-defined clusters") if ($main::verbose >= 1);
#     my %selected = ();
#     foreach my $family_name (@selected) {
#       $selected{$family_name}++;
#       &FatalError(join ("\t", "Selected family", $family_name, "is not found in the family file", $family_file))
# 	unless defined(($family{$family_name}));
#       &RSAT::message::Warning("Selected family", $family_name) if ($verbose >= 3);
#     }

#     ## Delete non-selected clusters
#     foreach my $family_name (keys (%family)) {
#       unless ($selected{$family_name}) {
# 	$family_to_delete{$family_name}++;
#       }
#     }
#   }

#   ## Update the list of clusters
#   foreach my $family_name (keys(%family_to_delete)) {
#     delete $family{$family_name};
#     &RSAT::message::Warning("Deleted family", $family_name) if ($verbose >= 2);
#   }
#   @families = sort keys %family;

#   &RSAT::message::Info("Remaining clusters after filtering", scalar(@families)) if ($verbose >= 1);

#   ################################################################
#   ## Skip the first or last clusters if required
#   if (($skip > 0) || ($last > 0)) {

#     ## Apply the options -skip (suppress first clusters)
#     if ($skip > 0) {
#       &RSAT::message::Info("Skipping", $skip, "first clusters") if ($main::verbose >= 1);
#       for my $f (1..$skip) {
# 	$family_name = $families[$f-1];
# 	$family_to_delete{$family_name}++;
# 	&RSAT::message::Debug("Skipping first family", $f, $family_name) if ($verbose >= 2);
#       }
#     }

#     ## Apply the options -last (suppress last clusters)
#     if ($last > 0) {
#       &RSAT::message::Debug("Skipping last", $last, "clusters", (scalar(@families)-$last)) if ($verbose >= 2);
#       for my $f ($last..$#families) {
# 	$family_name = $families[$f-1];
# 	$family_to_delete{$family_name}++;
# 	&RSAT::message::Debug("Skipping last family", $f, $family_name) if ($verbose >= 2);
#       }
#     }

#     ## Update the list of clusters
#     foreach my $family_name (keys(%family_to_delete)) {
#       delete $family{$family_name};
#       &RSAT::message::Warning("Deleted family", $family_name) if ($verbose >= 2);
#     }
#     @families = sort keys %family;

#     &RSAT::message::Info(join("\t", "Remaining clusters after applying options -first and -last", scalar(@families))) if ($verbose >= 1);
#   }

#   ## Set all the file names
#   &SetFileNames();

#   ## Create output directories
#   ##
#   ## Note: the main directory is defined as
#   ## absolute path. Output directory is generally relative to the
#   ## main directory.
#   chdir($dir{main});
#   &RSAT::util::CheckOutDir($dir{output});
#   chdir($dir{output});
#   &MakeDirectories();

#   #### verbose
#   $verbose_message = &Verbose() if ($verbose >= 1);

#   ################################################################
#   ## Define the names of the background model files
#   if ($bg_seq_file) {
#     &define_bg_model_files();
#   }

#   ################################################################
#   ### perform analysis
#   &ComputeBG() if ($task{compute_bg});
#   &AnalyzeFamilies();
#   &MDCreport() if ($task{report});
#   &Validate() if ($task{validate});
#   &SigDistrib() if (($task{sig_distrib_oligos}) || ($task{sig_distrib_dyads}) || ($task{sig_distrib_orm}));
#   &ExportSQL() if ($task{sql});
#   &SyntheticTable() if ($task{synthesis});
#   &DatabaseMatch() if ($task{db_match});

#   ### Warn output dir and files
#   if ($verbose >= 1) {
#     &RSAT::message::Info("Results were stored in directory\t".$dir{output});
#     &RSAT::message::Info("Synthetic table\t", $dir{output}."/".$outfile{table});
#     if ($task{report}) {
#       &RSAT::message::Info("Result report (MDC 2004)\t", $dir{output}."/".$outfile{results});
#       &RSAT::message::Info("Parameter report (MDC 2004)\t", $dir{output}."/".$outfile{parameters});
#     }
#   }

#   my $exec_time = &RSAT::util::ReportExecutionTime($start_time); ## This has to be executed by all scripts
#   print $exec_time if ($main::verbose >= 1); ## only report exec time if verbosity is specified
#   exit(0);

# }

# ################################################################
# #
# #                 SUBROUTINE DEFINITIONS
# #
# ################################################################

# ################################################################

# #### set the file names as a function of parameters
# sub SetFileNames {
#   ## Options for dyad-analysis
#   $dyad_spacing = $min_sp."-".$max_sp;
#   $dyad_options = "-v";
#   #    $dyad_options .= " -lth occ 1"; ## This is to avoid calculating P-value for patterns not present in the dataset, and for having a correct correction for multi-testing.
#   $dyad_options .= " -sort";
#   $dyad_options .= " ".$strands;
#   $dyad_options .= " -type ".$dyad_type;
#   $dyad_options .= " ".$noov;
#   $dyad_options .= " -l ".$monad_length;
#   $dyad_options .= &ThresholdOptions();
#   #    $dyad_options .= " -lth occ_sig $lth{occ_sig} ";
#   $dyad_options .= " -sp ".$dyad_spacing;

#   $dyad_suffix = "_dyads";
#   if ($bg_seq_file) {
#     $dyad_suffix .= "_bgseq";
#   } elsif ($background) {
#     #	if ($repeat_masked) {        ### Should be here?
#     #	    $background .= "-rm";    ### Should be here?
#     #	}                            ### Should be here?
#     $dyad_suffix .= "_bg_".$background; ### TEMP
#   } else {
#     $dyad_suffix .= "_monad";	### TEMP
#   }
#   $dyad_suffix .= "_l".$monad_length;
#   $dyad_suffix .= "_sp".$dyad_spacing;
#   $dyad_suffix .= $strands;
#   $dyad_suffix .= "_".$dyad_type;
#   $dyad_suffix .= &ThresholdSuffix();
#   #    $dyad_suffix .= "_sig".$lth{occ_sig};
#   $dyad_suffix .= $noov;

#   #### Background model for dyad-analysis
#   if ($bg_seq_file) {
#     ## Background model estimated from user-specified background
#     ## sequences. If the background model file does not exist, compute
#     ## it from background sequences.
#     &define_bg_model_files() unless (defined($bg_model_file{"dyads"}));
# #    &compute_bg_dyads() unless (-e $bg_model_file{dyads});
#     $dyad_options .= " -expfreq ".$bg_model_file{"dyads"};

#   } elsif ($exp_freq_file{dyads}) {
#     ## Convert relative path in absolute path
#     unless ($exp_freq_file{dyads} =~ /^\//) {
#       $exp_freq_file{dyads} = $dir{main}."/".$exp_freq_file{dyads};
#     }
#     ### Manually specified expected frequencies
#     $dyad_options .= " -expfreq $exp_freq_file{dyads}";

#   } elsif ($background) {
#     $dyad_options .= " -bg $background ";
#     $dyad_options .= " -org ".$organism_name if ($organism_name);

#   } elsif ($background eq "mncf") {
#     $dyad_options .= " -mncf ";
#     $dyad_options .= " -org ".$organism_name if ($organism_name);
#     $dyad_suffix .= "_mncf";
#   }

#   #### suffix for the oligo-analysis file
#   $noorf_string = "_noorf" if ($noorf);
#   $oligo_suffix = "_oligos";
#   if ($bg_seq_file) {
#     $oligo_suffix .= "_bgseq";
#   } elsif ($background) {
#     $oligo_suffix .= "_bg_".$background;
#   } elsif ($markov) {
#     $oligo_suffix .= "_mkv".$markov_order;
#   }
#   $oligo_suffix .= "_".$min_oligo_len."-".$max_oligo_len."nt";
#   $oligo_suffix .= $strands;
#   $oligo_suffix .= &ThresholdSuffix();
#   #    $oligo_suffix .= "_sig".$lth{occ_sig};
#   #    if (&IsReal($lth{ms_freq})) {$oligo_suffix .= "_msf".$lth{ms_freq} ; }
#   #    if (&IsReal($lth{ms_sig})) {$oligo_suffix .= "_mssig".$lth{ms_sig} ; }
#   $oligo_suffix .= $noov;

#   foreach my $family_name (@families) {
#     my $gene_nb = scalar(@{$family{$family_name}->{members}});
#     #	warn join ("\t", "Number of genes", $family_name, $gene_nb), "\n";

#     my $expected_sites = $gene_nb*$expected_sites_per_seq;
#     $family{$family_name}->{family_file} = $family_name."/".$family_name."_".$feature_types.".fam";
#     $family{$family_name}->{known_site_file} = $family_name."/".$family_name."_known_sites";

#     ## Prefix for files
#     $family{$family_name}->{file_prefix} = $family_name;

#     unless ($all_seq_file) {
# 	if ($size_names) { ## Old naming system, maintained only for backward compatibility
# 	    $family{$family_name}->{file_prefix} .= "_up".$seq_length unless ($sequence_file_list);
# 	} else {
# 	    $family{$family_name}->{file_prefix} .= "_from".$from."_to".$to unless ($sequence_file_list);
# 	}
# 	$family{$family_name}->{file_prefix} .= "_".$feature_types;
# 	$family{$family_name}->{file_prefix} .= $noorf_string;
# 	unless ($size_names) {	## only for backward compatibility
# 	    $family{$family_name}->{file_prefix} .= '-rm' if ($repeat_masked);
# 	}
# 	if ($taxon) {
# 	    $family{$family_name}->{file_prefix} .= "_".$taxon;
# 	    $family{$family_name}->{ortho_file} = $family_name."/".$family_name."_".$feature_types."_orthologs_".$taxon.".fam";
# 	}
#     }

#     ## Sequence files	unless ($sequence_file_list) {
#     unless ($sequence_file_list) {
#       $family{$family_name}->{seq_file} = $family_name."/".$family{$family_name}->{file_prefix}.".".$seq_format;
#       $family{$family_name}->{seq_file_purged} = $family_name."/".$family{$family_name}->{file_prefix}."_purged.${seq_format}";
#     }
#     if ($analyze_purged_sequences) {
#       $family{$family_name}->{input_seq_file} = $family{$family_name}->{seq_file_purged};
#     } else {
#       $family{$family_name}->{input_seq_file} = $family{$family_name}->{seq_file};
#     }
#     $family{$family_name}->{seq_len_file} = $family_name."/".$family{$family_name}->{file_prefix}."_seq_len.tab";

#     ## Known sites for this family
#     if ($task{validate}) {
#       #	    $family{$family_name}->{known_sites} = $family_name."/".$family{$family_name}->{file_prefix}."_known_sites.tab";
#       $family{$family_name}->{known_sites} = $family_name."/".$family_name."_known_sites.tab";
#     }

#     ################################################################
#     ## oligo-analysis results
#     $family{$family_name}->{oligo_file} =  "${family_name}/oligos_${family_name}/".$family{$family_name}->{file_prefix}.$oligo_suffix;
#     $family{$family_name}->{oligo_assembly_file} = $family{$family_name}->{oligo_file}.".asmb";
#     $family{$family_name}->{oligo_pssm_file} = $family{$family_name}->{oligo_file}."_pssm";
#     $family{$family_name}->{oligo_selection} = $family{$family_name}->{oligo_file}."_selection";
#     $family{$family_name}->{oligo_ft_prefix} = $family{$family_name}->{oligo_file};
#     #	$family{$family_name}->{selection_ft_prefix} = $family{$family_name}->{oligo_ft_prefix}."_report";
#     $family{$family_name}->{oligo_ft_file} = $family{$family_name}->{oligo_ft_prefix}.".ft";
#     $family{$family_name}->{oligo_vs_known} = $family{$family_name}->{oligo_file}."__vs__known_relw".$rel_w.".tab";
#     $family{$family_name}->{oligo_vs_known_weight_table} = $family{$family_name}->{oligo_file}."__vs__known_weigth_table.tab";
#     $family{$family_name}->{oligo_vs_known_relw_table} = $family{$family_name}->{oligo_file}."__vs__known_relw_table.tab";
#     $family{$family_name}->{oligo_vs_db} = $family{$family_name}->{oligo_file}."__vs__db_".$db_site_name."_relw".$rel_w.".tab";
#     $family{$family_name}->{oligo_vs_db_weight_table} = $family{$family_name}->{oligo_file}."__vs__db_".$db_site_name."_weigth_table.tab";
#     $family{$family_name}->{oligo_vs_db_relw_table} = $family{$family_name}->{oligo_file}."__vs__db_".$db_site_name."_relw_table.tab";

#     ################################################################
#     ## dyad-analysis results
#     $family{$family_name}->{dyad_file} = "$family_name/dyads_${family_name}/".$family{$family_name}->{file_prefix}.$dyad_suffix;
#     $family{$family_name}->{dyad_assembly_file} = $family{$family_name}->{dyad_file}.".asmb";
#     $family{$family_name}->{dyad_pssm_file} = $family{$family_name}->{dyad_file}."_pssm";
#     $family{$family_name}->{dyad_ft_prefix} = $family{$family_name}->{dyad_file};
#     $family{$family_name}->{dyad_ft_file} = $family{$family_name}->{dyad_ft_prefix}.".ft";
#     $family{$family_name}->{fmap_file} = $family{$family_name}->{dyad_file}.".".$img_format;
#     $family{$family_name}->{htmap_file} = $family{$family_name}->{dyad_file}.".html";
#     $family{$family_name}->{dyad_vs_known} = $family{$family_name}->{dyad_file}."__vs__known_relw".$rel_w.".tab";
#     $family{$family_name}->{dyad_vs_known_weight_table} = $family{$family_name}->{dyad_file}."__vs__known_weight_table.tab";
#     $family{$family_name}->{dyad_vs_known_relw_table} = $family{$family_name}->{dyad_file}."__vs__known_relw_table.tab";
#     $family{$family_name}->{dyad_vs_db} = $family{$family_name}->{dyad_file}."__vs__db_".$db_site_name."relw".$rel_w.".tab";
#     $family{$family_name}->{dyad_vs_db_weight_table} = $family{$family_name}->{dyad_file}."__vs__db_".$db_site_name."weigth_table.tab";
#     $family{$family_name}->{dyad_vs_db_relw_table} = $family{$family_name}->{dyad_file}."__vs__db_".$db_site_name."relw_table.tab";

#     ################################################################
#     ## oligos + dyads
#     $family{$family_name}->{merged_file} =  "${family_name}/merged_${family_name}/".$family{$family_name}->{file_prefix}.$oligo_suffix.$dyad_suffix;
#     $family{$family_name}->{merged_ft_file} = $family{$family_name}->{merged_file}.".ft";

#     ################################################################
#     ## Andrew Neuwald's Gibbs 95 options and files
#     my $gibbs_options = "";
#     my $gibbs_suffix = "";

#     ## input file
#     $family{$family_name}->{gibbs_input_seq_file} = $family{$family_name}->{input_seq_file}.".gibbs";
#     $gibbs_options .= " ".$family{$family_name}->{gibbs_input_seq_file};

#     ## Matrix width
#     my @gibbs_widths = ();
#     my @gibbs_expected_sites = ();
#     my $gene_nb = scalar(@{$family{$family_name}->{members}});
#     my $expected_sites = $gene_nb*$expected_sites_per_seq;
#     for my $i (1..$nmotifs) {
#       push @gibbs_widths, $matrix_width;
#       push @gibbs_expected_sites, $expected_sites;
#     }
#     $gibbs_options .= " ".join(",",  @gibbs_widths);
#     $gibbs_suffix .= "-nmotifs".$nmotifs;
#     $gibbs_suffix .= "-L".$matrix_width;

#     ## Expected number of sites
#     $gibbs_options .= " ".join(",", @gibbs_expected_sites);
#     $gibbs_suffix .= "-n".$expected_sites;

#     ## DON'T use fragmentation (i.e., column sampler)
#     $gibbs_options .= " -d";
#     $gibbs_suffix .= "-d";

#     ## Use nucleic acid alphabet
#     $gibbs_options .= " -n";
#     $gibbs_suffix .= "-n";

#     ## Random seed
#     if (defined($seed)) {
#       $gibbs_options .= " -s ".$seed;
#       $gibbs_suffix .= "-s".$seed;
#     }

#     ## Assign gibbs options to the family
#     $family{$family_name}->{gibbs_suffix} = $gibbs_suffix;
#     $family{$family_name}->{gibbs_options} = $gibbs_options;
#     $family{$family_name}->{gibbs_dir} = ${family_name}."/gibbs_".${family_name};
#     $family{$family_name}->{gibbs_file} = $family{$family_name}->{gibbs_dir}."/".$family{$family_name}->{file_prefix}.$gibbs_suffix;

#     ################################################################
#     ## Roth's AlignACE
#     my $AlignACE_options = "";
#     my $AlignACE_suffix = "";

#     $family{$family_name}->{AlignACE_input_seq_file} = $family{$family_name}->{input_seq_file}.".AlignACE";

#     ## input file
#     $AlignACE_options .= " -i ".$family{$family_name}->{AlignACE_input_seq_file};

#     ## Matrix width
#     $AlignACE_options .= " -numcols ".$matrix_width;
#     $AlignACE_suffix .= "-L".$matrix_width;

#     ## Expected number of sites
#     my $gene_nb = scalar(@{$family{$family_name}->{members}});
#     my $expected_sites = $gene_nb*$expected_sites_per_seq;
#     $alignace_options .= " -expect ".$expected_sites;
#     $AlignACE_suffix .= "-n".$expected_sites;

#     ## background fractional GC content of input sequence (0.38)
#     $AlignACE_options .= " -gcback ".$AlignACE_gcback;
#     $AlignACE_suffix .= "-gcback".sprintf("%.2f",$AlignACE_gcback);

#     ## Random seed
#     if (defined($seed)) {
#       $AlignACE_options .= " -s ".$seed;
#       $AlignACE_suffix .= "-s".$seed;
#     }

#     ## Assign AlignACE options to the family
#     $family{$family_name}->{AlignACE_suffix} = $AlignACE_suffix;
#     $family{$family_name}->{AlignACE_options} = $AlignACE_options;
#     $family{$family_name}->{AlignACE_dir} = ${family_name}."/AlignACE_".${family_name};
#     $family{$family_name}->{AlignACE_file} = $family{$family_name}->{AlignACE_dir}."/".$family{$family_name}->{file_prefix}.$AlignACE_suffix;

#     ################################################################
#     ## infogibbs
#     ## version developed by Matthieu Defrance
#     my $infogibbs_options = "";
#     my $infogibbs_suffix = "";

#     ## input file
#     $infogibbs_options .= " -i ".$family{$family_name}->{input_seq_file};

#     ## Matrix width
#     $infogibbs_options .= " -l ".$matrix_width;
#     $infogibbs_suffix .= "-l".$matrix_width;

#     ## Expected number of sites
#     my $gene_nb = scalar(@{$family{$family_name}->{members}});
#     $infogibbs_options .= " -e ".$expected_sites_per_seq;
#     $infogibbs_suffix .= "-e".$expected_sites_per_seq;

#     ## Strand
#     $infogibbs_suffix .= $strands;
#     if ($strands eq "-2str") {
#       $infogibbs_options .= " -s +- ";
#     } else {
#       $infogibbs_options .= " -s + ";
#     }

#     ## Number of motifs is not yet implemented
#     #	$infogibbs_options .= " -m ".$nmotifs;
#     #	$infogibbs_suffix .= "-m".$nmotifs;

#     ## Add other options
#     foreach my $option (@infogibbs_options) {
#       if (length($option) == 1) {
# 	$infogibbs_options .= " -".$option." ".$infogibbs_options{$option};
#       } elsif (length($option) > 1) {
# 	$infogibbs_options .= " --".$option." ".$infogibbs_options{$option};
#       }
#       unless ($option eq "bfile") {
# 	$infogibbs_suffix .= "_".$option.$infogibbs_options{$option};
#       }
#     }

#     ## Amplification factor
#     #	$infogibbs_options .= " -e ".$infogibbs_e;
#     #	$infogibbs_suffix .= "-e".$infogibbs_e;

#     ## Random seed
#     # if (defined($seed)) {
#     #     $infogibbs_options .= " -s ".$seed;
#     #     $infogibbs_suffix .= "-s".$seed;
#     # }

#     ## Assign infogibbs options to the family
#     $family{$family_name}->{infogibbs_suffix} = $infogibbs_suffix;
#     $family{$family_name}->{infogibbs_options} = $infogibbs_options;
#     $family{$family_name}->{infogibbs_dir} = ${family_name}."/infogibbs_".${family_name};
#     $family{$family_name}->{infogibbs_file} = $family{$family_name}->{infogibbs_dir}."/".$family{$family_name}->{file_prefix}.$infogibbs_suffix;

#     ################################################################
#     ## Gert Thijs' MotifSampler options and files
#     my $MS_options = "";
#     my $MS_suffix = "";

#     ## input file
#     $MS_options = " -f ".$family{$family_name}->{input_seq_file};

#     ## background model
#     if ($MS_b) {
#       $MS_options .= " -b ".$MS_b;
#     } else {
#       $family{$family_name}->{MotifSamplerBackground} = $dir{output}."/".$family{$family_name}->{input_seq_file}."_MotifSampler_bg".$MS_bg_order;
#       $MS_options .= " -b ".$family{$family_name}->{MotifSamplerBackground};
#     }

#     ## strand
#     if ($strands eq "-1str") {
#       $MS_options .= " -s 0";
#       $MS_suffix .= "-s0";
#     } else {
#       $MS_options .= " -s 1";
#       $MS_suffix .= "-s1";
#     }

#     ## prior
#     $MS_options .= " -p ".$MS_p;
#     $MS_suffix .= "-p".$MS_p;

#     ## Maximal number of motif instances per sequence. (default unset = 0)
#     $MS_options .= " -M ".$MS_M;
#     $MS_suffix .= "-M".$MS_M;

#     ## Sets number of different motifs to search for (default 1).
#     $MS_options .= " -n ".$MS_n;
#     $MS_suffix .= "-n".$MS_n;

#     ## Sets length of the motif (default 8).
#     $MS_options .= " -w ".$matrix_width;
#     $MS_suffix .= "-w".$matrix_width;

#     ## Sets allowed overlap between different motifs. (default 1)
#     $MS_options .= " -x ".$MS_x;
#     $MS_suffix .= "-x".$MS_x;

#     ## Set number of times the MotifSampler should be repeated
#     $MS_options .= " -r ".$MS_r;
#     $MS_suffix .= "-r".$MS_r;

#     ## output file

#     $family{$family_name}->{MotifSampler_dir} = "${family_name}/MotifSampler_${family_name}";
#     $family{$family_name}->{MotifSampler_file} = $family{$family_name}->{MotifSampler_dir}."/".$family{$family_name}->{file_prefix}.$MS_suffix.".sites";
#     $MS_options .= " -o ".$family{$family_name}->{MotifSampler_file};

#     $family{$family_name}->{MotifSampler_matrix} = $family{$family_name}->{MotifSampler_dir}."/".$family{$family_name}->{file_prefix}.$MS_suffix.".matrix";
#     $MS_options .= " -m ".$family{$family_name}->{MotifSampler_matrix};

#     ## Assign MotifSampler options to the family
#     $family{$family_name}->{MotifSampler_suffix} = $MS_suffix;
#     $family{$family_name}->{MotifSampler_options} = $MS_options;


#     ################################################################
#     ## MEME
#     my $meme_options = "";
#     my $meme_suffix = "";

#     ## input file
#     $family{$family_name}->{meme_input_seq_file} = $family{$family_name}->{input_seq_file}.".meme";
#     $meme_options .= " ".$family{$family_name}->{meme_input_seq_file};

#     ## strands
#     $meme_suffix .= $strands;
#     unless ($strands eq "-1str") {
#       $meme_options .= " -revcomp";
#     }

#     ## If background sequences have been specified, check that the
#     ## background model file exists.
#     if ($bg_seq_file) {
#       &define_bg_model_files() unless (defined( $bg_model_file{"meme"}));
#       #	  &compute_bg_meme() unless (-e $bg_model_file{meme});
#       $meme_options .= " -bfile ".$bg_model_file{meme};
#     }

#     ## Background frequency file specified with the option -MEME_bfile
#     if ($MEME_bfile) {
#       $meme_options .= " -bfile ".$MEME_bfile;
#     }

#     foreach my $option (@MEME_options) {
#       $meme_options .= " -".$option." ".$MEME_options{$option};
#       unless ($option eq "bfile") {
# 	$meme_suffix .= "_".$option.$MEME_options{$option};
#       }
#     }

#     ## Output file
#     $family{$family_name}->{meme_dir} = ${family_name}."/"."meme_".${family_name};
#     $family{$family_name}->{meme_file} = $family{$family_name}->{meme_dir}."/".$family{$family_name}->{file_prefix}.$meme_suffix;
#     $meme_options .= " > ".$family{$family_name}->{meme_file};

#     $family{$family_name}->{meme_options} = $meme_options;
#     $family{$family_name}->{meme_suffix} = $meme_suffix;

#     ################################################################
#     ## orm

#     my $orm_options = "";

#     ## Options for orm
#     my $orm_suffix = "";

#     ## Word length
#     $orm_options .= " --length=".$orm_word_length;
#     $orm_suffix .= "w".$orm_word_length;

#     ## Location
#     #my $orm_location = $from.":".$to;
#     #$orm_options .= " --location=".$orm_location;
#     #$orm_suffix .= $from."_".$to;

#     ## Window size
#     $orm_options .= " --window=".$orm_window;
#     $orm_suffix .= "_W".$orm_window;

#     ## Strand
#     $orm_options .= " --strand=".$orm_strand;
#     if ($orm_strand eq "+-") {
#       $orm_suffix .= "-2str";
#     } else {
#       $orm_suffix .= "-1str";
#     }

#     ## Self-overlapping words
#     if ($orm_overlap) {
#       $orm_options .= " --overlap";
#       $orm_suffix .= "-ovlp";
#     } else {
#       $orm_suffix .= "-noov";
#     }

#     ## Markov model
#     if ($orm_markov_order >= 0) {
#       $orm_options .= " --markov=".$orm_markov_order;
#       $orm_suffix .= "_m".$orm_markov_order;
#     } elsif ($exp_freq_file{oligos}) {
#       ## Convert a relative path into an absolute path
#       unless ($exp_freq_file{oligos} =~ /^\//) {
# 	$exp_freq_file{oligos} = $dir{main}."/".$exp_freq_file{oligos};

#       }
#       $orm_options .= " --bgoligo=". $exp_freq_file{oligos};

#     }

#     ## Number of top motifs
#     $orm_options .= " --max=rank ".$orm_uth_rank;
#     $orm_suffix .= "_rank".$orm_uth_rank;

#     ## Occurrence significance
#     $orm_options .= " --min=occ_sig ".$orm_lth_occ_sig;
#     $orm_suffix .= "_occ_sig".$orm_lth_occ_sig;
#     $orm_options .= " --sort=-occ_sig";

#     ## Window width
#     $orm_options .= " --min=width ".$orm_lth_width;
#     $orm_suffix .= "_minwidth".$orm_lth_width;

#     ## Word rank
#     $orm_options .= " --max=w_rank ".$orm_uth_wrank;
#     $orm_suffix .= "_wrank".$orm_uth_wrank;

#     ## Fixed window size
#     #	if ($ORM_fixedsizewindow) {
#     #	    $orm_options .= " --fixedsizewindow";
#     #	    $orm_suffix .= "_z";
#     #	}

#     ## input file
#     $orm_options .= " --input=".$family{$family_name}->{input_seq_file};

#     ## Output file
#     $family{$family_name}->{orm_dir} = ${family_name}."/"."orm_".${family_name};
#     $family{$family_name}->{orm_file} = $family{$family_name}->{orm_dir}."/".$family{$family_name}->{file_prefix}."_".$orm_suffix;
#     $orm_options .= " --output=".$family{$family_name}->{orm_file};
#     $family{$family_name}->{orm_options} = $orm_options;
#     $family{$family_name}->{orm_suffix} = $orm_suffix;

#     $family{$family_name}->{orm_assembly_file} = $family{$family_name}->{orm_file}.".asmb";
#     $family{$family_name}->{orm_pssm_file} = $family{$family_name}->{orm_file}."_pssm";
#     #	$family{$family_name}->{orm_selection} = $family{$family_name}->{orm_file}."_selection";
#     $family{$family_name}->{orm_ft_prefix} = $family{$family_name}->{orm_file};
#     #	$family{$family_name}->{selection_ft_prefix} = $family{$family_name}->{orm_ft_prefix}."_report";
#     $family{$family_name}->{orm_ft_file} = $family{$family_name}->{orm_ft_prefix}.".ft";
#     $family{$family_name}->{orm_vs_known} = $family{$family_name}->{orm_file}."__vs__known_relw".$rel_w.".tab";
#     $family{$family_name}->{orm_vs_known_weight_table} = $family{$family_name}->{orm_file}."__vs__known_weigth_table.tab";
#     $family{$family_name}->{orm_vs_known_relw_table} = $family{$family_name}->{orm_file}."__vs__known_relw_table.tab";

#     ################################################################
#     ## Consensus options and files
#     my $consensus_options = "";
#     my $consensus_suffix = "";

#     ## Number of matrices to print
#     $consensus_options .= " -pf 1 -pt 1";

#     ## strands
#     if ($strands eq "-1str") {
#       $consensus_options .= " -c0"; ## Single-strand search
#       $consensus_suffix .= "-c0";
#     } else {
#       $consensus_options .= " -c2"; ## Double-strand search
#       $consensus_suffix .= "-c2";
#     }

#     ## Matrix width
#     $consensus_options .= " -L ".$matrix_width;
#     $consensus_suffix .= "-L".$matrix_width;

#     ## alphabet
#     $consensus_options .= " -A a:t 0.325 c:g 0.175";

#     ## Use designated prior frequencies
#     #    $consensus_options .= " -d";

#     ## Expected number of sites
#     $consensus_options .= " -n ".$expected_sites;
#     $consensus_suffix .= "-n".$expected_sites;
#     $family{$family_name}->{consensus_suffix} = $consensus_suffix;
#     $family{$family_name}->{consensus_options} = $consensus_options;
#     $family{$family_name}->{consensus_dir} = "${family_name}/consensus_${family_name}";
#     $family{$family_name}->{consensus_file} = $family{$family_name}->{consensus_dir}."/".$family{$family_name}->{file_prefix}.$consensus_suffix;
#   }


#   ################################################################
#   ## Suffix for the synthetic table
#   if ($family_file) {
#     $table_suffix = $family_file;
#   } elsif ($sequence_file_list) {
#     $table_suffix .= $sequence_file_list;
#     $table_suffix =~  s/\.tab$//;
#     $table_suffix =~  s/\.txt$//;
#   } else {
#     $table_suffix = $organism_name;
#   }
#   if ($bg_seq_file) {
#     $table_suffix .= "_bgseq";
#   } elsif ($background) {
#     $table_suffix .= "_bg_${background}";
#   } else {
#     $table_suffix .= "_mkv${markov_order}";
#   }

#   if ($family_file) {
#     $table_suffix .= "_up".$from     if (defined($from));
#     $table_suffix .= "_".$to     if (defined($to));
#     $table_suffix .= "_".$feature_types  if (defined($feature_types));
#     $table_suffix .= $noorf_string;
#   }
#   if ($analyze_purged_sequences) {
#     $table_suffix .= "-purge";
#   } else {
#     $table_suffix .= "-nopurge";
#   }
#   $table_suffix .= "_".$min_oligo_len."nt";
#   $table_suffix .= "_".$max_oligo_len."nt";
#   $table_suffix .= $noov;
#   $table_suffix .= $strands;
#   $table_suffix .= &ThresholdSuffix();
#   #    $table_suffix .= "_sig".$lth{occ_sig};
#   $table_suffix .= "_mtx_width".$matrix_width;
#   $table_suffix = `basename $table_suffix`;
#   chomp $table_suffix;

#   ################################################################
#   ## Name of the report for the Motif Discovery Competition 2004
#   $dir{mdc_report} = "mdc_report";
#   $outfile{results} = $dir{mdc_report}."/".$table_suffix."_results.txt";
#   $outfile{parameters} = $dir{mdc_report}."/".$table_suffix."_parameters.txt";
# }

# ################################################################
# ##################### SUBROUTINE DEFINITION ####################


# ## ##############################################################
# ## Store the sequences for one family if sequences have been provided
# ## with the option -all_seq
# sub StoreSequences  {
#   my @members = @{$family{$family_name}->{members}};
#   $out = &OpenOutputFile($family{$family_name}->{seq_file});
#   foreach my $member (@members) {
#     if (defined($all_sequences{lc($member)})) {
#       my $current_seq = $all_sequences{lc($member)};
#       my $current_id = $member;
#       &PrintNextSequence($out, $seq_format, 0, $current_seq, $current_id);
#     } else {
#       &RSAT::message::Warning("No sequence with ID", $member,"in  file", $all_seq_file);
#     }
#   }
#   close $out;
#   &RSAT::message::Warning($family_name, "Stored sequence in file", $family{$family_name}->{seq_file}) if ($main::verbose >= 2);
# }

# ################################################################
# #### retrieve upstream sequences
# sub RetrieveSequences {
#   &RSAT::message::TimeWarn ("Retrieving upstream sequences for family ".$family_name, $organism_name) if ($verbose >= 2);
#   my $command;
#   if ($taxon) {
#     $command = "get-orthologs";
#     $command .= " -org ". $organism_name;
#     $command .= " -taxon ". $taxon;
#     $command .= " -i ".$family{$family_name}->{family_file};
#     $command .= " -o ".$family{$family_name}->{ortho_file};
#     $command .= "; retrieve-seq-multigenome";
#     $command .= " -label organism_name,id,name ";
#     $command .= " -i ".$family{$family_name}->{ortho_file};
#   } else {
#     $command = "retrieve-seq";
#     $command .= " -label id,name ";
#     $command .= " -i ".$family{$family_name}->{family_file};
#     $command .= " -org ". $organism_name;
#   }
#   $command .= " -imp_pos -type ".$seq_type;
#   $command .= " -noorf " if ($noorf);
#   $command .= " -rm " if ($repeat_masked);
#   $command .= " -o ".$family{$family_name}->{seq_file};
#   $command .= " -from ".$from if (defined($from));
#   $command .= " -to ".$to if (defined($to));
#   $command .= " -feattype $feature_types " if ($feature_types);
#   $command .= " -format $seq_format";
#   if ($batch) {
#     push @main::batch_commands, $command;
#   } else {
#     &doit($command, $dry_run, $die_on_error, $verbose);
#   }
# }

# sub RetrieveSequencesEnsEMBL {
#   &RSAT::message::TimeWarn ("Retrieving upstream sequences for family ".$family_name, $organism_name) if ($verbose >= 2);
#   $organism_name =~ s/_EnsEMBL//;
#   my $command;
#   $command = "retrieve-ensembl-seq.pl";
#   #    $command .= " -label id,name ";
#   $command .= " -ensemblhost xserve2 -alltranscripts";
#   $command .= " -i ".$family{$family_name}->{family_file};
#   $command .= " -org ". $organism_name;
#   $command .= " -type upstream -maskcoding";
#   $command .= " -noorf " if ($noorf);
#   $command .= " -rm " if ($repeat_masked);
#   $command .= " -o ".$family{$family_name}->{seq_file};
#   $command .= " -from ".$from if (defined($from));
#   $command .= " -to ".$to if (defined($to));
#   $command .= " -feattype $feature_types " if ($feature_types);
#   #    $command .= " -format $seq_format";
#   if ($batch) {
#     push @main::batch_commands, $command;
#   } else {
#     &doit($command, $dry_run, $die_on_error, $verbose);
#   }
# }

# ################################################################
# #### Purge input sequences
# sub PurgeSequences {
#   &RSAT::message::TimeWarn("Purging sequences for family\t".$family_name) if ($verbose >= 2);

#   ## Remove sequences of length 0
#   $command = "convert-seq -dna -from fasta -to fasta -lw 0 -skip_short 1";
#   $command .= " -i ".$family{$family_name}->{seq_file};
#   $command .= " | purge-sequence -format fasta -ml ".$purge_ml." -mis ".$purge_mis;
#   $command .= " -o ".$family{$family_name}->{seq_file_purged};
#   if ($batch) {
#     push @main::batch_commands, $command;
#   } else {
#     &doit($command, $dry_run, $die_on_error, $verbose);
#   }
# }


# ################################################################
# ### Calculate options and file names according to the selected
# ### parameters
# sub CalcOligoOptions {
#   my ($oligo_len) = @_;
#   $oligo_options = "-v";
#   #   $oligo_options .= " -lth occ 1 "; ## This is to avoid calculating P-values for patterns not present in the dataset, and to obtain a correct correction for multi-testing.
#   $oligo_options .= " -two_tails" if ($two_tails);
#   $oligo_options .= " -pseudo ".$oligo_pseudo if ($oligo_pseudo);
# #  $oligo_options .= " -quick ";
#   $oligo_options .= " -sort ";
#   $oligo_options .= " ".$strands;
#   $oligo_options .= " -l ".$oligo_len;
#   $oligo_options .= &ThresholdOptions();
#   #   $oligo_options .= " -lth occ_sig ".$lth{occ_sig};
#   #   if (&IsReal($lth{ms_freq})) {$oligo_options .= " -thmsf ".$lth{ms_freq} ; }
#   #   if (&IsReal($lth{ms_sig})) {$oligo_options .= " -thmssig ".$lth{ms_sig} ; }

#   if ($mask) {
#     $oligo_options .= " -mask ".$mask;
#   }

#   if ($bg_seq_file) {
#     ## Background model estimated from user-specified background
#     ## sequences. If the background model file does not exist, compute
#     ## it from background sequences.
#     my $bg_model_file = $bg_model_file{"oligos_".$oligo_len."nt"};
#     &define_bg_model_files() unless (defined($bg_model_file));
#     &compute_bg_oligos() unless (-e $bg_model_file);
#     $oligo_options .= " -expfreq ".$bg_model_file;

#   } elsif ($exp_freq_file{oligos}) {
#     ### Manually specified expected frequencies

#     ## Convert a relative path into an absolute path
#     unless ($exp_freq_file{oligos} =~ /^\//) {
#       $exp_freq_file{oligos} = $dir{main}."/".$exp_freq_file{oligos};
#     }
#     $oligo_options .= " -expfreq ".$exp_freq_file{oligos};

#   } elsif ($background eq "calib1") {
#     ### Single-sequence based calibrated occurrences (mean and var per sequence)
#     my $calib_length = $family{$family_name}->{calib_length};
#     my $calib_file = &CalibrationPrefix($calib_length, $oligo_len);
#     $calib_file .= "_negbin.tab";
#     ### Check whether the calibration file exists
#     unless ((-e $calib_file) || (-e $calib_file.".gz")) {
#       &RSAT::error::FatalError ("Calibration file not found\t".$calib.file);
#     }
#     $oligo_options .= " -calib1 ".$calib_file;
#     &RSAT::message::Info("Oligonucleotide calibration", $calib_length, $calib_file) if ($verbose >= 3);

#   } elsif ($background eq "calibN") {
#     ### Set-based calibrated occurrences (mean and var per set of N sequences)
#     my $calib_length = $family{$family_name}->{calib_length};
#     my $N = scalar(@{$family{$family_name}->{members}});
#     my $calib_file = &CalibrationPrefix($calib_length, $oligo_len, $N);
#     $calib_file .= "_negbin.tab";

#     ### Check whether the calibration file exists
#     unless ((-e $calib_file) || (-e $calib_file.".gz")) {
#       ### temporary: if there is no R10000 file, try R1000
#       $calib_file =~ s/10000/1000/g;
#       if ((-e $calib_file) || (-e $calib_file.".gz")) {
# 	&RSAT::message::Warning("Using calibration with 1000 repetitions\t".$calib_file);
#       } else {
# 	### temporary: if there is no R1000 file, try R100
# 	$calib_file =~ s/1000/100/g;
# 	if ((-e $calib_file) || (-e $calib_file.".gz")) {
# 	  &RSAT::message::Warning("Using calibration with 100 repetitions\t".$calib_file);
# 	} else {
# 	  &RSAT::error::FatalError ("Calibration file not found\t".$calib_file);
# 	}
#       }
#     }
#     $oligo_options .= " -calibN ".$calib_file;
#     &RSAT::message::Info("Oligonucleotide calibration", $calib_length, $calib_file) if ($verbose >= 3);

#   } elsif (($organism_name) && ($background)) {
#     ### Pre-calculated expected frequency files
#     $oligo_options .= " -bg ".$background;
#     $oligo_options .= " -org ".$organism_name;

#   } elsif ($markov) {
#     ### Markov chain model
#     $oligo_options .= " -markov ".$markov_order;

#   } else {
#     &RSAT::error::FatalError("You must specify a method for estimating the background model.");
#   }
#   $oligo_options .= " -return occ,mseq,freq,proba,rank,zscore,ratio";
#   $oligo_options .= " ".$noov;

#   my $oligo_suffix = "_oligos";
#   if ($bg_seq_file) {
#     $oligo_suffix .= "_bgseq";
#   } elsif ($background) {
#     $oligo_suffix .= "_bg_".$background;
#   } elsif ($markov) {
#     $oligo_suffix .= "_mkv".$markov_order;
#   }
#   $oligo_suffix .= "_${oligo_len}nt";
#   $oligo_suffix .= $strands;
#   $oligo_suffix .= &ThresholdSuffix();
#   #   $oligo_suffix .= "_sig".$lth{occ_sig};
#   #   $oligo_suffix .= "_thmsf".$lth{ms_freq} if (&IsReal($lth{ms_freq}));
#   #   $oligo_suffix .= "_thmssig".$lth{ms_sig} if (&IsReal($lth{ms_sig}));
#   $oligo_suffix .= $noov;

#   $oligo_file = "$family_name/oligos_${family_name}/".$family{$family_name}->{file_prefix}.$oligo_suffix;
#   $command = "oligo-analysis -i $family{$family_name}->{input_seq_file} -format $seq_format -o $oligo_file $oligo_options";

#   return ($command, $oligo_file, $oligo_suffix, $oligo_options);
# }


# ################################################################
# ### oligo-analysis
# sub OligoAnalysis {
#   &RSAT::message::TimeWarn("Analyzing oligonucleotides for family $family_name",
# 			   "min len: $min_oligo_len",
# 			   "max len: $max_oligo_len") if ($verbose >= 2);
#   my $dir = "${family_name}/oligos_${family_name}";

#   chdir($dir{main}); chdir($dir{output}); &RSAT::util::CheckOutDir($dir);
#   &RSAT::error::FatalError( "Cannot read file $family{$family_name}->{input_seq_file}")
#     unless (($batch)  || ($dry_run) || ($dry_run) || (-r $family{$family_name}->{input_seq_file}));

#   #    system "echo '' > $family{$family_name}->{oligo_file}"; # if ($verbose >= 2);

#   ## Analyze all oligo lengths
#   my @oligo_commands = ();
#   for $oligo_len ($min_oligo_len..$max_oligo_len) {
#       my ($one_oligo_command, $one_oligo_file) = &CalcOligoOptions($oligo_len);
#       push @oligo_commands, $one_oligo_command;
#       #	&doit($command, $dry_run, $die_on_error, $verbose);
#       #	system "cat $oligo_file >> $family{$family_name}->{oligo_file}";
#   }
  
#   ## Merge the results obtained with different oligo lengths
#   my $merge_command = &MergeOligoLengths();
#   push @oligo_commands, $merge_command;

#   ## Assemble the merged patterns
#   my $assemble_command = &AssembleOligos();
#   push @oligo_commands, $assemble_command;
  
#   if ($batch) {
#       push @main::batch_commands, @oligo_commands;
#   } else {
#       my $command = join "; \\\n", @oligo_commands;
#       &doit($command, $dry_run, $die_on_error, $verbose);
#   }
# }

# ################################################################
# ## Merge the results of oligo-analysis for different oligo lengths
# sub MergeOligoLengths {
#   &RSAT::message::TimeWarn("Merging oligonucleotides for family ".$family_name,
# 			   "min len: ".$min_oligo_len,
# 			   "max len:".$max_oligo_len,
# 			   $family{$family_name}->{oligo_file}) if ($verbose >= 2);
#   my $dir = "${family_name}/oligos_${family_name}";

#    chdir($dir{main}); chdir($dir{output}); &RSAT::util::CheckOutDir($dir);
#    &RSAT::error::FatalError("Cannot read file ".$family{$family_name}->{input_seq_file}) unless (($batch) || ($dry_run) ||(-r $family{$family_name}->{input_seq_file}));

#   ## initialize the oligo file
# #  my @merge_commands = "echo '' > $family{$family_name}->{oligo_file}";
#   my @merge_commands = "rm -f ".$family{$family_name}->{oligo_file};
#   for $oligo_len ($min_oligo_len..$max_oligo_len) {
#     ($command, $oligo_file) = &CalcOligoOptions($oligo_len);
#     push @merge_commands, "cat ".$oligo_file." >> ".$family{$family_name}->{oligo_file};
#   }

#    my $merge_command = join "; \\\n", @merge_commands;
#    if ($task{oligos}) {
#        return $merge_command;
#    } else {
#        if ($batch) {
# 	   push @main::batch_commands, @merge_commands;
#        } else {
# 	   &doit($merge_command, $dry_run, 0, $verbose);
#        }
#    }
# }


# ################################################################
# ### Assemble the merged oligos with pattern-assembly and, when the pssm
# ### task is active, append the MatrixFromPatterns command
# sub AssembleOligos {
#   &RSAT::message::TimeWarn("Merging oligonucleotides for family ".$family_name,
# 			   "min len: ".$min_oligo_len,
# 			   "max len:".$max_oligo_len,
# 			   $family{$family_name}->{oligo_file}) if ($verbose >= 2);
#   my $dir = "${family_name}/oligos_${family_name}";

#   chdir($dir{main}); chdir($dir{output}); &RSAT::util::CheckOutDir($dir);
#   &RSAT::error::FatalError("Cannot read file ".$family{$family_name}->{input_seq_file}) unless (($batch) || ($dry_run) ||(-r $family{$family_name}->{input_seq_file}));
#   my @assemble_commands = ();

#   #    ## initialize the oligo file
#   #    my @assemble_commands = "echo '' > $family{$family_name}->{oligo_file}"; # if ($verbose >= 2);
#   #    for $oligo_len ($min_oligo_len..$max_oligo_len) {
#   # 	($command, $oligo_file) = &CalcOligoOptions($oligo_len);
#   # 	push @assemble_commands, "cat ".$oligo_file." >> ".$family{$family_name}->{oligo_file};
#   #    }

#   ### pattern assembly
#   &RSAT::message::TimeWarn("Assembling oligo patterns", $family{$family_name}->{oligo_assembly_file})
#     if ($verbose >= 2);
#   my $assembly_command = "pattern-assembly -v 1";
#   $assembly_command .= " -maxfl 1 -toppat ".$toppat." -subst 1 ".$strands;
#   $assembly_command .= " -max_asmb_nb ".$nmotifs;
#   $assembly_command .= " -i ".$family{$family_name}->{oligo_file};
#   $assembly_command .= " -o ".$family{$family_name}->{oligo_assembly_file};
#   push @assemble_commands, $assembly_command;

#   ### extract PSSM from assembled patterns
#   #    &RSAT::message::TimeWarn("Matrix from oligo patterns", $family{$family_name}->{oligo_pssm_file})
#   #        if ($verbose >= 2);
#   #    my $pssm_command = "matrix-from-patterns -v 1 ";
#   #    $pssm_command .= " -seq ".$family{$family_name}->{seq_file};
#   #    $pssm_command .= " -asmb ".$family{$family_name}->{oligo_assembly_file};
#   #    $pssm_command .= " -bginput -markov 0 ";
#   #    $pssm_command .= " -o ".$family{$family_name}->{oligo_pssm_file};
#   if ($task{pssm}) {
#       $pssm_command = &MatrixFromPatterns($family{$family_name}->{oligo_assembly_file},
# 					  $family{$family_name}->{oligo_pssm_file}, "oligos");
#   }
#   push @assemble_commands, $pssm_command if ($task{pssm});

#   my $assemble_command = join "; \\\n", @assemble_commands;
#   if ($task{oligos}) {
#     return $assemble_command;
#   } else {
#     if ($batch) {
#       push @main::batch_commands, @assemble_commands;
#     } else {
#       &doit($assemble_command, $dry_run, 0, $verbose);
#     }
#   }
# }


# ################################################################
# ## Match discovered patterns (oligos and dyads only)
# sub MatchPatterns {
#    my ($oligo_file, $fmap_prefix, $options) = @_;

#    ## check file names
#    $oligo_file = $family{$family_name}->{oligo_file} unless ($oligo_file);
#    $fmap_prefix = $oligo_file unless ($fmap_prefix);

#    ### pattern matching
#    &RSAT::error::FatalError("Cannot read file $family{$family_name}->{seq_file}") unless (($batch) || ($dry_run) || (-r $family{$family_name}->{seq_file}));
#    my $feature_file = $fmap_prefix.".ft";
#    &RSAT::message::Info("Matching patterns",
# 			"\n;\t", $oligo_file,
# 			"\n;\t", $feature_file,
# 		       )  if ($verbose >= 2);

#    my $command = "dna-pattern -i ".$family{$family_name}->{seq_file};
#    $command .= " -format ".$seq_format;
#    $command .= " -pl ".$oligo_file;
#    $command .= " -origin ".$map_origin;
#    $command .= " ".$strands;
#    $command .= " -return sites,limits ";
#    $command .= $options;
#    $command .= " | features-from-dnapat -o ".$feature_file;

#    ## Check if pattern matching is OK (sometimes there are no patterns)
#    if ($batch) {
#        push @main::batch_commands, $command;
#    } else {
#        my $error = &doit($command, $dry_run, 0, $verbose);
#        if ($error) {
# 	   &RSAT::message::Warning("Feature map skipped because dna-pattern returned an error");
# 	   return;
#        }
#    }
# }


# ################################################################
# ## Scan the sequences with a position-specific scoring matrix
# sub ScanSequences {
#    my ($matrix_file, $matrix_format, $fmap_prefix, $options) = @_;

#    ## check file names
#    &RSAT::error::FatalError("ScanSequences require to specify a matrix file") unless ($matrix_file);
#    &RSAT::error::FatalError("ScanSequences require to specify a matrix format") unless ($matrix_format);
#    &RSAT::error::FatalError("ScanSequences require to specify a prefix for the scanning result") unless ($fmap_prefix);

#    ### pattern matching
#    &RSAT::error::FatalError("Cannot read file $family{$family_name}->{seq_file}") unless (($batch) || ($dry_run) || (-r $family{$family_name}->{seq_file}));
#    my $feature_file = $fmap_prefix.".ft";
#    &RSAT::message::Info("Scaning sequences with matrix",
# 			"\n;\t", $matrix_file,
# 			"\n;\t", $feature_file,
# 		       )  if ($verbose >= 2);

#    my $command = "$matrix_scan_cmd -v 1 -bginput -markov 1 -i ".$family{$family_name}->{seq_file};
#    $command .= " ".${quick_scan};
#    $command .= " -seq_format ".$seq_format;
#    $command .= " -matrix_format ".$matrix_format;
#    $command .= " ".$strands;
#    $command .= " -m ".$matrix_file;
#    $command .= " -consensus_name";
#    $command .= " -origin ".$scan_origin;
#    $command .= " -return limits,sites,pval,normw ";
#    $command .= " -o ".$feature_file;
#    $command .= $options;

# #   die $command, "\n";

#    ## Check if scanning is OK (sometimes there are no matrices)
#    if ($batch) {
#        push @main::batch_commands, $command;
#    } else {
#        my $error = &doit($command, $dry_run, 0, $verbose);
#        if ($error) {
# 	   &RSAT::message::Warning("Feature map skipped because matrix-scan returned an error");
# 	   return;
#        }
#    }
# }

# ################################################################
# #### draw feature-map for the result of dna-pattern
# sub DrawFeatureMap {
#    my ($fmap_prefix, $title, $options) = @_;

#    ## check file names
#    $fmap_prefix = $family{$family_name}->{oligo_ft_prefix} unless ($fmap_prefix);
#    $title = &ShortFileName($fmap_prefix) unless ($title);

#    ### check that the sequence file is readable, then derive the output file names
#    &RSAT::error::FatalError("Cannot read file $family{$family_name}->{seq_file}") unless (($batch) || ($dry_run) || (-r $family{$family_name}->{seq_file}));
#    my $feature_file = $fmap_prefix.".ft";
#    $fmap_file = $fmap_prefix.".".$img_format;
#    $htmap_file = $fmap_prefix.".html";
#    &RSAT::message::Info("Drawing feature-map",
# 			"\n;\t", $feature_file,
# 			"\n;\t", $fmap_file,
# 			"\n;\t", $htmap_file,
# 		       )  if ($verbose >= 2);

#    ## feature-map drawing
#    if ($family{$family_name}->{calib_length}) {
# 	$from = -$family{$family_name}->{calib_length};
# 	$to = -1;
#    }
#    $command = "feature-map -i ".$feature_file;
#    $command .= " -format ".$img_format;
#    $command .= " -minfthick 2";
#    $command .= " -scalebar -scalestep $scalestep -legend ";
#    $command .= " -title '".$title."'";
# #   $command .= " -from $from" if (defined($from));
# #   $command .= " -to $to" if (defined($to));
#    $command .= " -o $fmap_file -scorethick ";
#    $command .= " ".$options;
#    if ($htmaps) {
# 	$command .= " -htmap > $htmap_file ";
#    };

#    ## Delete the feature file to save disk space (the feature files
#    ## occupy half of the result directory)
#    $command .= "; rm -f ".$feature_file;

#    if ($batch) {
#        push @main::batch_commands, $command;
#    } else {
#        &doit($command, $dry_run, 0, $verbose);
#    }
# }

# ################################################################
# ### dyad analysis
# sub DyadAnalysis {
#   &RSAT::message::TimeWarn("Analyzing dyads for family $family_name",
# 			   "monad length: $monad_length",
# 			   "min_sp: $min_sp",
# 			   "max_sp: $max_sp",
# 			  ) if ($verbose >= 2);
#   $dir = "${family_name}/dyads_${family_name}";
#   chdir($dir{main}); chdir($dir{output}); &RSAT::util::CheckOutDir($dir);
#   &RSAT::error::FatalError("Cannot read file $family{$family_name}->{input_seq_file}") unless (($batch) || ($dry_run) || (-r $family{$family_name}->{input_seq_file}));

#   if ($bg_seq_file) {
#     &compute_bg_dyads() unless (-e $bg_model_file{dyads});
#   }

#   ## build the dyad-analysis command
#   $command = "dyad-analysis";
#   $command .= " -return occ,proba,rank,zscore,ratio";
#   $command .= " -i ".$family{$family_name}->{input_seq_file};
#   $command .= " -format ".$seq_format;
#   $command .= " -o ".$family{$family_name}->{dyad_file};
#   $command .= " ".$dyad_options;
#   $command .= " -org ".$organism_name if ($org_fam);
#   if ($mask) {
#     $command .= " -mask ".$mask;
#   }

#   ### pattern assembly
#   &RSAT::message::TimeWarn("Assembling dyad patterns", $family{$family_name}->{dyad_assembly_file})
#     if ($verbose >= 2);
#   $command .= "; pattern-assembly -v 1";
#   $command .=  " -maxfl 1 -toppat ".$toppat;
#   $command .=  " ".${strands};
#   $command .= " -max_asmb_nb ".$nmotifs;
#   $command .= " -i ".$family{$family_name}->{dyad_file};
#   $command .= " -o ".$family{$family_name}->{dyad_assembly_file};

#   ### extract PSSM from assembled patterns
#   #    $command = "echo TEST";
#   if ($task{pssm}) {
#     $command .= "; ";
#     $command .= &MatrixFromPatterns($family{$family_name}->{dyad_assembly_file},
# 				   $family{$family_name}->{dyad_pssm_file}, "dyads");
#   }

#   if ($batch) {
#     push @main::batch_commands, $command;
#   } else {
#     &doit($command, $dry_run, $die_on_error, $verbose);
#   }
# }


# ################################################################
# ## Convert assembled patterns (dyads, oligos) into PSSMs
# sub MatrixFromPatterns {
#   my ($assembly_file, $pssm_file, $type) = @_;
#   &RSAT::message::TimeWarn("Matrix from patterns", $type, $pssm_file)
#     if ($verbose >= 2);
#   my $command = "matrix-from-patterns -v 1 ";
#   $command .= " -seq ".$family{$family_name}->{seq_file};
#   $command .= " -asmb ".$assembly_file;
#   $command .= " -bginput -markov 0 ";
#   $command .= " -max_asmb_nb ".$nmotifs;
#   $command .= " -flanks 2";
#   $command .= " -logo -logo_format png";
#   $command .= " -logo_file ".$pssm_file."_logo";
#   $command .= " -o ".$pssm_file;

# #  my $pssm_dir = `dirname $pssm_file`;
# #  chomp($pssm_dir);
# #  $command .= "; convert-matrix";
# #  $command .= " -i ".$pssm_file."_count_matrices.tf";
# #  $command .= " -from transfac -to tab -return logo -logo_format png";
# #  $command .= " -logo_dir ".$pssm_dir;
#   return ($command);
# }

# ################################################################
# ## Compare discovered motifs with motif database
# sub MotifsVsDB {
#   my ($matrix_file, $matrix_format, $db_file, $db_format, $compa_prefix) = @_;
#   &RSAT::message::TimeWarn("Comparing motifs to dabatase") if ($main::verbose >= 2);

#   my $cmd = "compare-matrices -v 2 ";
#   $cmd .= " -file1 ".$matrix_file;
#   $cmd .= " -format1 ".$matrix_format;
#   $cmd .= " -file2 ".$db_file;
#   $cmd .= " -format2 ".$db_format;
#   $cmd .= " -DR";
#   $cmd .= " -sort cor";
#   $cmd .= " -uth rank 1";
#   $cmd .= " -lth w 5";
#   $cmd .= " -lth cor 0.85";
#   $cmd .= " -lth Ncor 0.4";
#   $cmd .= " -return matrix_name,direction,Ncor,SW,cor,width,consensus";
# # $cmd .= " -return aligned_matrices";
#   $cmd .= " -o ".$compa_prefix.".tab";
#   $cmd .= " -out_matrices ".$compa_prefix."_matrices.tab";

#   my $wd = `pwd`;
#   &RSAT::message::Debug("Working dir", $wd, "\n", $cmd) if ($main::verbose >= 0);
#   if ($batch) {
#     push @main::batch_commands, $cmd;
#   } else {
#     &doit($cmd, $dry_run, 0, $verbose);
#   }
# }

# ################################################################
# ## Merge patterns detected by oligo-analysis and dyad-analysis
# sub MergePatterns {
#   my $filter_dyads = 1;
#   &RSAT::message::TimeWarn("Merging oligonucleotides and dyads for family ",
# 			   $family_name,
# 			   $family{$family_name}->{merged_file}
# 			  ) if ($verbose >= 2);
#    my $dir = "${family_name}/merged_${family_name}";
#    chdir($dir{main}); chdir($dir{output}); &RSAT::util::CheckOutDir($dir);
#    my $command = "cat $family{$family_name}->{oligo_file}";
# #    $command .= " | grep -v '^;'";
#    $command .= " > $family{$family_name}->{merged_file};";
#    $command .= " cat $family{$family_name}->{dyad_file}";
# #    $command .= " | grep -v '^;'";
#    if ($filter_dyads) {
# 	$command .= " | grep -v '\{0\}'";
#    } else {
# 	$command .= " | perl -pe 's/n\{0\}//g'";
#    }
#    $command .= " >> $family{$family_name}->{merged_file};";
#    $command .= " wc $family{$family_name}->{merged_file}";
#    if ($batch) {
#        push @main::batch_commands, $command;
#    } else {
#        &doit($command, $dry_run, $die_on_error, $verbose);
#    }
#    &DrawMergedFeatureMap() if ($task{map});
# }


# ################################################################
# #### draw feature-map for the result of merged oligonucleotides and dyads
# sub DrawMergedFeatureMap {
#   &RSAT::message::TimeWarn("Drawing feature-map with results of merged oligonucleotides and dyads for family", $family_name)
#     if ($verbose >= 2);
#   ### pattern matching
#   &RSAT::error::FatalError("Cannot read file $family{$family_name}->{seq_file}") unless (($batch) || ($dry_run) || (-r $family{$family_name}->{seq_file}));
#   $fmap_file = $family{$family_name}->{merged_file}.".".$img_format;
#   $htmap_file = $family{$family_name}->{merged_file}.".html";
#   my $command = "dna-pattern";
#   $command .= " -i ".$family{$family_name}->{seq_file};
#   $command .= " -format ".$seq_format;
#   $command .= " -pl ".$family{$family_name}->{merged_file};
#   $command .= " -origin ".$map_origin;
#   $command .= " -N 4";
#   $command .= " -return sites,limits ";
#   $command .= " ".$strands;
#   $command .= " | features-from-dnapat -o $family{$family_name}->{merged_ft_file} ";

#   if ($batch) {
#     push @main::batch_commands, $command;
#   } else {
#     my $error = &doit($command, $dry_run, 0, $verbose);
#     if ($error) {
#       &RSAT::message::Warning("Feature map skipped because dna-pattern returned an error");
#       return;
#     }
#   }

#   ## feature-map drawing
#   if  ($family{$family_name}->{calib_length}) {
#     $from =  $family{$family_name}->{calib_length};
#     $to = -1;
#   }
#   $command = " feature-map -i ".$family{$family_name}->{merged_ft_file};
#   $command .= " -scalebar -scalestep $scalestep -legend";
#   $command .= " -format ".$img_format;
#   $command .= " -title ";
#   $command .= &ShortFileName($family{$family_name}->{merged_ft_file});
#   #   $command .= " -from $from" if (defined($from));
#   #   $command .= " -to $to" if (defined($to));
#   $command .= " -o $fmap_file -scorethick ";
#   if ($htmaps) {
#     $command .= " -htmap > $htmap_file ";
#   };

#   ## Delete the feature file to save disk space (the feature files
#   ## occupy half of the result directory)
#   $command .= "; rm -f ".$family{$family_name}->{merged_ft_file};

#   if ($batch) {
#     push @main::batch_commands, $command;
#   } else {
#     &doit($command, $dry_run, 0, $verbose);
#   }
# }


# ################################################################
# ## Run Gert Thijs' MotifSampler
# sub MotifSampler {
#   &RSAT::message::TimeWarn ("Running MotifSampler for family $family_name") if ($verbose >= 2);
#   ## Check output directory
#   my $dir = $family{$family_name}->{MotifSampler_dir};
#   chdir($dir{main}); chdir($dir{output}); &RSAT::util::CheckOutDir($dir);

#   ## run the command
#   my $MS_command = "MotifSampler ".$family{$family_name}->{MotifSampler_options};
#   unless (($batch) || ($verbose >= 3)) {
#     ## Suppress verbosity
#     $MS_command = "(".$MS_command.") >& MS_log.txt";
#   }
#   $MS_command .= "; MotifRanking -m 2";
#   $MS_command .= " -i ".$family{$family_name}->{MotifSampler_matrix};
#   $MS_command .= " -o ".$family{$family_name}->{MotifSampler_matrix}."_ICsorted";
#   $MS_command .= "; ";
#   $MS_command .= &ConvertMatrixCommand($family{$family_name}->{MotifSampler_file}, "MotifSampler");
#   if ($batch) {
#     push @main::batch_commands, $MS_command;
#   } else {
#     &doit($MS_command, $dry_run, $die_on_error, $verbose);
#   }
# }


# ################################################################
# ## Run Andrew Neuwald's gibbs program
# sub Gibbs {
#   chdir($dir{main}); chdir($dir{output}); &RSAT::util::CheckOutDir($family{$family_name}->{gibbs_dir});


#   if ($strands eq "-2str") {
#     $addrc = " -addrc";
#   }
#   ## This can be useful in case the file is compressed
#   my $gibbs_command = "convert-seq ".$addrc;
#   $gibbs_command .= " -from fasta -to fasta ";
#   $gibbs_command .= " -i ".$family{$family_name}->{input_seq_file};
#   $gibbs_command .= " -o ".$family{$family_name}->{gibbs_input_seq_file};
#   $gibbs_command .= " ; gibbs ".$family{$family_name}->{gibbs_options};
#   $gibbs_command .= " > ".$family{$family_name}->{gibbs_file};
#   unless (($batch) || ($verbose >= 3)) {
#     ## Suppress verbosity
#     $gibbs_command = "(".$gibbs_command.") >& gibbs_log.txt";
#   }

#   $gibbs_command .= "; ";
#   $gibbs_command .= &ConvertMatrixCommand($family{$family_name}->{gibbs_file}, "gibbs");
# #  $gibbs_command = &ConvertMatrixCommand($family{$family_name}->{gibbs_file}, "gibbs");

#   if ($batch) {
#     push @main::batch_commands, $gibbs_command;
#   } else {
#     &doit($gibbs_command, $dry_run, $die_on_error, $verbose);
#   }
# }


# ################################################################
# ## Run Roth's AlignACE program
# sub AlignACE {
#   chdir($dir{main}); chdir($dir{output}); &RSAT::util::CheckOutDir($family{$family_name}->{AlignACE_dir});

#   if ($strands eq "-2str") {
#     $addrc = " -addrc";
#   }

#   ## This can be useful in case the file is compressed, and to convert
#   ## n characters (masked nucleotides) into dots
#   my $AlignACE_command = "convert-seq ";
#   $AlignACE_command .= " -from fasta -to fasta -dotmask";
#   $AlignACE_command .= " -i ".$family{$family_name}->{input_seq_file};
#   $AlignACE_command .= " -o ".$family{$family_name}->{AlignACE_input_seq_file};
#   $AlignACE_command .= " ; AlignACE ".$family{$family_name}->{AlignACE_options};
#   $AlignACE_command .= " > ".$family{$family_name}->{AlignACE_file};

#   $AlignACE_command .= "; ";
#   $AlignACE_command .= &ConvertMatrixCommand($family{$family_name}->{AlignACE_file}, "AlignACE");

#   if ($batch) {
#     push @main::batch_commands, $AlignACE_command;
#   } else {
#     &doit($AlignACE_command, $dry_run, $die_on_error, $verbose);
#   }
# }

# ################################################################
# ## infogibbs (developed by Matthieu Defrance)
# sub infogibbs {
#   chdir($dir{main}); chdir($dir{output}); &RSAT::util::CheckOutDir($family{$family_name}->{infogibbs_dir});

#   ## This can be useful in case the file is compressed, and to convert
#   ## n characters (masked nucleotides) into dots
#   my $infogibbs_command = "";
#   $infogibbs_command .= "info-gibbs ".$family{$family_name}->{infogibbs_options};
#   $infogibbs_command .= " > ".$family{$family_name}->{infogibbs_file};

#   $infogibbs_command .= "; ";
#   $infogibbs_command .= &ConvertMatrixCommand($family{$family_name}->{infogibbs_file}, "infogibbs");

#   if ($batch) {
#     push @main::batch_commands, $infogibbs_command;
#   } else {
#     &doit($infogibbs_command, $dry_run, $die_on_error, $verbose);
#   }
# }


# ################################################################
# ## Run MEME program
# sub MEME {
#   chdir($dir{main}); chdir($dir{output}); &RSAT::util::CheckOutDir($family{$family_name}->{meme_dir});

#   ## MEME does not accept sequences shorter than 8
#   my $min_seq_len = &max(8, $MEME_options{minw});

#   ## Run the command
#   $meme_command = "convert-seq ";
#   $meme_command .= " -from fasta -to fasta -skip_short ".$min_seq_len;
#   $meme_command .= " -i ".$family{$family_name}->{input_seq_file};
#   $meme_command .= " -o ".$family{$family_name}->{meme_input_seq_file}." ; ";
#   $meme_command .= $meme_cmd." ".$family{$family_name}->{meme_options};
#   unless (($batch) || ($verbose >= 3)) {
#     ## Redirect verbosity to a log file
#     $meme_command = "(".$meme_command.") >& meme_log.txt";
#   }
#   $meme_command .= "; ";
#   $meme_command .= &ConvertMatrixCommand($family{$family_name}->{meme_file}, "meme");
# #  $meme_command = &ConvertMatrixCommand($family{$family_name}->{meme_file}, "meme");


#   if ($batch) {
#     push @main::batch_commands, $meme_command;
#   } else {
#     &doit($meme_command, $dry_run, 0, $verbose);
#   }
# }


# ################################################################
# ## Run orm program
# sub orm {
#   chdir($dir{main}); chdir($dir{output}); &RSAT::util::CheckOutDir($family{$family_name}->{orm_dir});

#   ## run the command
#   my $orm_command = "";
#   if ($ENV{ORM}) {
#     $orm_command = $ENV{ORM};
#   } else {
#     $orm_command = "orm";
#   }
#   $orm_command .= " -v 1 ".$family{$family_name}->{orm_options};

#   if ($task{assemble_orm}) {
#     &RSAT::message::TimeWarn("Assembling orm patterns", $family{$family_name}->{orm_assembly_file})
#       if ($verbose >= 2);
#     $orm_command .= "; pattern-assembly -v 1";
#     $orm_command .= " -maxfl 1 -subst 1 -toppat ".$toppat." ".$strands;
#     $orm_command .= " -max_asmb_nb ".$nmotifs;
#     $orm_command .= " -i ".$family{$family_name}->{orm_file};
#     $orm_command .= " -o ".$family{$family_name}->{orm_assembly_file};

#     ### extract PSSM from pattern assembly
#     &RSAT::message::TimeWarn("Matrix from orm patterns", $family{$family_name}->{orm_pssm_file})
#       if ($verbose >= 2);
#     $orm_command .= "; matrix-from-patterns -v 1 ";
#     $orm_command .= " -seq ".$family{$family_name}->{seq_file};
#     $orm_command .= " -asmb ".$family{$family_name}->{orm_assembly_file};
#     $orm_command .= " -bginput -markov 0 ";
# #    $orm_command .= " -max_asmb_nb ".$nmotifs;
#     $orm_command .= " -o ".$family{$family_name}->{orm_pssm_file};
#   }
# 	#print $orm_command."\n";
#   if ($batch) {
#     push @main::batch_commands, $orm_command;
#   } else {
#     &doit($orm_command, $dry_run, 0, $verbose);
#   }
# }


# ################################################################
# ## Run Jerry Hertz' consensus program
# sub Consensus {
#   chdir($dir{main}); chdir($dir{output}); &RSAT::util::CheckOutDir($family{$family_name}->{consensus_dir});

#   ## input file has to be converted to consensus format
#   $consensus_command = "convert-seq -dna -from fasta -to wc -i ".$family{$family_name}->{input_seq_file};

#   ## run the command
#   #   $consensus_command .= " | tr n ."; ## Transpose N residues in . because N is not recognized by consensus
#   $consensus_command .= " | consensus ".$family{$family_name}->{consensus_options};
#   $consensus_command .= " > ".$family{$family_name}->{consensus_file};
#   $consensus_command .= "; ";
#   $consensus_command .= &ConvertMatrixCommand($family{$family_name}->{consensus_file}, "consensus");
#   if ($batch) {
#     push @main::batch_commands, $consensus_command;
#   } else {
#     &doit($consensus_command, $dry_run, 0, $verbose);
#   }
# }

# ################################################################
# ### Read the known sites for each family
# sub ReadKnownSites {
#   my ($known_site_file) = @_;
#   %known_site = ();
#   %known_site_source = ();
#   if (-e $known_site_file) {
#     $known_sites_provided = 1;
#     open KNOWN_SITE, $known_site_file;
#     while (<KNOWN_SITE>) {
#       next if (/^;/);
#       next if (/^\#/);
#       next unless (/\S/);
#       chomp;
#       my @fields = split "\t";
#       my $site_sequence = &RSAT::util::trim(shift(@fields));
#       $site_sequence =~ s/N/n/g;
#       #$site_sequence = &compress_pattern($site_sequence,"n");
#       my $family_name = &RSAT::util::trim(shift(@fields));
#       next if (length($site_sequence) > $known_site_max_len);
#       my $source =  &RSAT::util::trim(shift(@fields));
#       push @{$known_site{$family_name}}, $site_sequence;
#       push @{$known_site_source{$family_name}}, $source;
#       #	    &RSAT::message::Debug(join ("\t", "; known site", "family:".$family_name, "site:".$site_sequence, $source)) if ($verbose >= 10);
#     }
#     close KNOWN_SITE;
#   }
# }


# ################################################################
# ### Generate a synthetic table
# sub SyntheticTable {
#   my $dir = "synthetic_tables";

#   $outfile{table} = $dir."/".$table_suffix;
#   $outfile{table} .= "_tr" if ($transpose);
#   $outfile{table} .= ".html";

#   &RSAT::message::TimeWarn("Generating synthetic table ", $outfile{table}) if ($verbose >= 1);

#   unless (-d $dir) {
#     mkdir $dir, 0755 || &RSAT::error::FatalError("Cannot create directory", $dir);
#   }

#   ### synthesize the results of all clusters into a single file

#   ### headers
#   $row = 0;
#   $row{f} = $row++;
#   $row{family} = $row++;
#   $row{size} = $row++;
#   $row{genes} = $row++;
#   if ($seq_length_calculated) {
#     $row{total_length} = $row++;
#     $row{avg_length} = $row++;
#   }
#   $row{known} = $row++ if ($known_sites_provided);
#   $row{oligos} = $row++;
#   $row{dyads} = $row++;
#   $row{orm} = $row++;
#   $row{selection} = $row++ if ($task{report});
#   $row{consensus} = $row++;
#   $row{gibbs} = $row++;
#   $row{AlignACE} = $row++;
#   $row{infogibbs} = $row++;
#   $row{meme} = $row++;
#   $row{MotifSampler} = $row++;
#   $row_num = $row-1;
#   while (($header,$row) = each %row) {
#     $table[$row][0] = "<b>$header</B>";
#   }
#   my $col = 0;

#   my $f = 0;
#   my $nb_fam = scalar(@families);
#   foreach my $family_name (@families) {
#     $f++;
#     &RSAT::message::TimeWarn("Indexing result files for family", $f."/".$nb_fam, $family_name) if ($verbose >= 2);
#     my @members = @{$family{$family_name}->{members}};
#     $gene_nb = scalar(@members);

#     ## Check minimum number of genes
#     if ($gene_nb < $min_genes) {
#       &RSAT::message::Warning("Skipping family",
# 			      $fam_count, $family_name,
# 			      $gene_nb." genes",
# 			      "< min = ".$min_genes), "\n" if ($verbose >= 2);
#       next;
#     }

#     $col++;
#     #### Index columns associated to each family
#     $column{$family_name} = $col;

#     ### check directory
#     $dir = "${family_name}/dyads_${family_name}";
#     unless (-d $dir) {
#       &RSAT::message::Warning ("Cannot find directory $dir\n");
#     }

#     ### family number
#     $table[$row{f}][$col] = "<b>$col</B>";

#     ### family name
#     if (($fam_link_prefix) || ($fam_link_suffix)) {
# 	$table[$row{family}][$col] = "<b><a href='".$fam_link_prefix.$family_name.$fam_link_suffix."'>".$family_name."</a></b>";
#     } else {
# 	$table[$row{family}][$col] = "<b><a href='../".$family_name."'>".$family_name."</a></B>";
#     }

#     #### number of genes
#     my $max_genes_to_list = 30; ## Max number of genes per family to list in the synthetic table
#     my @genes = @{$family{$family_name}->{members}};
#     my $genes = scalar(@genes);
#     $table[$row{size}][$col] = $genes;
# #    $table[$row{size}][$col] = scalar(@{$family{$family_name}->{members});

#     #### sequence length
#     if ($seq_length_calculated) {
#       if ($genes > 0) {
# 	$family{$family_name}->{avg_length} = $family{$family_name}->{total_length}/$genes;
#       } else {
# 	$family{$family_name}->{avg_length} = "NA";
#       }
#       $table[$row{total_length}][$col] = $family{$family_name}->{total_length};
#       $table[$row{avg_length}][$col] = sprintf("%.1f", $family{$family_name}->{avg_length});
#     }

#     ### list of genes
#     if (($gene_link_prefix) || ($gene_link_suffix)) {
# 	$table[$row{genes}][$col] = "";
# 	foreach my $g (0..&RSAT::stats::min($#genes, $max_genes_to_list)) {
# 	    $genes[$g] = "<a href='".$gene_link_prefix.$genes[$g].$gene_link_suffix."'>".$genes[$g]."</a>";
# 	}
#     } else {
# 	$table[$row{genes}][$col] = "<a href='../".$family{$family_name}->{family_file}."'>";
#     }
#     if ($genes <= $max_genes_to_list) {
# 	$table[$row{genes}][$col] .= join(" ", @genes);
#     } else {
# 	my $diff = $genes - $max_genes_to_list;
# 	$table[$row{genes}][$col] .= join(" ", @genes[0..($max_genes_to_list-1)]);
# 	if (($gene_link_prefix) || ($gene_link_suffix)) {
# 	    $table[$row{genes}][$col] .= "<a href='../".$family{$family_name}->{family_file}."'>";
# 	}
# 	$table[$row{genes}][$col] .= "<p> + ".$diff." other genes</p>";
#     }
#     if (($gene_link_prefix) || ($gene_link_suffix)) {
# 	$table[$row{genes}][$col] .= "</a>";
#     }

#     ### known patterns
#     if ($known_sites_provided) {
#       $table[$row{known}][$col] = "";
#       my @fam_known_site = @{$known_site{$family_name}};
#       my @fam_known_site_source = @{$known_site_source{$family_name}};
#       for $c (0..$#fam_known_site) {
# 	#		$table[$row{known}][$col] .= ${known_site{$family_name}}[$c];
# 	$table[$row{known}][$col] .= $fam_known_site[$c];
# 	$table[$row{known}][$col] .= "<BR>(".$fam_known_site_source[$c].")" if ($fam_known_site_source[$c]);
# 	$table[$row{known}][$col] .= "<P>";
#       }
#     }

#     ################################################################
#     #### Index results from oligo-analysis
#     &IndexPatterns($family_name,
# 		   $row{oligos},
# 		   $col,
# 		   patterns=>$family{$family_name}->{oligo_file},
# 		   assembly=>$family{$family_name}->{oligo_assembly_file},
# 		   pssm=>$family{$family_name}->{oligo_pssm_file},
# 		   validation=>$family{$family_name}->{oligo_vs_known},
# 		   validation_table_relw=>$family{$family_name}->{oligo_vs_known_relw_table},
# 		   validation_table_weight=>$family{$family_name}->{oligo_vs_known_weight_table},
# 		   type=>"oligos",
# 		  );

#     ################################################################
#     #### Index results from dyad-analysis
#     &IndexPatterns($family_name,
# 		   $row{dyads},
# 		   $col,
# 		   patterns=>$family{$family_name}->{dyad_file},
# 		   assembly=>$family{$family_name}->{dyad_assembly_file},
# 		   pssm=>$family{$family_name}->{dyad_pssm_file},
# 		   validation=>$family{$family_name}->{dyad_vs_known},
# 		   validation_table_relw=>$family{$family_name}->{dyad_vs_known_relw_table},
# 		   validation_table_weight=>$family{$family_name}->{dyad_vs_known_weight_table},
# 		   type=>"dyads",
# 		  );

#     ################################################################
#     #### Index results from orm
#     &IndexPatterns($family_name,
# 		   $row{orm},
# 		   $col,
# 		   patterns=>$family{$family_name}->{orm_file},
# 		   assembly=>$family{$family_name}->{orm_assembly_file},
# 		   pssm=>$family{$family_name}->{orm_pssm_file},
# 		   validation=>$family{$family_name}->{orm_vs_known},
# 		   validation_table_relw=>$family{$family_name}->{orm_vs_known_relw_table},
# 		   validation_table_weight=>$family{$family_name}->{orm_vs_known_weight_table},
# 		   type=>"orm",
# 		  );

#     ################################################################
#     ### index results of manual pattern selection
#     if ($task{report}) {
#       &IndexPatterns($family_name,
# 		     $row{selection},
# 		     $col,
# 		     patterns=>$family{$family_name}->{oligo_selection},
# 		     type=>"manual"
# 		    );
#     }


#     ################################################################
#     #### Index results of consensus
#     &IndexConsensus($family_name, $row{consensus}, $col, matrix_file=>$family{$family_name}->{consensus_file});

#     ################################################################
#     #### Index results of gibbs
#     &IndexGibbs($family_name, $row{gibbs}, $col, matrix_file=>$family{$family_name}->{gibbs_file});

#     ################################################################
#     #### Index results of AlignACE
#     &IndexAlignACE($family_name, $row{AlignACE}, $col, matrix_file=>$family{$family_name}->{AlignACE_file});

#     ################################################################
#     #### Index results of infogibbs
#     &IndexInfoGibbs($family_name, $row{infogibbs}, $col, matrix_file=>$family{$family_name}->{infogibbs_file});

#     ################################################################
#     #### Index results of meme
#     &IndexMEME($family_name, $row{meme}, $col, matrix_file=>$family{$family_name}->{meme_file});

#     ################################################################
#     #### Index results of MotifSampler
#     &IndexMotifSampler($family_name, $row{MotifSampler}, $col, matrix_file=>$family{$family_name}->{MotifSampler_file});
#   }

#   ###############################################################
#   ## Print the header of the  synthetic table
#   $syn = &OpenOutputFile($outfile{table});
#   print $syn "<html>\n";
#   print $syn "<title>",$table_suffix, "</title>","\n";
#   print $syn "<body>\n";
#   print $syn "<h1>".$table_suffix."</h1>\n";
#   print $syn join( "\n",
# 		   "<pre>",
# 		   $verbose_message,
# 		   "</pre>"
# 		 ), "\n";
#   print $syn "<table border=1>\n";

#   ################################################################
#   ## Add a link to the MDC report files
#   if ($task{report}) {
#     print $syn "<h4>Reports for the motif discovery competition</h4>";
#     print $syn "<ul>";
#     print $syn "<li><a href=../".$outfile{results}.">Results</a></li>";
#     print $syn "<li><a href=../".$outfile{parameters}.">Parameters</a></li>";
#     print $syn "</ul>";
#   }


#   ################################################################
#   ## Sort clusters according to the selected criterion
#   if ($sort_key eq "score") {
#     #### sort clusters by score
#     #### decreasing order
#     @sorted_families = sort {$max_score{$b} <=> $max_score{$a}} @families;

#   } else {
#     #### sort clusters by family name
#     @sorted_families = sort @families;
#   }


#   ################################################################
#   ## Transpose the result table
#   if ($transpose) {
#     foreach $r (0..$row_num) {
#       print $syn "<tr valign=top>\n";
#       foreach $f (@sorted_families) {
# 	my $c = $column{$f};
# 	print $syn "<td>", $table[$r][$c], "</td>\n";
#       }
#       print $syn "</tr>\n";
#     }
#   } else {
#     #### print table header
#     foreach $r (0..$row_num) {
#       print $syn "<th>", $table[$r][0], "</th>\n";
#     }
#     #### print table content
#     foreach $f (@sorted_families) {
#       my $c = $column{$f};
#       print $syn "<tr valign=top>\n";
#       foreach $r (0..$row_num) {
# 	print $syn "<td>", $table[$r][$c], "</td>\n";
#       }
#       print $syn "</tr>\n";
#     }
#   }

#   print $syn "</table>\n";
#   print $syn "<hr>", &AlphaDate;
#   print $syn "</body>\n";
#   print $syn "</html>\n";
#   close $syn;
#   #    chdir "synthetic_tables";
#   #    chdir "../";
# }




# ################################################################
# ## Index the matrix from a consensus file
# sub IndexConsensus {
#    &IndexMatrix(@_, program=>"consensus");
# }

# ################################################################
# ##  Read a matrix and generate a summary for the synthetic table
# sub IndexMatrix {
#    my ($family_name, $row, $col, %args) = @_;

#    my $program = $args{program};
#    my $base = "../";
#    my $matrix_file = $args{matrix_file};
#    my $program = $args{program};
# #   my $family_dir = $args{dir}."/".$family_name."/";
#    my $family_dir = $family_name."/";
#    $family_dir =~ s|/+|/|g;
#    my $matrix_dir = $family_dir.$program."_".$family_name."/";
#    my $map_link = $base.$matrix_file;

#    &RSAT::message::Info("Indexing matrices from", $program, "family", $family_name, $matrix_file) if ($main::verbose >= 3);

#    if (-e $matrix_file) {
#      $table[$row][$col] .= &LinkToFile($family_dir, $family_name, $base);
#      $table[$row][$col] .= &LinkToFile($matrix_dir, "dir", $base);
#      $table[$row][$col] .= &LinkToFile($matrix_file, $program, $base);
#      $table[$row][$col] .= &LinkToFile($matrix_file.".tab", "tab", $base);
#      $matrix_result = "";


#      ################################################################
#      ## Read the matrices and list their consensus (at most $max_matrices per family)
#      my @matrices = &ReadMatrices($family_name, $matrix_file, %args);
#      if (scalar(@matrices) > 0) {
#        my $max_matrices = 5; ## Maximum number of matrices per family in the synthetic table
#        my $m = 0;
#        foreach my $matrix (@matrices) {
# 	 $m++;
# 	 $matrix_result .= "; MATRIX ".$m."/".scalar(@matrices)."\n";
# 	 if ($m > $max_matrices) {
# 	   my $remaining = scalar(@matrices) - $m + 1;
# 	   $matrix_result .= ";\n; ... and ".$remaining." more matrices in the file.\n";
# 	   last;
# 	 }
# 	 &RSAT::message::Debug($family_name, "Indexing matrix", $m."/".scalar(@matrices),$program)
# 	   if ($verbose >= 3);
# 	 #	   $matrix_result .= $matrix->toString(sep=>" ", col_width=>4, type=>"counts");
# 	 #$matrix_result .= $matrix->toString(type=>"parameters");
# 	 $matrix_result .= $matrix->toString(type=>"consensus");
#        }
#        $table[$row][$col] .= "<table cellpadding=3>\n";
#        $table[$row][$col] .= "<TR>\n";
#        $table[$row][$col] .= "<td colspan=2><pre>";
#        $table[$row][$col] .= $matrix_result;
#        $table[$row][$col] .= "</pre></td>\n";
#        $table[$row][$col] .= "</TR>\n";
#        $table[$row][$col] .= "</table>\n";

#        ################################################################
#        ## If the logos exist, include them in the synthetic table
#        my $logo_expr = $matrix_file;
#        $logo_expr =~ s/\.tab//;
#        $logo_expr .= '*_logo.png';
#        my @logo_files = glob ($logo_expr);
#        if (scalar(@logo_files) >= 1) {
# 	 &RSAT::message::Debug("LOGO", $logo_expr, "FILES", @logo_files) if ($main::verbose >= 3);
# 	 foreach my $logo (@logo_files) {
# 	   $table[$row][$col] .= "<br><a href='".$base.$logo."'><img src='".$base.$logo."' height=100></a>\n";
# 	 }
#        }

#      } else {
#        $table[$row][$col] .= "<br><font color=red>No matrix could be found in the result file</font>\n";
#      }
#    } else {
#      $table[$row][$col] =  "<a href=".$base.$matrix_file."><font color=red>File not found</font></a><br>\n";
#      &RSAT::message::Warning("File not found. Skipped.", $program, $matrix_file) if ($main::verbose >= 3);
#    }
# }

# ################################################################
# ## Convert a PSSM to tab-delimited format

# ## Compute the options for converting a PSSM
# sub ConvertMatrixCommand {
#   my ($matrix_file, $program) = @_;

#   ## Convert matrix in tab-delimited file
#   my $command = "convert-matrix -v 1";
#   $command .= " -pseudo ".$matrix_pseudo;
#   $command .= " -from ".$program;
#   $command .= " -to tab";
#   $command .= " -return counts,parameters";
#   $command .= " -return logo -logo_format png";
#   $command .= " -i ".$matrix_file;
#   my $sort_key = $pssm_sort_key{$program};
#   my $sort_order = $pssm_sort_order{$program};
#   if ($sort_key) {
#     $command .= " -sort ".$sort_order." ".$sort_key;
#   }
#   $command .= " -o ".$matrix_file.".tab";
#   &RSAT::message::Warning("Converting matrix", $command) if ($verbose >= 2);
#   return $command;
# }

# ## Convert the PSSM
# sub ConvertMatrix {
#   my ($matrix_file, $program) = @_;
#   my $command = &ConvertMatrixCommand($matrix_file, $program);
#   &doit($command, $dry_run, $die_on_error, $verbose);
# }

# ################################################################
# ##  Read a matrix from a matrix file and return a matrix object
# ##
# ## Matrix objects are used for displaying the top matrix in the
# ## synthetic table (-task synthesis), and for exporting matrices in
# ## tab-delimited files (-task sql).
# ##
# sub ReadMatrices {
#   my ($family_name, $matrix_file, %args) = @_;

#   my $program = $args{program};
#   &RSAT::message::TimeWarn("Reading matrices from", $program, "family", $family_name, $matrix_file, $row, $col) if ($verbose >= 2);

#   ## If the matrix has already been loaded, return the indexed matrix
#   if ($matrix_by_file{$matrix_file}) {
#     return ($matrix_by_file{$matrix_file});
#   } else {
#     if (-e $matrix_file) {
#       ## This piece of code has to be evaluated because it can raise
#       ## an error, if the output file does not contain any
#       ## matrix. This is the case for some output files of consensus,
#       ## due to a bug. There are also gibbs files which contain empty
#       ## matrices.
#       #      eval {

#       ################################################################
#       ## Read matrices from the input file
#       my @matrices = &RSAT::MatrixReader::readFromFile($matrix_file, $program);
#       &RSAT::message::Info("Read",scalar(@matrices),"matrices from file",$matrix_file, $program) if ($verbose >= 3);
#       foreach my $matrix (@matrices) {
# 	$matrix->force_attribute("pseudo",$matrix_pseudo);
# 	$matrix->force_attribute("family_id",$family_id);
# 	$matrix->force_attribute("family", $family_name);
# 	$matrix->force_attribute("format", $program);
# 	$matrix->set_attribute("input_file", $matrix_file);
# 	$matrix_by_file{$matrix_file} = $matrix;
# 	$matrix->calcConsensus();
# 	$matrix->calcInformation();
# 	#	  if ($@) {
# 	#	    &RSAT::message::Warning("Matrix file does no contain a valid matrix. Skipped.", $program, $matrix_file) if ($verbose >= 2);
# 	#	    &RSAT::message::Debug(join "\n", $@) if ($verbose >= 10);
# 	#	    return(undef);
# 	#	  } else {
# 	#	    return($matrix);
# 	#	  }
# 	#	}
# 	my $id = RSAT::matrix->auto_id($run_prefix."_mat");
# 	$matrix->force_attribute("id", $id);
# 	$matrix->set_attribute("analysis", $run_prefix);

# 	&RSAT::message::Debug($family, $program, "Matrix", $matrix->get_attribute("id"),
# 			      $matrix->get_attribute("consensus.IUPAC"),
# 			     ) if ($verbose >= 3);
#       }

#       ################################################################
#       ## Sort matrices
#       my $sort_key = $pssm_sort_key{$program};
#       my $sort_order = $pssm_sort_order{$program};
#       if ($sort_key) {
# 	@matrices = &RSAT::MatrixReader::SortMatrices($sort_key, $sort_order, @matrices);
#       }

#       ## Export matrices to an SQL database
#       my $first_matrix = $matrices[0];
#       if ($first_matrix) {
# 	foreach my $attr (@matrix_scalar_out_fields) {
# 	  $first_matrix->_set_attribute_cardinality($attr, "SCALAR");
# 	}
#       }
#       foreach my $matrix (@matrices) {
# 	$matrix_factory->add_object($matrix);
#       }


#       ## Check that the result file contains at least one matrix
#       if (scalar(@matrices) > 0) {
# 	return(@matrices);
#       } else {
# 	&RSAT::message::Warning("Matrix file does no contain any valid matrix. Skipped.", $program, $matrix_file) if ($verbose >= 3);
# 	return()
#       }

#     } else {
#       &RSAT::message::Warning("File not found. Skipped.", $program, $matrix_file) if ($verbose >= 3);
#       return();
#     }
#   }
# }

# ################################################################
# ## Index the matrix from gibbs file
# sub IndexGibbs {
#    &IndexMatrix(@_, program=>"gibbs");
# }

# ################################################################
# ## Index the matrix from AlignACE file
# sub IndexAlignACE {
#    &IndexMatrix(@_, program=>"AlignACE");
# }

# ################################################################
# ## Index the matrix from infogibbs file
# sub IndexInfoGibbs {
#    &IndexMatrix(@_, program=>"infogibbs");
# }

# ################################################################
# ## Index the matrix from meme file
# sub IndexMEME {
#    &IndexMatrix(@_, program=>"meme");
# }

# ################################################################
# ## Index the matrix from MotifSampler file
# sub IndexMotifSampler {
#    &IndexMatrix(@_, program=>"MotifSampler");
# }


# ################################################################
# ## Add a link to one file in the synthetic table
# sub LinkToFile {
#   my ($file, $label, $base) = @_;
#   my $link = '';
#   if (-e $file) {
#     $link = "<a href='".$base.$file."'>[".$label."]</a>\n";
#   } else {
#     $link = "<font color='#FFBBBB'>[".$label."]</font>";
#   }
# #  my $pwd = `pwd`; chomp $pwd;
# #  &RSAT::message::Debug("&LinkToFile", $file, $label, $base, $pwd);
#   return($link);
# }


# ################################################################
# ## Index the patterns discovered for the HTML synthetic table
# sub IndexPatterns {
#    my ($family_name, $row, $col, %args) = @_;


#    undef @patterns;
#    undef @sorted_patterns;
#    undef %rc;
#    undef %score;
#    undef %type;

#    my $base = "../";
#    my $type = $args{type};
#    my $assembly_file = $args{assembly};
#    my $pssm_file = $args{pssm};
#    my $pattern_file = $args{patterns};
#    my $validation_file = $args{validation};
#    my $validation_table_relw_file = $args{validation_table_relw};
#    my $validation_table_weight_file = $args{validation_table_weight};
# #   my $family_dir = $args{dir}."/".$family_name."/";
#    my $family_dir = $family_name."/";
#    $family_dir =~ s|/+|/|g;
#    my $pattern_dir = $family_dir.$type."_".$family_name."/";
#    my $map_link = $base.$pattern_file;

#    &RSAT::message::Info(join("\t", "; Indexing patterns of type", $type, "for family", $family_name, $pattern_file)) if ($verbose >= 2);

#    ## Read the patterns
#    if (-e $assembly_file) {
#        if (($type eq "dyads") ||
# 	   ($type eq "oligos") ||
# 	   ($type eq "orm"))
# 	   {
# 	   ## Parse an assembly file (only retain contigs and isolated)
# 	   ($error, @patterns) = &ReadAssemblyFile($family_name, $assembly_file, $type);
#        } else {
# 	   ## Parse a pattern file (retain all patterns)
# 	   @patterns = &ReadPatterns($assembly_file);
#        }
#    } else {
# 	$table[$row][$col] = "<a href=".$base.$assembly_file."><font color=red>File not found</font></a><br>\n";
# 	&RSAT::message::Warning("Family $family_name", "assembly file ", $assembly_file, " does not exist. Skipped.")
# 	  if ($verbose >= 3);
# 	return;
#    }

#    if ($error) {
# 	$table[$row][$col] = "<font color=red>$error</font>\n";
#    } else {
# 	@sorted_patterns = sort { $score{$b} <=> $score{$a} } @patterns;
# 	$table[$row][$col] .= &LinkToFile($family_dir, $family_name, $base);
# #	$table[$row][$col] .= "<a href=".$base.$family_dir.">".$family_name."</a>\n";
# 	$table[$row][$col] .= &LinkToFile($pattern_dir, "dir", $base);
# #	$table[$row][$col] .= "<a href=".$base.$pattern_dir.">[dir]</a>\n";
# 	$table[$row][$col] .= &LinkToFile($pattern_file, $type, $base);
# #	$table[$row][$col] .= "<a href=".$base.$pattern_file.">[".$type."]</a>\n";
# #	$table[$row][$col] .= "<a href=".$base.$pattern_file.">[patt]</a>\n";
# 	$table[$row][$col] .= &LinkToFile($assembly_file, "asmb", $base);
# #	$table[$row][$col] .= "<a href=".$base.$assembly_file.">[asmb]</a>\n";
# 	$table[$row][$col] .= &LinkToFile($pattern_file.".".$img_format, "map", $base);
# #	$table[$row][$col] .= "<a href=".$base.$pattern_file.".".$img_format.">[map]</a>\n";
# 	if ($htmaps) {
# 	  $table[$row][$col] .= &LinkToFile($pattern_file.".html", "html", $base);
# #	  $table[$row][$col] .= "<a href=".$base.$pattern_file.".html>[html]</a>\n";
# 	}
# 	$table[$row][$col] .= &LinkToFile($pssm_file."_sig_matrices.txt", "sig matrix", $base);
# 	$table[$row][$col] .= &LinkToFile($pssm_file."_count_matrices.tf", "PSSM-tf", $base);
# 	$table[$row][$col] .= &LinkToFile($pssm_file."_count_matrices.txt", "PSSM-tab", $base);
# 	$table[$row][$col] .= &LinkToFile($pssm_file.".".$img_format, "scan map", $base);
# #	$table[$row][$col] .= "<a href=".$base.$pssm_file."_sig_matrices.txt>[sig matrix]</a>\n";
# #	$table[$row][$col] .= "<a href=".$base.$pssm_file."_count_matrices.txt>[count matrix]</a>\n";
# #	$table[$row][$col] .= "<a href=".$base.$pssm_file.".$img_format>[scan map]</a>\n";

# #	if ($task{validate}) {
# 	$table[$row][$col] .= &LinkToFile($validation_file, "matches", $base);
# #	$table[$row][$col] .= "<a href=".$base.$validation_file.">[matches]</a>\n";
# 	$table[$row][$col] .= &LinkToFile($validation_table_weight_file, "match_weight", $base);
# #	$table[$row][$col] .= "<a href=".$base.$validation_table_weight_file.">[match_weight]</a>\n";
# 	$table[$row][$col] .= &LinkToFile($validation_table_relw_file, "match_relw", $base);
# #	$table[$row][$col] .= "<a href=".$base.$validation_table_relw_file.">[match_relw]</a>\n";
# #	}

# 	$table[$row][$col] .= "<table>\n";
# 	if ($max_score{$type}{$family_name}) {
# 	  $table[$row][$col] .="<tr><td align=right>max.score</td><td>".$max_score{$type}{$family_name}."</td></tr>\n";
# 	} else {
# 	  $table[$row][$col] .= "<tr><td>no pattern</td></tr>\n";
# 	}
# 	$table[$row][$col] .="</table>\n";
# 	$table[$row][$col] .= "<table>\n";
# 	$table[$row][$col] .= "<TR>\n";

# 	### print oligo sequence
# 	$table[$row][$col] .= "<TD>\n";
# 	for $p (@sorted_patterns) {
# 	    $table[$row][$col] .= "<b>" if ($p->get_attribute("score") >= 1);
# 	    my $sequence = $p->get_attribute("sequence");
# 	    $compressed = &compress_pattern($sequence, "n");
# 	    $compressed =~ s/N/n/g;
# 	    $table[$row][$col] .= $compressed;
# 	    $table[$row][$col] .= "</B>" if ($p->get_attribute("score") >= 1);
# 	    $table[$row][$col] .= "<BR>" unless ($p == $#sorted_patterns);
# 	}
# 	$table[$row][$col] .= "</TD>\n";

# 	### print reverse complementary oligo
# 	if ($strands eq "-2str") {
# 	    $table[$row][$col] .= "<TD>\n";
# 	    for $p (@sorted_patterns) {
# 		$table[$row][$col] .= "<b>" if ($p->get_attribute("score") >= 1);
# #		my $sequence =  $p->get_attribute("rc");
# 		my $rc =  &SmartRC($p->get_attribute("sequence"));
# 		$compressed = &compress_pattern($rc, "n");
# 		$compressed =~ s/N/n/g;
# 		$table[$row][$col] .= $compressed;
# 		$table[$row][$col] .= "</B>" if ($p->get_attribute("score") >= 1);
# 		$table[$row][$col] .= "<BR>" unless ($p == $#sorted_patterns);
# 	    }
# 	    $table[$row][$col] .= "</TD>\n";
# 	}

# 	### print pattern scores
# 	$table[$row][$col] .= "<TD><a href=".$map_link.">\n";
# 	for $p (@sorted_patterns) {
# 	    $table[$row][$col] .= "<b>" if ($p->get_attribute("score") >= 1);
# 	    $table[$row][$col] .= $p->get_attribute("score");
# 	    $table[$row][$col] .= "</B>" if ($p->get_attribute("score") >= 1);
# 	    $table[$row][$col] .= "<BR>" unless ($p == $#sorted_patterns);
# 	}
# 	$table[$row][$col] .= "</A></TD>\n";

# 	### print matching patterns
# 	if ($known_sites_provided) {
# 	    $table[$row][$col] .= "<TD>\n";
# 	    for $p (@sorted_patterns) {
# 		my @matches = ();
# 		foreach $site (@site_library) {
# 		    if (($p->contains($site, min_score=>$min_matching_score)) ||
# 			($site->contains($p->get_attribute("sequence"), min_score=>$min_matching_score))) {
# 			#my $match = $site->get_attribute("sequence");
# 			my $match = $site->get_id();
# 			push @matches, $match;
# 		    }
# 		}
# 		$table[$row][$col] .= join(";",@matches);
# 		$table[$row][$col] .= "<BR>" unless ($p == $#sorted_patterns);
# 	    }
# 	    $table[$row][$col] .= "</TD>\n";
# 	}
# 	$table[$row][$col] .= "</TR>\n";
# 	$table[$row][$col] .= "</TABLE>\n";
#    }


#    ################################################################
#    ## If logos exist, include them in the synthetic table
#    my $logo_expr = $pattern_file;
#    $logo_expr =~ s/\.tab//;
#    $logo_expr .= '*_logo.png';
#    my @logo_files = glob ($logo_expr);
#    if (scalar(@logo_files) >= 1) {
#      &RSAT::message::Debug("LOGO", $logo_expr, "FILES", @logo_files) if ($main::verbose >= 3);
#      foreach my $logo (@logo_files) {
#        $table[$row][$col] .= "<br><a href='".$base.$logo."'><img src='".$base.$logo."' height=100></a>\n";
#      }
#    }
# }




# ################################################################
# #### Draw feature maps
# sub DrawDyadFeatureMap {
#    &RSAT::message::TimeWarn( "Drawing feature-map with results of dyad-analysis for family $family_name") if ($verbose >= 2);
#    &RSAT::error::FatalError("Cannot read file $family{$family_name}->{seq_file}") unless (($batch) || ($dry_run) || (-r $family{$family_name}->{seq_file}));

#    ## pattern matching
#    my $command = "dna-pattern";
#    $command .= " -pl $family{$family_name}->{dyad_file}";
#    $command .= " -format $seq_format";
#    $command .= " -i $family{$family_name}->{seq_file}";
#    $command .= " -origin $map_origin";
#    $command .= " -N 4";
#    $command .= " -return sites,limits ";
#    $command .= " ".$strands;
#    $command .= "| features-from-dnapat -o $family{$family_name}->{dyad_ft_file} ";
#    if ($batch) {
#        push @main::batch_commands, $command;
#    } else {
#        my $error = &doit($command, $dry_run, 0, $verbose);
#        if ($error) {
# 	   &RSAT::message::Warning("Feature map skipped because dna-pattern returned an error");
# 	   return;
#        }
#    }

#    ## feature-map drawing
#    $command = "feature-map -i $family{$family_name}->{dyad_ft_file} -o $family{$family_name}->{fmap_file} ";;
# #   $command .= " -from $from" if (defined($from));
# #   $command .= " -to $to " if (defined($to));
#    $command .= "-legend ";
#    $command .= "-scalebar -scalestep $scalestep ";
#    $command .= "-scorethick ";
#    $command .= "-mlen 520 ";
#    $command .= "-title '${family_name}$dyad_suffix' ";
#    if ($htmaps) {
# 	$command .= "-htmap >  $family{$family_name}->{htmap_file}";
#    }

#    ## Delete the feature file to save disk space (the feature files
#    ## occupy half of the result directory)
#    $command .= "; rm -f ".$family{$family_name}->{dyad_ft_file};

#    #  $command .= " ;  xv -o $family{$family_name}->{fmap_file} &";
#    if ($batch) {
#        push @main::batch_commands, $command;
#    } else {
#        &doit($command, $dry_run, 0, $verbose);
#    }
# }


# ################################################################
# #### check parameters
# sub CheckParameters {

#   ## Output directory
#   &RSAT::error::FatalError("You must specify the output directory (option -outdir)")
#     unless $dir{output};

#   #### accepted feature types
#   unless (defined %accepted_feature_types) {
#     ## By default, accept only CDS features
#     $accepted_feature_types{cds} = 1;
#   }
#   $feature_types = join ",", keys (%accepted_feature_types);

#   ## For backward compatibility
#   if ($task{upstream}) {
#       &RSAT::message::Warning("Option -task upstream is obsolete, please use -task sequences");
#       $task{sequences} = 1;
#   }
#   if ($task{upstream_ensembl}) {
#       &RSAT::message::Warning("Option -task upstream_ensembl is obsolete, please use -task sequences_ensembl");
#       $task{sequences_ensembl} = 1;
#   }


#   #### check selected tasks
#   unless (defined(%task)) {
#     &RSAT::error::FatalError("You should select at least one task.");
#   }
#   if ($task{all}) {
#     foreach my $t (@supported_tasks) {
#       $task{$t} = 1;
#     }
#     unless ($draw_maps) {
#       $task{maps} = 0;
#       $task{oligo_maps} = 0;
#       $task{dyad_maps} = 0;
#       $task{orm_maps} = 0;
#     }
#   }
#   if ($task{maps}) {
#     $task{oligo_maps} = 1;
#     $task{dyad_maps} = 1;
#     $task{orm_maps} = 1;
#   }

#   ## Validation
#   if (($task{validate_oligos}) ||
#       ($task{validate_dyads}) ||
#       ($task{validate_orm})) {
#     $task{validate} = 1;
#     &RSAT::error::FatalError("For the task 'validate', you must specify a list of known sites with the option -known")
#       unless ($known_site_file);
#   }

#   ## Db match
#   if (($task{db_match_oligos}) ||
#       ($task{db_match_dyads})) {
#     $task{db_match} = 1;
#     &RSAT::error::FatalError("For the task 'db_match', you must specify a list of known sites with the option -db")
#       unless ($db_site_file);
#   }
#   ### family file ###
#   unless (($family_file) || ($sequence_file_list)) {
#     &RSAT::error::FatalError("You must specify either a family file or a sequence file list");
#   }

#   ### organism ###
#   if ($organism_name) {
#     unless ($supported_organism{$organism_name}) {
#       &RSAT::error::FatalError ("organism $organism_name is not supported");
#     }
#   } elsif ($org_fam) {
#   } else {
#     &RSAT::error::FatalError ("You must specify an organism (option -org) or indicate the organism in the second column of the family file (-org_fam).");
#   }

#   ## Upstream region limits
#   $from = $supported_organism{$organism_name}->{'up_from'}  unless defined($from);
#   $to = $supported_organism{$organism_name}->{'up_to'}  unless defined($to);
#   $seq_length = abs($to - $from) + 1;

#   ## Dyad type
#   $dyad_type = "any" unless ($dyad_type);

#   ## Strands
#   if (defined($force{strands})) {
#     $strands = $force{strands}; ### force
#   } else {
#     $strands = "-2str";
#   }

#   ## Strands for the background model
#   $bg_strands = $strands;
# #  $bg_strands = '-1str';


#   ## Threshold on occurrence significance
#   unless ((defined($lth{occ_sig})) || ($lth{occ_sig} eq $null)) {
#     $lth{occ_sig} = 0;
#     &RSAT::message::Info("Lower threshold on occ_sig automatically set to 0 for oligo-analysis and dyad-analysis.")
#       if ($verbose >= 1);
#   }


#   ################################################################
#   ## MEME directory
#   if ($task{meme}) {
#     &check_meme_path();
#   }

# }


# ################################################################
# ## Define background model files
# sub define_bg_model_files {
#   ## Directory for BG models has to be defined relative to the output directory
#   $dir{bg_models} = "bg_models";
#   chdir($dir{main}); chdir($dir{output}); &RSAT::util::CheckOutDir($dir{bg_models});
#   &RSAT::message::TimeWarn("Background model directory", $dir{bg_models}) if ($main::verbose >= 1);

#   ## Name of model files for oligo-analysis
#   for my $oligo_len ($min_oligo_len..$max_oligo_len) {
#     $bg_model_file{"oligos_".$oligo_len."nt"} = $dir{bg_models}."/bg_oligos_".$oligo_len."nt".$bg_strands.$noov.".tab";
#   }

#   ## Name of model files for dyad-analysis
#   $bg_model_file{"dyads"} = $dir{bg_models}."/bg_dyads_".$monad_length."nt".$bg_strands.$noov.".tab";

#   ## Name of model file for MEME
#   $bg_model_file{"meme"} = $dir{bg_models}."/bg_meme_mkv".$markov_order.".txt";
# }

# ################################################################
# ## Compute background models from a set of background sequences
# ## specified with the option -bg_seq
# sub ComputeBG {
#   chdir($dir{main});
#   &RSAT::message::TimeWarn("Computing background model from background sequence file") if ($main::verbose >= 1);
#   &compute_bg_oligos();
#   &compute_bg_dyads();
#   &compute_bg_meme();
#   chdir($dir{output});
# }


# ################################################################
# ## Compute background models for oligo-analysis
# sub compute_bg_oligos {
#   ## Tip: the bg sequence file is defined relative to the main
#   ## directory, but the bg model files must be defined relative to the
#   ## output directory.
#   chdir($dir{main});

#   for my $oligo_len ($min_oligo_len..$max_oligo_len) {
#     my $bg_model_file =  $bg_model_file{"oligos_".$oligo_len."nt"};
#     &RSAT::message::TimeWarn("\tComputing background model for oligos", $oligo_len."nt", $bg_model_file) if ($main::verbose >= 1);
#     my $cmd = "";
#     if ($bg_seq_file =~ /.gz$/) {
#       $cmd .= "gunzip -c ".$bg_seq_file. "| $count_words_cmd -v 1";
#     } else {
#       $cmd .= "$count_words_cmd -v 1 -i ".$bg_seq_file;
#     }
#     $cmd .= " -nogrouprc $bg_strands $noov ";
#     $cmd .= " -l ".$oligo_len;
#     $cmd .= " -o ".$dir{output}."/".$bg_model_file;
#     &doit($cmd, $dry_run, $die_on_error, $verbose);
#   }
#   chdir($dir{output});
# }

# ################################################################
# ## Compute background models for dyad-analysis
# sub compute_bg_dyads {
#   ## Tip: the bg sequence file is defined relative to the main
#   ## directory, but the bg model files must be defined relative to the
#   ## output directory.
#   chdir($dir{main});
#   my $bg_model_file =  $bg_model_file{"dyads"};
#   &RSAT::message::TimeWarn("\tComputing background model for dyads", $bg_model_file) if ($main::verbose >= 1);
#   my $cmd = "";
#   $cmd .= " dyad-analysis -v 2";
#   $cmd .= " -quick";
#   $cmd .= " -i ".$bg_seq_file;
#   $cmd .= " -return occ,freq";
#   $cmd .= " -nogrouprc $strands $noov ";
# #  $cmd .= " -type ".$dyad_type;
#   $cmd .= " -l ".$monad_length;
#   #   $dyad_spacing = $min_sp."-".$max_sp;
#   $cmd .= " -sp ".$dyad_spacing;
#   $cmd .= " -o ".$dir{output}."/".$bg_model_file;
#   &doit($cmd, $dry_run, $die_on_error, $verbose);
#   chdir($dir{output});
# }

# ################################################################
# ## Check MEME path
# sub check_meme_path {
#   $meme_dir = $ENV{meme_dir};
#   unless ($meme_dir) {
#     &RSAT::error::FatalError("The variable meme_dir is not defined. Please check the RSAT config file ".$ENV{RSAT}."/RSAT_config.props");
#   }
#   $fasta_get_markov = $meme_dir."/fasta-get-markov";
# #  $meme_cmd = $meme_dir."/meme";
#   $meme_cmd = &RSAT::server::GetProgramPath("meme", $die_on_error, $meme_dir);
# }

# ################################################################
# ## Compute background model for MEME
# sub compute_bg_meme {
#   ## Tip: the bg sequence file is defined relative to the main
#   ## directory, but the bg model files must be defined relative to the
#   ## output directory.
#   &check_meme_path();
#   chdir($dir{main});
#   my $bg_model_file =  $bg_model_file{"meme"};
#   &RSAT::message::TimeWarn("\tComputing background model for MEME", "markov".$markov_order, $bg_model_file) if ($main::verbose >= 1);
#   my $cmd = "";
#   if ($bg_seq_file =~ /.gz$/) {
#     $cmd .= "gunzip -c ".$bg_seq_file;
#   } else {
#     $cmd .= "cat ".$bg_seq_file;
#   }
#   $cmd .= " | ".$fasta_get_markov;
#   $cmd .= " -m ".$markov_order;
#   $cmd .= " > ".$dir{output}."/".$bg_model_file;
#   &doit($cmd, $dry_run, $die_on_error, $verbose);
#   chdir($dir{output});
# }


# ################################################################
# ## Analyze all the clusters
# sub AnalyzeFamilies {
#   my $fam_count=0;
#   my $fam_nb = $#families+1;

#   ### Calibrate oligonucleotides for each sequence length
#   if (($task{calibrate}) ||
#       ($task{calibN}) ||
#       ($sequence_file_list) ||
#       ($background eq "calib1") ||
#       ($background eq "calibN")
#      ) {
#     my @calib_lengths = &CalcCalibrationLengths();
#     &CalibrateOligos(@calib_lengths) if ($task{calibrate});
#     &CalibrateOligosN() if ($task{calibN});
#     #    } else {
#     #	foreach $family_name (@families) {
#     #	    $family{$family_name}->{seq_length} = $seq_length*scalar(@{$family{$family_name}->{members}});
#     #	}
#     $seq_length_calculated = 1;
#   }

#   ## Read all sequences from an input file and create the family-specific sequence files
#   if (($task{sequences}) && ($all_seq_file)) {
#     &ReadAllSequences();
#   }

#   foreach $family_name (@families) {
#     #       @main::batch_commands = ();
#     $fam_count++;
#     my @members = @{$family{$family_name}->{members}};
#     $gene_nb = scalar(@members);

#     #	&CheckSkip($fam_count,$family_name);
#     #	&CheckLast($fam_count);

#     ## Check minimum number of genes
#     if ($gene_nb < $min_genes) {
#       &RSAT::message::Warning ("Skipping family",
# 			       $fam_count, $family_name,
# 			       $gene_nb." genes",
# 			       "< min = ".$min_genes) if ($verbose >= 2);
#       next;
#     }

#     if ($org_fam) {
#       $organism_name = $family_name;
#       &RSAT::message::TimeWarn("Organism name", $organism_name);
#     }


#     &RSAT::message::TimeWarn("Treating cluster ", $family_name," (".$fam_count."/".$fam_nb.")",
# 			     $gene_nb." genes", $organism_name)
#       if ($verbose >= 1);

#     ### retrieve upstream sequences of the family
#     if ($task{sequences}) {
#       if ($all_seq_file) {
# 	&StoreSequences();
#       } else {
# 	&RetrieveSequences();
#       }
#     }

#     ### retrieve upstream sequences of the family from EnsEMBL
#     &RetrieveSequencesEnsEMBL() if ($task{upstream_ensembl});

#     ### purge upstream sequences of the family
#     &PurgeSequences() if ($task{purge});

#     ### oligo-analysis
#     if ($task{oligos}) {
#       ## This method runs oligo-analysis + merges patterns of different length + runs pattern-assembly
#       &OligoAnalysis();

#     } else {
#       ## oligo merging can be done a posteriori with the option -task merge_oligos
#       &MergeOligoLengths() if ($task{merge_oligos});

#       ## pattern assembly can be done a posteriori with the option -task assemble_oligos
#       &AssembleOligos() if ($task{assemble_oligos});
#     }

#     ## Convert oligos to PSSM
#     if ($task{oligos_pssm}) {
#       my $pssm_command = &MatrixFromPatterns($family{$family_name}->{oligo_assembly_file},
# 					     $family{$family_name}->{oligo_pssm_file}, "oligos");
#       if ($main::batch) {
# 	push @main::batch_commands, $pssm_command;
#       } else {
# 	&doit($pssm_command, $dry_run, $die_on_error, $verbose);
#       }
#     }

#     if ($task{oligo_maps}) {
#       ### feature-maps of oligonucleotide occurrences
#       &MatchPatterns($family{$family_name}->{oligo_file},
# 		     $family{$family_name}->{oligo_file},
# 		     " -N 4");
#       &DrawFeatureMap($family{$family_name}->{oligo_file});

#       ## scan sequences with the PSSM built from significant oligos
#       if (-e $family{$family_name}->{oligo_pssm_file}."_count_matrices.txt","tab") {
# 	&ScanSequences($family{$family_name}->{oligo_pssm_file}."_count_matrices.txt","tab",
# 		       $family{$family_name}->{oligo_pssm_file}," -uth Pval 0.00025");
# 	&DrawFeatureMap($family{$family_name}->{oligo_pssm_file});
#       }
#     }



#     ### dyad analysis
#     &DyadAnalysis() if ($task{dyads});

#     ## Convert dyads to PSSM
#     if ($task{dyads_pssm}) {
#       my $pssm_command = &MatrixFromPatterns($family{$family_name}->{dyad_assembly_file},
# 					     $family{$family_name}->{dyad_pssm_file}, "dyads");
#       if ($main::batch) {
# 	push @main::batch_commands, $pssm_command;
#       } else {
# 	&doit($pssm_command, $dry_run, $die_on_error, $verbose);
#       }
#     }

#     ### dyad feature-maps
#     if ($task{dyad_maps}) {
#       &DrawDyadFeatureMap();

#       ## scan sequences with the PSSM built from significant dyads
#       if (-e $family{$family_name}->{dyad_pssm_file}."_count_matrices.txt","tab") {
# 	&ScanSequences($family{$family_name}->{dyad_pssm_file}."_count_matrices.txt","tab",
# 		       $family{$family_name}->{dyad_pssm_file}," -uth Pval 0.00025");
# 	&DrawFeatureMap($family{$family_name}->{dyad_pssm_file});
#       }
#     }

#     ### Jerry Hertz' consensus
#     if ($task{consensus}) {
#       &Consensus();
#     }
#     if ($task{consensus_maps}) {
#       &ScanSequences($family{$family_name}->{consensus_file}."_count_matrices.txt","consensus",
# 		     $family{$family_name}->{consensus_file}," -uth Pval 0.00025");
#       &DrawFeatureMap($family{$family_name}->{consensus_file});
#     }

#     ### Andrew Neuwald's gibbs
#     &Gibbs() if ($task{gibbs});
#     if ($task{gibbs_maps}) {
#       &ScanSequences($family{$family_name}->{gibbs_file}."_count_matrices.txt","gibbs",
# 		     $family{$family_name}->{gibbs_file}," -uth Pval 0.00025");
#       &DrawFeatureMap($family{$family_name}->{gibbs_file});
#     }

#     ### Roth's AlignACE
#     &AlignACE() if ($task{AlignACE});
#     if ($task{AlignACE_maps}) {
#       &ScanSequences($family{$family_name}->{AlignACE_file}."_count_matrices.txt","AlignACE",
# 		     $family{$family_name}->{AlignACE_file}," -uth Pval 0.00025");
#       &DrawFeatureMap($family{$family_name}->{AlignACE_file});
#     }

#     ### infogibbs
#     &infogibbs() if ($task{infogibbs});
#     if ($task{infogibbs_maps}) {
#       &ScanSequences($family{$family_name}->{infogibbs_file}."_count_matrices.txt","infogibbs",
# 		     $family{$family_name}->{infogibbs_file}," -uth Pval 0.00025");
#       &DrawFeatureMap($family{$family_name}->{infogibbs_file});
#     }

#     ### Gert Thijs' MotifSampler
#     &MotifSampler() if ($task{MotifSampler});

#     ### MEME
#     &MEME() if ($task{meme});
#     if ($task{meme_maps}) {
#       &ScanSequences($family{$family_name}->{meme_file}."_count_matrices.txt","meme",
# 		     $family{$family_name}->{meme_file}," -uth Pval 0.00025");
#       &DrawFeatureMap($family{$family_name}->{meme_file});
#     }

#     ### orm
#     &orm() if ($task{orm});

#     ## Convert orm assembly to PSSM
#     if ($task{orm_pssm}) {
#       my $pssm_command = &MatrixFromPatterns($family{$family_name}->{orm_assembly_file},
# 					     $family{$family_name}->{orm_pssm_file}, "orm");
#       if ($main::batch) {
# 	push @main::batch_commands, $pssm_command;
#       } else {
# 	&doit($pssm_command, $dry_run, $die_on_error, $verbose);
#       }
#     }


#     ### orm feature-maps
#     if ($task{orm_maps}) {
#       &MatchPatterns($family{$family_name}->{orm_file},
# 		     $family{$family_name}->{orm_file},
# 		     " -N 4");
#       &DrawFeatureMap($family{$family_name}->{orm_file});
#     }

#     #	### merge oligo-analysis and dyad-analysis results
#     #	&MergePatterns() if ($task{merge_patterns});

#     #	### Match all patterns together with a sliding window
#     #	&SlidingWindow() if ($task{slide});
#     ### delete the sequence and feature files of the family
#     if ($task{clean}) {
#       &RSAT::message::TimeWarn("Cleaning files from the hard drive for family $family_name") if ($verbose >= 2);
#       foreach my $file ($family{$family_name}->{seq_file},
# 			$family{$family_name}->{seq_file_purged},
# 			$family{$family_name}->{merged_ft_file},
# 			$family{$family_name}->{orm_ft_file},
# 			$family{$family_name}->{dyad_ft_file}
# 		       ) {
# 	&RSAT::message::Info("\t$file") if ($verbose >=2);
# 	$command = "rm -f $file";
# 	if ($main::batch) {
# 	  push @main::batch_commands, $command;
# 	} else {
# 	  &doit($command, $dry_run, $die_on_error, $verbose);
# 	}
#       }
#     }

#     ## Compare discovered motifs with motif databases
#     if ($task{motifs_vs_db}) {
#       my @matrix_types = ("oligo", "dyad", "orm");
#       foreach my $matrix_type (@matrix_types) {
# 	my $matrix_prefix = $family{$family_name}->{$matrix_type."_pssm_file"};
# 	my $matrix_file = $matrix_prefix."_count_matrices.tf";
# 	if (-e $matrix_file) {
# 	  my $compa_prefix = $matrix_prefix."_vs_jaspar";
# 	  &MotifsVsDB($matrix_file, "transfac", $ENV{RSAT}."/data/motif_databases/JASPAR/jaspar_matrices.tf", "transfac", $compa_prefix);
# 	} else {
# 	  &RSAT::message::Warning("Motif versus DB", $matrix_type, "Matrix file does not exist", $matrix_file);
# 	}
#       }
#     }

#     ## Send the batch script to the queue
#     if (($batch) && (scalar(@batch_commands) > 0)) {
#       my $batch_script = join ";", @main::batch_commands;
#       &doit($batch_script, $dry_run, $die_on_error, $verbose, $batch, $family_name);
#       @main::batch_commands = ();
#     }
#   }
# }

# ################################################################
# ## Execute a command, with optional echo
# #sub doit {
# #    my ($command) = @_;
# #    print "\n$command\n" if ($verbose >= 2);
# #    system $command unless $dry_run;
# #}

# # ## Skip initial clusters if specified with -skip option
# # sub CheckSkip {
# #     my ($fam_count,$family_name) = @_;
# #     if ($fam_count <= $skip) {
# # 	warn join ("\t", "; Skipping family",
# # 		   $fam_count, $family_name,
# # 		       $gene_nb." genes",
# # 		   "\t(skip ".$skip. ")",
# # 		   ), "\n" if ($verbose >= 1);
# # 	next;
# #     }
# # }

# # ## Stop after a few clusters
# # sub CheckLast {
# #     my ($fam_count) = @_;
# #     if (($last >0) && ($fam_count > $last)) {
# # 	&RSAT::message::Warning("Stopped after $last genes");
# # 	last;
# #     }
# # }

# ########################## subroutine definition ############################

# sub PrintHelp {
# #### display full help message #####
#    $HELP_FAMILY_FILE = &help_message("class file");
#    open HELP, "| more";
#    print HELP <<End_of_help;
# NAME
# 	multiple-family-analysis

# VERSION
#        $program_version

# AUTHOR
#        Since 1999 by Jacques van Helden (jvanheld\@bigre.ulb.ac.be)
#        Since 2004 Olivier Sand (oly\@bigre.ulb.ac.be)

# USAGE
#        multiple-family-analysis [-i inputfile] [-v]

# DESCRIPTION
# 	Runs a combination of programs in order to discover significant
# 	patterns in the upstream regions of several gene clusters.

# 	This script is a way to automate the systematic analysis of
# 	multiple clusters of co-regulated genes, such as those
# 	obtained with DNA chip experiments.

# 	The program reads the composition of several clusters in a
# 	single text file, then executes different pattern discovery
# 	programs, and generates a summary report.

# 	Results are subdivided in directories created on the fly, one
# 	directory per family plus one summary directory.

# CATEGORY
# 	sequences
# 	pattern discovery

# OPTIONS
# 	-h	(must be first argument) display full help message

# 	-help	(must be first argument) display options

# 	-v	verbose

# 	-i	family file (incompatible with -seq)

# 	-mask upper|lower
# 		Mask lower or uppercases, respectively, i.e. replace
# 		selected case by N characters.

# 	-seq_list
# 	        List of input sequences files (incompatible with -i)
# 		This option allows to specify a file containing a list
# 		of sequence files. The first word of each line must
# 		specify the path of a sequence file. Each sequence
# 		file must be in fasta format.

# 	-all_seq
# 		A single file containing all the sequences.
# 		This option is convenient to analyze for example
# 		results of ChIP-chip experiments, where there is one
# 		particular sequence associated to each probe.
# 		Mutually incompatible with -seq_list.

# 	-all_seq_format
# 		Input sequence format. This option is useful only in
# 		combination with the option -all_seq.

# 	-bg_seq

# 	        Specify a file containing a set of sequences used to
# 	        compute background models.  Background sequences must
# 	        be formatted in fasta.

# 	        Background models are only computed if the option
# 	        -task compute_bg is called.

# 	-skip # skip the first # data sets when performing the
# 		analyses (this is useful when the program has been
# 		interrupted after a considerable amount of work).

# 	-last # stop after the first # data sets when performing the
# 		analyses (this is useful to perform quick tests).

# 	-select	fam1[,fam2,fam3,...]
# 		Perform the analysis of selected clusters only.

# 	-maindir
# 		main directory (by default, the working directory is used)

# 	-outdir	output directory

# 	-mingenes #
# 		minimal number of genes per family. Clusters with less
# 		than this number are skipped.

# 	-maxgenes #
# 		maximal number of genes per family. Clusters with more
# 		than this number are skipped.

#    Sequence retrieval options
# 	-org	organism


# 	-org_fam
# 		When this option is used, the name of the family
# 		(second column of the family file) is used as
# 		organism. This allows to automatically perform a
# 		genome-per-genome analysis of the orthologs of a
# 		regulon from some model organism.

# 		Typically, the input for this option is obtained from
# 		the program get-orthologs: starting from a regulon
# 		from some model organism (e.g. Saccharomyces
# 		cerevisiae), one wants to discover motifs in the
# 		corresponding orthologous genes in other species
# 		(e.g. each other species of Fungi).

# 		This option is incompatible with -org.

# 	-taxon	taxon
# 		If this option is specified, sequences are retrieved
# 		for all the orthologs of the input genes in the
# 		selected taxon. This approach relies on the detection
# 		of phylogenetic footprints (conserved elements in
# 		non-coding sequences). If the taxon is chosen in an
# 		appropriate way, it increases the sensitivity of the
# 		analysis.

# 	-noorf	exclude upstream ORFs from upstream sequences

# 	-rm	use repeat masked version  of the genome

# 	-orfov  do not exclude upstream ORFs from upstream sequences

# 	-from	upstream region left limit

# 	-to	upstream region right limit

#        -feattype
# 		feature type (e.g. CDS, mRNA)

# 	-seq_type
# 	       sequence type (upstream, downstream, ORF)

#         -size_names
#                 Use the old naming system (obsolete, only for backward
#                 compatibility). In the old naming system, file names
#                 included the upstream size. New names are more
#                 precise, indicating the -from and -to parameters. This
#                 avoids ambiguity in some cases.


#    Sequence purging option

#    	For pattern discovery, it is essential to purge sequences
#    	i.e. to mask redundant fragments. Such redundant fragments can
#    	come from genomic repeats (e.g. in duplicated genes in
#    	telomeric regions), or from neighbour genes sharing the same
#    	promoter, or , when working with multi-taxa sequences, the
#    	fact that several promoters were retrieved from closely
#    	related species (e.g. various strains of E.coli). These
#    	repeats give a strong bias on the statistical estimation of
#    	motif over-representation, and lead thus to a large number
#    	of false positive motifs.

# 	The approach followed here is to mask repeats (replace them
# 	with N characters) during the pattern discovery step, but use
# 	the unmasked sequences for pattern matching, in order to
# 	locate all the putative instances of the discovered motifs.

# 	-purge	use purged sequences for pattern discovery
# 		(default)

# 	-nopurge
# 		use non-purged sequences for pattern discovery

#         -purge_ml #
# 		matching length above which repeats have to be masked

#         -purge_mis #
# 		number of accepted mismatches to consider two sequence
# 		segments as repeats to be masked.

#    oligo-analysis and dyad-analysis options

#    	-two_tails
# 		Perform a two-tails test for oligo-analysis and
# 		dyad-analysis. The two-tails test detects both
# 		under-represented and over-represented motifs.

# 	-1str   strand-sensitive analysis

# 	-2str   strand-insensitive analysis

# 	-htmaps	draw html maps (dynamic map with pointers to the features)
# 		These html files take space on the hard drive, so I
# 		suppressed them from the default output.

# 	-noov	prevent overlapping matches for self-overlapping patterns
# 		(default)

# 	-ovlp	allow overlapping matches for self-overlapping patterns

# 	-nomap	do not draw feature maps (for saving time and hard disk space)

# 	-bg	background frequency model
# 		This parameter indicates which background model has to
# 		be used for the estimation of expected pattern
# 		frequencies.
# 		Supported: $supported_background

# 	-calib_dir

# 		Directory containing the calibration files for the
# 		negative binomial. The calibration file name is then
# 		calculated automatically from the conditions (seq
# 		lengths, seq number, ...)

# 	-calibN_repet #
# 		Number of repetitions for the calibration.

# 	-thosig #
# 		Threshold on occurrence significance.
# 		(obsolete: use -lth occ_sig instead)

# 	-lth param value
# 		Lower threshold on some parameter. All patterns with a
# 		parameter value smaller than the threshold are
# 		discarded.
# 		Supported parameters: $supported_thresholds
# 	      	Example: select patterns with a positive value for the
# 	      	occurrence significance.

# 			 -lth occ_sig 0

# 	-uth param value
# 		Upper threshold on some parameter. All patterns with a
# 		parameter value higher than the threshold are
# 		discarded.
# 		Supported parameters: $supported_thresholds
# 		Example: to select no more than 50 patterns
# 		        -uth rank 50

# 	-toppat #
# 		(default $toppat)
# 		Maximal number of patterns to take into account for
# 		the assembly. This parameter is passed to the command
# 		pattern-assembly.

#    oligo-analysis specific options
# 	-thmsf threshold on frequency of matching sequences (proportion
# 	        of sequences with at least one occurrence of the
# 	        pattern)

# 	-thmssig threshold on the significance of matching sequences
# 	        (sequences with at least one occurrence of the
# 	        pattern).

# 	-maxol	maximum oligonucleotide length

# 	-minol	minimum oligonucleotide length

# 	-oligo_exp_freq
# 		name of a file containing the expected frequencies for
# 		oligo-analysis.

# 	-pseudo	pseudo-weight (see oligo-analysis manual)

# 	-markov #
# 		calculate expected frequencies on the basis of a
# 		Markov chain model of order #

#    dyad-analysis specific options
# 	-monad  monad length for dyad-analysis

# 	-minsp	minimum spacing for dyad-analysis

# 	-maxsp	maximum spacing for dyad-analysis

# 	-dyad_exp_freq
# 		name of a file containing the expected frequencies for
# 		dyad-analysis.

#    feature-map options
#    	-origin	 #
# 		Origin for pattern matching positions, that will
# 		determine the way patterns are displayed on the
# 		feature-maps (default: -0, i.e. negative coordinates).
# 		This parameter is passed to dna-pattern and matrix-scan.

# 	-scalestep
# 		Distance between the vertical ticks on the scale bar
# 		of the feature maps.
# 		This parameter is passed to feature-map.

#    orm-specific options

# 	orm is a pattern discovery program developed by Matthieu
# 	Defrance. It should be installed in the contribution dir:
# 		    $RSAT/python-scripts/orm

# 	orm.py detects windows containing over-represented words, by
# 	comparison with the background model. More information can be found
# 	in the ORM documentation.

#    Matrix-based pattern discovery options (consensus, gibbs, AlignACE, meme, MotifSampler, infogibbs)
#        -width  matrix width (default $matrix_width)
#        -sps	expected number of sites per sequence
# 		(default $expected_sites_per_seq)
#        -nmotifs number of motifs to discover per sequence set (family)

#    Gibbs sampler (Neuwald, 1995)
# 	-seed	 seed number for the random generator

#    AlignACE (Roth, 1998)
# 	-seed	 seed number for the random generator

#    MotifSampler (Thijs, 2001)
# 	-MS_b	background file
# 	-MS_p	prior probability of 1 motif copy (default $MS_p)
# 	-MS_M   Maximal number of motif instances per sequence. (default $MS_M; unset=0)
# 	-MS_n	number of different motifs to search for (default $MS_n).
# 	-MS_x	allowed overlap between different motifs. (default $MS_x)
# 	-MS_r	number of times the MotifSampler should be repeated (default = $MS_r).
# 	Note	-MS_w is replaced by -width

#    MEME options (Bailey)
# 	Any option starting with -MEME_ will be passed to MEME.
# 	In particular, the following options are passed by default.
#    	-MEME_minw	minimum motif width (default $MEME_minw)
#    	-MEME_maxw	maximum motif width (default $MEME_maxw)
# 	-MEME_bfile     name of background Markov model file

#    info-gibbs options (Defrance, 2008)
# 	Any option starting with -infogibbs_ will be passed to info-gibbs.
# 	The list of supported options can be obtained with the command
# 	    info-gibbs -h

#    Output options
# 	-transp	transpose synthetic result table (columns become rows)
# 	-sort sort_key
# 	        sort key for the synthetic table
# 		Supported: $supported_sort_keys

#   Options for the HTML synthesis table
#     -fam_link_pref prefix
#     -fam_link_suff suffix
#        prefix and suffix to put before and after family names in
#        order to build a specific HTML hyperlink for each family name.

#        Example (with prefix only):
#          -fam_link_pref 'http://amigo.geneontology.org/cgi-bin/amigo/term-details.cgi?term=GO:'
# 	will replace the family name 0003997 by a link of the type
# 	  <a href=http://amigo.geneontology.org/cgi-bin/amigo/term-details.cgi?term=GO:0003997>0003997</a>

#     -gene_link_pref prefix
#     -gene_link_suff suffix
#        prefix and suffix to put before and after gene names in
#        order to build a specific HTML hyperlink for each gene name.

#        Example (with prefix only):
#          -gene_link_pref 'http://db.yeastgenome.org/cgi-bin/locus.pl?locus='
# 	   will create a link from the gene YBR093C to the URL
# 	   http://db.yeastgenome.org/cgi-bin/locus.pl?locus=YBR093C

#   Options for the automatically generated SQL scripts
# 	-schema database schema (default: $schema)
# 	-host	database host (default: $host)
# 	-user	database user (default: $user)
# 	-password
# 		database password (default: $password)

#    Other options
# 	-task selected_task
# 		Select the tasks to be performed.
# 		Supported tasks: $supported_tasks

# 		Can be used iteratively on the same command line to
# 		select multiple tasks.

# 		Example:
# 		    -task upstream -task oligos -task synthesis
# 		For a full analysis, simply type
# 		    -task all

# 		See details below (TASK DESCRIPTION)

# 	-known	known_site_file
# 		A file containing a list of known sites/motifs for
# 		each of the gene clusters. These known sites are NOT
# 		used during the pattern discovery. They are simply
# 		displayed besides the discovered patterns, for the
# 		sake of comparison.

# 		Format: the known site file is a tab-delimited text
# 		file, with the following column content:
# 		       1) site/motif sequence
# 		       2) family
# 		       3) source (optional)

# 		Example
# 		=======
# 		; sequence	family	source
# 		gGGAAAaTGAAACT	ISGF-3	TRANSFAC:R00001
# 		AGGAAATAGAAACT	ISGF-3	TRANSFAC:R00003
# 		ggagGGGCGGccct	Sp1	TRANSFAC:R14218
# 		...

#     	-known_max_len #
# 		maximal length for using known sites. This allows to
# 		filter out some sites with large sequences annotated in
# 		TRANSFAC (default: $known_site_max_len)

#         -db     database_file
#                 A file containing a list of known sites (from a database)

# 	-batch (advanced users)
# 		Send time-consuming jobs on a batch queue which will
# 		be executed on a PC cluster. This option only works on
# 		our lab cluster, but could be adapted for other
# 		configurations by adapting the method &doit() in the
# 		utilities ($RSAT/lib/RSA.lib).

# 	-nodie
# 		Continue the analysis even if errors are raised by
# 		sub-processes. By default, multiple-family-analysis
# 		stops if an error has occurred in one of the
# 		sub-programs. However, in some particular cases the user
# 		might want to ignore such errors.

# 		In particular, this option is useful to circumvent a
# 		problem encountered with the gibbs sampler under some
# 		operating systems (Linux but not Mac OSX): the gibbs
# 		program returns an error code to the system even when
# 		the pattern discovery was successful.

# FILE FORMAT
#    $HELP_FAMILY_FILE

# QUICK START
#    1) create a directory for the set of clusters.

#    2) within this directory, create a text file containing the
#       description of family composition, as described above.

#    3) run the command. For example, with a bacterial genome :

#       multiple-family-analysis -org organism -i family_file \
#           -v -from -200 -to 49 -task all

#    4) With a web browser (e.g. Netscape), open the folder
#       'synthetic_tables'. There should be a HTML file with the name
#       of your family file as prefix, and the suffix indicating the
#       other analysis parameters. Open this file and analyze the
#       result.

# TASK DESCRIPTION

#    all		Perform all the tasks below.

#    upstream	Retrieve upstream sequences for the clusters.
# 		(retrieve-seq)

#    purge	Purge upstream sequences for the family.
# 		(purge-sequences)

#    compute_bg 	Compute background models from a set of background
#       		sequences. Background sequences must be specified with
#       		the option -bg_seq.

#    calibrate
#    		Perform a word-specific calibration of expected
# 		frequencies, by calculating oligonucleotide
# 		distributions in the whole genome set of upstream
# 		sequences of the same size.

# 		Beware: this takes time, but, for higher organisms, it
# 		gives much less false positives than the standard
# 		background models.

#    calibN
# 		Perform a family-specific calibration of expected
# 		oligonucleotide frequencies by calculating occurrences
# 		in N random selections of genes.

# 		Beware: this takes a lot of time, but it strongly
# 		reduces the rate of false positives for higher
# 		organisms. We did not yet measure the rate of true
# 		positives remaining after this calibration.

#    oligos	Run oligo-analysis on each family.

#    dyads	Run dyad-analysis on each family.

#    maps	Draw feature maps with the results of oligo-analysis and
# 		dyad-analysis.
# 		(feature-map)

#    merge	Merge the results of oligo-analysis and dyad-analysis.

#    slide	Match discovered patterns with a sliding window.
# 		(dna-pattern)

#    synthesis	Generate an HTML report in the directory synthetic_tables.

#    sql		Export the discovered patterns in tab-delimited files,
# 		and export SQL scripts for loading these tables in a
# 		relational database.

#    clean	Delete upstream sequences after processing, in order
# 		to save disk space.

#    validate	Validate discovered patterns by comparing them to the list
#    		of known sites. This requires to specify a collection
#    		of known sites with the option -known.

#    db_match     Compare discovered patterns to a list of known sites
#                 (for instance a whole database). This requires to specify
#                 the list of known sites with the option -db.

# WISH LIST

#   -bg_seq must still be adapted for several programs
#   	  dyad-analysis
# 	  MotifSampler
# 	  info-gibbs (same bg format as MotifSampler)

# End_of_help
#  close HELP;
#  exit;
# }

# ################################################################
# #### Read arguments
# sub ReadArguments {

#   foreach my $a (0..$#ARGV) {
#     ### verbose ###
#     if ($ARGV[$a] eq "-v") {
#       $verbose = $ARGV[$a+1];
#       unless (&IsNatural($verbose)) {
# 	$verbose = 1;
#       }

#       #### dry run
#     } elsif ($ARGV[$a] eq "-n") {
#       $dry_run = 1;

#       #### Sequence purging options
#     } elsif ($ARGV[$a] eq "-nopurge") {
#       $analyze_purged_sequences = 0;

#     } elsif ($ARGV[$a] eq "-purge") {
#       $analyze_purged_sequences = 1;

#     } elsif ($ARGV[$a] eq "-purge_ml") {
#       $purge_ml = 1;

#     } elsif ($ARGV[$a] eq "-purge_mis") {
#       $purge_mis = 1;

#       ### detailed help
#     } elsif ($ARGV[$a] eq "-h") {
#       &PrintHelp();

#       ### list of options
#     } elsif ($ARGV[$a] eq "-help") {
#       &PrintOptions();

#       ### family file
#     } elsif (($ARGV[$a] eq "-i") ||
# 	     ($ARGV[$a] eq "-fam") # for compatibility with previous versions
# 	    ) {
#       $family_file = $ARGV[$a+1];

#       ### skip some data sets
#     } elsif ($ARGV[$a] eq "-skip") {
#       &FatalError("The option -skip is incompatible with the option -select") if (scalar(@selected) > 0);
#       $skip = $ARGV[$a+1];
#       &RSAT::error::FatalError("Invalid number with option -skip\t$skip") unless &IsNatural($skip);

#       ### sequence file list
#     } elsif ($ARGV[$a] eq "-seq_list") {
#       $sequence_file_list = $ARGV[$a+1];
#     } elsif ($ARGV[$a] eq "-seq") {
# 	&RSAT::message::Warning("Option -seq is obsolete, you should use the option -seq_list.");
# 	$sequence_file_list = $ARGV[$a+1];

#       ### Single file with all sequences
#     } elsif ($ARGV[$a] eq "-all_seq") {
#       $all_seq_file = $ARGV[$a+1];

#       ### input sequence format
#     } elsif ($ARGV[$a] eq "-all_seq_format") {
#       $all_seq_format = $ARGV[$a+1];

#       ### File containing sequences used to estimate the background model
#     } elsif ($ARGV[$a] eq "-bg_seq") {
#       $bg_seq_file = $ARGV[$a+1];
#       &RSAT::error::FatalError("Background sequence file does not exists", $bg_seq_file) unless (-e $bg_seq_file);

#       ### stop after some data sets
#     } elsif ($ARGV[$a] eq "-last") {
#       &FatalError("The option -last is incompatible with the option -select") if (scalar(@selected) > 0);
#       $last = $ARGV[$a+1];
#       &RSAT::error::FatalError("Invalid number with option -last\t$last") unless &IsNatural($last);

#       ### restrict the analysis to selected data sets
#     } elsif ($ARGV[$a] eq "-select") {
#       &FatalError("The option -delete is incompatible with the option -last") if ($last > 0);
#       &FatalError("The option -delete is incompatible with the option -skip") if ($skip > 0);
#       push (@selected, (split ",", $ARGV[$a+1]));

#       ## mask
#     } elsif ($ARGV[$a] eq "-mask") {
#       $mask = $ARGV[$a+1];
#       &CheckMask($mask);	

#       ### main directory
#     } elsif ($ARGV[$a] eq "-maindir") {
#       $dir{main} = $ARGV[$a+1];

#       ### output directory
#     } elsif ($ARGV[$a] eq "-outdir") {
#       $dir{output} = $ARGV[$a+1];

#       ### min number of genes
#     } elsif ($ARGV[$a] eq "-mingenes") {
#       $min_genes = $ARGV[$a+1];

#       ### max number of genes
#     } elsif ($ARGV[$a] eq "-maxgenes") {
#       $max_genes = $ARGV[$a+1];

#       ### max number of patterns to assemble
#     } elsif ($ARGV[$a] eq "-toppat") {
#       $toppat = $ARGV[$a+1];

#       ### upstream sequence limits
#     } elsif (($ARGV[$a] eq "-from") &&
# 	     (&IsInteger($ARGV[$a+1]))) {
#       $from = $ARGV[$a+1];
#     } elsif (($ARGV[$a] eq "-to") &&
# 	     (&IsInteger($ARGV[$a+1]))) {
#       $to = $ARGV[$a+1];

#       ### strands
#     } elsif ($ARGV[$a] eq "-1str") {
#       $force{strands} = "-1str";
#     } elsif ($ARGV[$a] eq "-2str") {
#       $force{strands} = "-2str";

#       ### oligo-analysis parameters
#     } elsif ($ARGV[$a] eq "-minol") {
#       $min_oligo_len = $ARGV[$a+1];;
#     } elsif ($ARGV[$a] eq "-maxol") {
#       $max_oligo_len = $ARGV[$a+1];;

#       ### dyad-analysis parameters
#     } elsif ($ARGV[$a] eq "-monad") {
#       $monad_length = $ARGV[$a+1];;
#     } elsif ($ARGV[$a] eq "-minsp") {
#       $min_sp = $ARGV[$a+1];;
#     } elsif ($ARGV[$a] eq "-maxsp") {
#       $max_sp = $ARGV[$a+1];;

#       ## feature-map parameters
#     } elsif ($ARGV[$a] eq "-origin") {
#       $map_origin = $ARGV[$a+1];;
#     } elsif ($ARGV[$a] eq "-scalestep") {
#       $scalestep = $ARGV[$a+1];;

#       ### matrix-based pattern discovery
#     } elsif ($ARGV[$a] eq "-width") {
#       $matrix_width = $ARGV[$a+1];
#     } elsif ($ARGV[$a] eq "-sps") {
#       $expected_sites_per_seq = $ARGV[$a+1];
#     } elsif ($ARGV[$a] eq "-nmotifs") {
#       $nmotifs = $ARGV[$a+1];
#       $MEME_options{nmotifs} = $nmotifs;
#       $MS_n = $nmotifs;

#       ### Gibbs sampler + AlignACE + infogibbs seed option
#     } elsif ($ARGV[$a] eq "-seed") {
#       $seed = $ARGV[$a+1];

#       ### MotifSampler options
#     } elsif ($ARGV[$a] =~ /-(MS_\S+)/) {
#       $$1 =  $ARGV[$a+1];

#       ### MEME options
#     } elsif ($ARGV[$a] =~ /-MEME_(\S+)/) {
#       my $option = $1;
#       my $value =  $ARGV[$a+1];
#       push @MEME_options, $option unless (defined($MEME_options{$option}));
#       $MEME_options{$option} = $value;

#       ### info-gibbs options
#     } elsif ($ARGV[$a] =~ /-infogibbs_(\S+)/) {
#       my $option = $1;
#       my $value =  $ARGV[$a+1];

#       ## Replace long option name by short option name if defined
#       $option =~ s/^verbosity$/v/;
#       $option =~ s/^length$/l/;
#       $option =~ s/^strand$/s/;
#       $option =~ s/^iter$/n/;
#       $option =~ s/^words$/w/;
#       $option =~ s/^expected$/e/;
#       $option =~ s/^motifs$/m/;
#       #      $option =~ s/^bgfile$/b/;
#       $option =~ s/^dmin$/d/;

#       ## Avoid redundant parameters
#       if ($option eq "s") {
# 	&RSAT::error::FatalError("info-gibbs option: Strand should be defined with options -2str|-1str");
#       } elsif ($option eq "i") {
# 	&RSAT::error::FatalError("info-gibbs option: Input file is automaticaly specified by multiple-family-analysis");
#       } elsif ($option eq "l") {
# 	&RSAT::error::FatalError("info-gibbs option: Matrix width should be defined with option -w");
#       } elsif ($option eq "m") {
# 	&RSAT::error::FatalError("info-gibbs option: Number of motifs width should be defined with option -nmotif");
#       } elsif ($option eq "e") {
# 	&RSAT::error::FatalError("info-gibbs option: Expected number of sites per sequence should be defined with option -sps");
#       }
#       push @infogibbs_options, $option unless (defined($infogibbs_options{$option}));
#       $infogibbs_options{$option} = $value;

#       ### ORM options
#     } elsif ($ARGV[$a] =~ /-(ORM_\S+)/) {
#       $$1 =  $ARGV[$a+1];

#       ### infogibbs options
#     } elsif ($ARGV[$a] =~ /-(infogibbs_\S+)/) {
#       $$1 =  $ARGV[$a+1];

#       ### organism
#     } elsif ($ARGV[$a] eq "-org") {
#       $organism_name = $ARGV[$a+1];

#       ### organisms are specified as family names
#     } elsif ($ARGV[$a] eq "-org_fam") {
#       $org_fam = 1;

#       ### Old naming system, maintained for backward compatibility
#     } elsif ($ARGV[$a] eq "-size_names") {
#       $size_names = 1;

#       ### taxon
#     } elsif ($ARGV[$a] eq "-taxon") {
#       $taxon = $ARGV[$a+1];

#       ### file with a collection of known motifs, for comparison between discovered and known motifs
#     } elsif ($ARGV[$a] eq "-known") {
#       $known_site_file = $ARGV[$a+1];
#     } elsif ($ARGV[$a] eq "-known_max_len") {
#       $known_site_max_len = $ARGV[$a+1];

#       ### file with a collection of known motifs, for comparison between discovered and known motifs
#     } elsif ($ARGV[$a] eq "-db") {
#       $db_site_file = $ARGV[$a+1];

#       ### file with a collection of known motifs, for comparison between discovered and known motifs
#     } elsif ($ARGV[$a] eq "-dbname") {
#       $db_site_name = $ARGV[$a+1];

#       #### task selection
#     } elsif ($ARGV[$a] eq "-task") {
#       my @requested_tasks = split ",", $ARGV[$a+1];
#       foreach my $task (@requested_tasks) {
# 	next unless $task;
# 	#	    $task = $ARGV[$a+1];
# 	if ($supported_task{$task}) {
# 	  $task{$task} = 1;
# 	} else {
# 	  &RSAT::error::FatalError("Unsupported task '$task'. \n\tSupported: $supported_tasks");
# 	}
#       }

#       #### transpose synthetic table
#     } elsif ($ARGV[$a] =~ /^-transp/) {
#       $transpose = 1;

#       #### family link prefix
#     } elsif ($ARGV[$a] =~ /^-fam_link_pref/) {
#       $fam_link_prefix = $ARGV[$a+1];
#       #### family link suffix
#     } elsif ($ARGV[$a] =~ /^-fam_link_suff/) {
#       $fam_link_suffix = $ARGV[$a+1];

#       #### gene link prefix
#     } elsif ($ARGV[$a] =~ /^-gene_link_pref/) {
#       $gene_link_prefix = $ARGV[$a+1];
#       #### gene link suffix
#     } elsif ($ARGV[$a] =~ /^-gene_link_suff/) {
#       $gene_link_suffix = $ARGV[$a+1];

#       #### sort key
#     } elsif ($ARGV[$a] =~ /^-sort/) {
#       $sort_key = $ARGV[$a+1];
#       unless ($supported_sort_key{$sort_key}) {
# 	&RSAT::error::FatalError("$sort_key is not supported as sort key");
#       }

#       #### clip upstream ORFs from upstream regions
#     } elsif ($ARGV[$a] =~ /^-noorf/) {
#       $noorf = "-noorf";

#       #### use repeat masked version of the genome
#     } elsif ($ARGV[$a] eq "-rm") {
#       $repeat_masked = 1;

#       #### do not clip upstream ORFs from upstream regions
#     } elsif ($ARGV[$a] =~ /^-orfov/) {
#       $noorf = "";

#       #### Threshold on relative weight for compare-patterns
#     } elsif ($ARGV[$a] eq '-rel_w') {
#       $rel_w = $ARGV[$a+1];
#       &FatalError("Relative weight should be a real value comprised between 0 and 1") unless ((&IsReal($rel_w)) && ($rel_w >= 0) && ($rel_w <= 1));

#       ### Lower threshold
#     } elsif ($ARGV[$a] eq "-lth") {
#       my $thr_field = $ARGV[$a+1];
#       my $thr_value =  $ARGV[$a+2];
#       unless ($supported_threshold{$thr_field}) {
# 	&RSAT::error::FatalError("Invalid threshold field $thr_field. Supported: $supported_thresholds");
#       }
#       $lth{$thr_field} = $thr_value;
#       #	    &RSAT::message::Debug("Lower threshold", $thr_field, $thr_value, $lth{$thr_field});

#       ### Upper threshold
#     } elsif ($ARGV[$a] eq "-uth") {
#       my $thr_field = $ARGV[$a+1];
#       my $thr_value =  $ARGV[$a+2];
#       unless ($supported_threshold{$thr_field}) {
# 	&RSAT::error::FatalError("Invalid threshold field $thr_field. Supported: $supported_thresholds");
#       }
#       $uth{$thr_field} = $thr_value;

#       #### threshold
#     } elsif ($ARGV[$a] =~ /^-thosig/) {
#       &RSAT::error::FatalError("Option -thosig is deprecated. Please use '-lth occ_sig' instead.");

#       #### threshold on mseq frequency
#     } elsif ($ARGV[$a] =~ /^-thmsf/) {
#       &RSAT::error::FatalError("Option -thmsf is deprecated. Please use '-lth ms_freq' instead.");
# #      $lth{ms_freq} = $ARGV[$a+1];

#       #### threshold on mseq significance
#     } elsif ($ARGV[$a] =~ /^-thmssig/) {
#       &RSAT::error::FatalError("Option -thmssig is deprecated. Please use '-lth ms_sig' instead.");
# #      $lth{ms_sig} = $ARGV[$a+1];

#       #### dyad type
#     } elsif ($ARGV[$a] =~ /^-type/) {
#       $force{type} = $ARGV[$a+1];

#       #### feature types
#     } elsif ($ARGV[$a] eq "-feattype") {
#       my @types = split ",", $ARGV[$a+1];
#       foreach my $type (@types) {
# 	if ($supported_feature_types{lc($type)}) {
# 	  $accepted_feature_types{lc($type)}++;
# 	} else {
# 	  &RSAT::error::FatalError("$type invalid feature type. Supported: $supported_feature_types");
# 	}
#       }

#       #### sequence types
#     } elsif ($ARGV[$a] eq "-seq_type") {
#       $seq_type = $ARGV[$a+1];

#       #### expected frequency file for dyad-analysis
#     } elsif ($ARGV[$a] =~ /^-dyad_exp_freq/) {
#       $exp_freq_file{dyads} = $ARGV[$a+1];

#       #### expected frequency file for oligo-analysis
#     } elsif ($ARGV[$a] =~ /^-oligo_exp_freq/) {
#       $exp_freq_file{oligos} = $ARGV[$a+1];

#       #### Calibration directory
#     } elsif ($ARGV[$a] =~ /^-calib_dir/) {
#       $dir{calib1} = $ARGV[$a+1];
#       $dir{calibN} = $ARGV[$a+1];

#       #### Number of repetitions for the calibration
#     } elsif ($ARGV[$a] =~ /^-calibN_repet/) {
#       $calibN_repet = $ARGV[$a+1];
#       &FatalError("Invalid number of repetitions for the calibrations: should be at least 2, and a much larger value (e.g.100) is highly recommended.")
# 	unless ((&IsNatural($calibN_repet)) && ($calibN_repet >= 2));

#       #### background model for expected frequency.
#       #### Supported: "upstream", "upstream-noorf", "intergenic", "mncf", "upstream-rm", "upstream-noorf-rm"
#     } elsif (($ARGV[$a] =~ /^-exp/)
# 	     || ($ARGV[$a] =~ /^-bg/)) {
#       $background = $ARGV[$a+1];
#       $background =~ s/^ncf$/intergenic/;

#       unless ($supported_background{$background}) {
# 	&RSAT::error::FatalError("$background\tInvalid value for background. Supported : $supported_background");
#       }

#       #### Markov chain estimation of expected oligo frequencies
#     } elsif ($ARGV[$a] =~ /^-markov/) {
#       $markov_order = $ARGV[$a+1];
#       $markov=1;

#       #### prevent self-overlap
#     } elsif ($ARGV[$a] eq "-noov") {
#       $noov = "-noov";

#       #### two-tail test for oligo-analysis and dyad-analysis
#     } elsif ($ARGV[$a] eq "-two_tails") {
#       $two_tails = 1;

#       #### allow self-overlap
#     } elsif ($ARGV[$a] eq "-ovlp") {
#       $noov = "-ovlp";

#       #### prevent feature-map drawing
#     } elsif ($ARGV[$a] eq "-nomap") {
#       $draw_maps = 0;

#       #### draw htmaps
#     } elsif ($ARGV[$a] =~ /^-htmap/) {
#       $draw_maps = 1;
#       $htmaps = 1;

#       #### pseudo-frequency for oligo-analysis
#     } elsif ($ARGV[$a] =~ /^-oligo_pseudo/) {
#       $oligo_pseudo = $ARGV[$a+1];
#       unless ((&IsReal($oligo_pseudo)) && ($oligo_pseudo > 0)) {
# 	&RSAT::error::FatalError("pseudo-weight must be a positive real number");
#       }

#       #### pseudo-weight for matrix conversion
#     } elsif ($ARGV[$a] =~ /^-matrix_pseudo/) {
#       $matrix_pseudo = $ARGV[$a+1];
#       unless ((&IsReal($matrix_pseudo)) && ($matrix_pseudo > 0)) {
# 	&RSAT::error::FatalError("pseudo-weight must be a positive real number");
#       }

#       #### Options for the SQL scripts
#     } elsif ($ARGV[$a] eq "-schema") {
#       $schema = $ARGV[$a+1];
#     } elsif ($ARGV[$a] eq "-host") {
#       $host = $ARGV[$a+1];
#     } elsif ($ARGV[$a] eq "-user") {
#       $user = $ARGV[$a+1];
#     } elsif ($ARGV[$a] eq "-password") {
#       $password = $ARGV[$a+1];

#     } elsif ($ARGV[$a] eq "-pc_cluster") {
#       &RSAT::message::Warning("Option -pc_cluster is obsolete, use option -batch instead.");
#       $batch = 1;

#     } elsif ($ARGV[$a] eq "-batch") {
#       $batch = 1;

#     } elsif ($ARGV[$a] eq "-nodie") {
#       $die_on_error = 0;

#     }
#   }
# }



# ################################################################
# # Verbosity
# #
# sub Verbose {
#   my $verbose_message = "";
#   $verbose_message .= "; multiple-family-analysis ";
#   $verbose_message .= &PrintArguments()."\n";
#   printf "; %-22s\t%s\n", "Program version", $program_version;
#   printf "; clusters     \t%d\n", $#families + 1;
#   my $f = 0;
#   foreach my $family_name (@families) {
#     $f++;
#     printf ";\tfamily\t%d\t%s\t%d elements\n", $f, $family_name, $family{$family_name}->get_size();
#   }

#   if ($family_file) {
#     $verbose_message .= sprintf "; Family file\t%s\n", $family_file ;
#     $verbose_message .= sprintf "; Upstream regions\n";
#     $verbose_message .= sprintf ";\t%-22s\t%d\n", "Strands", $strands;
#     $verbose_message .= sprintf ";\t%-22s\t%d\n", "From", $from;
#     $verbose_message .= sprintf ";\t%-22s\t%d\n", "To", $to;
#   } elsif ($sequence_file_list) {
#     $verbose_message .= sprintf "; Sequence file list\t%s\n", $sequence_file_list;
#   }
#   $verbose_message .= sprintf "; %-13s\t%s\n", "Organism", $organism_name;

#   if ($task{oligos}) {
#     $verbose_message .= sprintf "; oligo-analysis\n";
#     $verbose_message .= sprintf ";\t%-22s\t%d\n", "max oligo length", $max_oligo_len;
#     $verbose_message .= sprintf ";\t%-22s\t%d\n", "min oligo length", $min_oligo_len;
#   }

#   if ($task{dyads}) {
#     $verbose_message .= sprintf "; dyad-analysis\n";
#     $verbose_message .= sprintf ";\t%-22s\t%d\n", "monad length", $monad_length;
#     $verbose_message .= sprintf ";\t%-22s\t%d\n", "min spacing", $min_sp;
#     $verbose_message .= sprintf ";\t%-22s\t%d\n", "max spacing", $max_sp;
#     $verbose_message .= sprintf ";\t%-22s\t%s\n", "dyad type", $dyad_type;
#     $verbose_message .= &PrintThresholdValues();
#     #	$verbose_message .= sprintf ";\t%-22s\t%g\n", "sig threshold", $lth{occ_sig};
#   }

#   if ($bg_seq_file) {
#     $verbose_message .= sprintf "; %-13s\t%s\n", "Background sequence file", $bg_seq_file;
#     foreach my $model (sort keys %bg_model_file) {
#       $verbose_message .= sprintf ";\t%-12s\t%s\n", $model, $bg_model_file{model};
#     }
#   } elsif ($background) {
#     $verbose_message .= sprintf "; %-13s\t%s\n", "Background model", $background;
#   } elsif ($markov) {
#     $verbose_message .= sprintf "; %-13s\t%s\n", "Background Markov order", $markov_order;
#   }

#   $verbose_message .= "; No overlap with upstream ORFs\n" if ($noorf);
#   if ($noov eq "-noov") {
#     $verbose_message .= "; Overlapping matches are discarded for self-overlapping patterns\n" ;
#   } else {
#     $verbose_message .= "; Overlapping matches are allowed for self-overlapping patterns\n" ;
#   }
#   warn $verbose_message;
#   return($verbose_message);
# }


# ################################################################
# ### initialize one directory for each family
# sub MakeDirectories {
#    foreach $family_name(@families) {
# 	$dir = "${family_name}/";
# 	unless (-d $dir) {
# 	    mkdir $dir, 0755 || &RSAT::error::FatalError("Cannot create directory", $dir);;
# 	}
# 	open FAM, ">$family{$family_name}->{family_file}" ||
# 	  &RSAT::error::FatalError("Cannot write family file", $family{$family_name}->{family_file});
# 	foreach $id ($family{$family_name}->get_members()) {
# 	  print FAM "$id\t$name{$id}\n";
# 	}
# 	close FAM;
#    }
# }


# ################################################################
# ### Generate tab files and SQL scripts for storing the results in a
# ### relational database
# sub ExportSQL {
#   &RSAT::message::TimeWarn("Generating SQL") if ($verbose >= 1);

#   &RSAT::message::debug($current_analysis, $current_analysis->get_attribute("id")) if ($verbose >= 10);

#   ## Set the parameters for the analysis as a whole
#   $current_analysis->set_attribute("date", $run_date);
#   $current_analysis->set_attribute("family_file", $family_file);
#   $current_analysis->set_attribute("organism", $organism_name);
#   $current_analysis->set_attribute("background", $background);
#   $current_analysis->set_attribute("up_from", $from);
#   $current_analysis->set_attribute("up_to", $to);
#   $current_analysis->set_attribute("orf_ovlp", $noorf);
#   $current_analysis->set_attribute("pattern_ovlp", $noov);
#   $current_analysis->set_attribute("strands", $strands);
#   $current_analysis->set_attribute("thosig", $lth{occ_sig});
#   $current_analysis->set_attribute("suffix", $table_suffix);

#   my %export_columns = ();
#   my $f = 0;
#   foreach my $family_name (@families) {
#     $f++;
#     &RSAT::message::TimeWarn("Generating SQL for family", $f."/".scalar(@families), $family_name) if ($verbose >= 2);

#     #### general family attributes
#     my $family_object = $family_factory->new_object(id=>$run_prefix."_".$family_name);
#     local $family_id = $family_object->get_attribute("id");
#     $current_analysis->push_attribute("clusters",$family_id);
#     $family_object->set_attribute("analysis", $run_prefix);
#     $family_object->set_attribute("name", $family_name);
#     $family_object->set_attribute("size", scalar(@{$family{$family_name}->{members}}));
#     $family_object->set_attribute("organism", $organism_name);
#     &RSAT::message::Warning ("", "SQL export for family", $family_name, $organism_name) if ($verbose >= 2);

#     #### family members
#     my @members = @{$family{$family_name}->{members}};
#     foreach my $m (@members) {
#       $family_object->push_attribute("genes", $m);
#     }

#     #### discovered oligos
#     my $oligo_file = $family{$family_name}->{oligo_file};
#     if (-e $oligo_file) {
#       my %export_columns = ();
#       ($oligo_handle, $dir) = &OpenInputFile($oligo_file);
#       while (<$oligo_handle>) {
# 	chomp;
# 	if (/^;/) {
# 	  if ((/;\s+(\d+)\t(\S+)/) && ($2 ne "id")) {
# 	    $export_columns{$2} = $1 - 1;
# 	  }
# 	  next;
# 	}
# 	next unless (/\S/);	## Skip empty lines
# 	next if (/^\#/);		## Skip header line
# 	my @fields = split;
# 	my $sequence = uc($fields[0]);
# 	my $oligo_object = $pattern_factory->new_object();
# 	&RSAT::message::Debug("Created object", $sequence, $oligo_object) if ($verbose >= 5);
# 	foreach $k (sort keys %export_columns) {
# 	  &RSAT::message::Debug ( "specifying attribute",
# 				  $k,
# 				  $export_columns{$k},
# 				  $fields[$export_columns{$k}],
# 				) if ($verbose >= 5);
# 	  $oligo_object->set_attribute($k, $fields[$export_columns{$k}]);
# 	}
# 	&RSAT::message::Debug ("specifying sequence", $sequence) if ($verbose >= 5);
# 	$oligo_object->force_attribute("sequence", $sequence);
# 	$oligo_object->force_attribute("type", "oligo");
# 	$oligo_object->set_attribute("family_id", $family_id);
# 	$oligo_object->set_attribute("family", $family_name);
# 	if ($strands eq "-2str") {
# 	  $oligo_object->force_attribute("rev_compl", &ReverseComplement($oligo_object->get_attribute("sequence")));
# 	}
#       }
#       close $oligo_handle;
#     } else {
#       my $pwd = `pwd`;
#       chomp $pwd;
#       &RSAT::message::Warning(join ("\t", "SQL export", $family_name, $pwd, "oligo file does not exist",  $oligo_file , "SKIPPED"));
#     }

#     #### discovered dyads
#     my $dyad_file = $family{$family_name}->{dyad_file};
#     if (-e $dyad_file) {
#       my %export_columns = ();
#       open DYADS, $dyad_file;
#       while (<DYADS>) {
# 	chomp;
# 	if (/^;/) {
# 	  ## Temporary patch for dyad obs_occ -> occ (to be compatible with oligo-analysis)
# 	  s/obs_occ/occ/;

# 	  ## Output fields
# 	  if ((/;\s+(\d+)\t(\S+)/) && ($2 ne "id")) {
# 	    $export_columns{$2} = $1 - 1;
# 	  }
# 	  next;
# 	}
# 	next unless (/\S/);
# 	next if (/^\#/);		## Skip header line
# 	my @fields = split;
# 	my $sequence = uc($fields[0]);
# 	$sequence =~ s/N/n/g;
# 	my $dyad_object = $pattern_factory->new_object();
# 	&RSAT::message::Debug("Created object", $sequence, $dyad_object) if ($verbose >= 5);
# 	foreach $k (sort keys %export_columns) {
# 	  &RSAT::message::Debug("specifying attribute", $k, $export_columns{$k},$fields[$export_columns{$k}])
# 	    if ($verbose >= 5);
# 	  $dyad_object->set_attribute($k, $fields[$export_columns{$k}]);
# 	}
# 	&RSAT::message::Debug ("specifying sequence", $sequence) if ($verbose >= 5);
# 	$dyad_object->force_attribute("sequence", $sequence);
# 	$dyad_object->force_attribute("type", "dyad");
# 	$dyad_object->set_attribute("family_id", $family_id);
# 	$dyad_object->set_attribute("family", $family_name);
# 	if ($strands eq "-2str") {
# 	  my $rc_sequence = uc( &ReverseComplement($dyad_object->get_attribute("sequence")));
# 	  $rc_sequence =~ s/N/n/g;
# 	  $dyad_object->force_attribute("rev_compl",$rc_sequence);
# 	}
#       }
#       close DYADS;
#     } else {
#       &RSAT::message::Warning(join ("\t", "SQL export", $family_name, $pwd, "dyad file does not exist",  $dyad_file , "SKIPPED"));
#     }

#     #### Matrices
#     for my $program qw (consensus gibbs AlignACE infogibbs meme MotifSampler) {
#       my $matrix_file = $family{$family_name}->{$program."_file"};
#       if (-e $matrix_file) {
# 	&ReadMatrices($family_name, $matrix_file, program=>$program);
#       } else {
# 	&RSAT::message::Warning(join ("\t", "SQL export", $family_name, $pwd, "matrix file does not exist",  $matrix_file , "SKIPPED"));
#       }
#     }
#   }

#   ## Export directory
#   $dir{sql} = "sql_export";
#   chdir($dir{main}); chdir($dir{output}); &RSAT::util::CheckOutDir($dir{sql});

#   ## Temporarily modify the output directory
#   ## for table dumping
#   $dir{output_bk} = $dir{output};
#   $dir{output} = $dir{sql};
#   my $pwd = `pwd`;
#   chomp $pwd;
#   &RSAT::message::Info ($pwd , $dir{output}, $dir{output_bk});
#   #### export the result tables
#   foreach my $factory ($analysis_factory, $family_factory, $pattern_factory, $matrix_factory) {
#     $factory->dump_tables();
#     $factory->generate_sql(schema=>$schema,
# 			   host=>$host,
# 			   password=>$password,
# 			   user=>$user);
#   }
#   &ExportMakefile("analysis", "family", "pattern");
#   $dir{output} = $dir{output_bk};
# }


# ## ##############################################################
# ## Read all sequences (option -all_seq)
# sub ReadAllSequences {
#   &RSAT::message::TimeWarn("Reading ALL sequences from file", $all_seq_file, "format", $all_seq_format);
#   chdir($dir{main});
#   our %all_sequences = ();
#   ($in, $input_dir) = &OpenInputFile($all_seq_file);
#   while ((($current_seq, $current_id, @comments) = &ReadNextSequence($in, $all_seq_format, $input_dir, "",$mask, %args)) &&
# 	 (($current_seq ne "") || ($current_id ne ""))) {
#     $seq_nb++;
#     &RSAT::message::TimeWarn("Read sequence", $seq_nb) if (($main::verbose >= 0) && ($seq_nb%1000==0));
#     #	&RSAT::message::Debug("", "read sequence", $seq_nb, $current_id) if ($main::verbose >= 10);
#     $all_sequences{lc($current_id)} = $current_seq;
#   }
#   close $in;
#   chdir($dir{output});
# }


# ## ##############################################################
# ## If the input is a list of sequence files instead of a family file,
# ## index the input sequence files
# sub ReadSequenceList {
#    ($sequence_dir, $short_sequence_file_list) = &SplitFileName($sequence_file_list);

#    ## Convert relative to absolute directory
#    if (($dir{main}) && ($sequence_dir !~ /^\//)) {
# 	$sequence_dir = $dir{main}."/".$sequence_dir;
#    }

#    &RSAT::message::Info("Sequence dir", $sequence_dir) if ($verbose >= 1);
#    &RSAT::message::Info("Reading sequence file list", $sequence_file_list) if ($verbose >= 1);
#    my ($files) = &OpenInputFile($sequence_file_list);
#    my $f = 0;
#    while (<$files>) {
# 	next if (/^;/);
# 	next unless (/\S/);
# 	chomp;
# 	$f++;
# 	my @fields = split /\s+/;
# 	my $sequence_file = shift @fields; ## the first word of each row is a file
# 	my $short_file = &ShortFileName($sequence_file);
# 	my $family_name = $short_file;
# 	$family_name =~ s|\.${seq_ext}$||;
#        my $comment = join "; ", @fields;

# #	push @families, $family_name;
# #	push @sequence_files, $sequence_file;

# 	#### create a new family if required
# 	unless ($family{$family_name}) {
# 	    $family{$family_name} = new RSAT::Family(name=>$family_name);
# 	}

#        #### Sequence file
# 	if ($sequence_dir) {
# 	    $family{$family_name}->{seq_file} = $sequence_dir."/".$sequence_file;
# 	} else {
# 	    $family{$family_name}->{seq_file} = $sequence_file;
# 	}
# 	$family{$family_name}->{seq_file} =~ s|/+|/|g;
# 	$family{$family_name}->{seq_file_purged} = $family{$family_name}->{seq_file};
# 	$family{$family_name}->{seq_file_purged} =~ s/\.${seq_ext}$//;
# 	$family{$family_name}->{seq_file_purged} .= "_purged.".${seq_ext};

#        #### Comment
#        $family{$family_name}->{comment} = $comment;

# 	&RSAT::message::Info (join ("\t", "File $f",
# 				    $family_name,
# 				    $family{$family_name}->{seq_file},
# 				    $family{$family_name}->{seq_file_purged},
# #				    $sequence_file,
# #				    $short_file,
# #				    $family_name,
# 				    $comment,
# #		   "\n", $family{$family_name}->{seq_file},
# #		   "\n", $family{$family_name}->{seq_file_purged}
# 				   )) if ($verbose >= 3);
# 	push @comments, $comment;
#    }
#    close $files;

#    @families = sort keys %family;

#    #### Family members are read from the sequence file (sequence IDs are used as member IDs)

#    &RSAT::message::Info("Reading family members from input sequence files") if ($verbose >= 1);
#    foreach my $family_name (@families) {
#        my ($in, $input_dir) =  &OpenInputFile($family{$family_name}->{seq_file});
#        while ((($current_seq, $current_id, @comments) = &ReadNextSequence($in, $seq_format, $input_dir, 'dna', $mask)) &&
# 	      (($current_seq) || ($current_id))) {
#            $family{$family_name}->new_member($current_id);
#            &RSAT::message::Info (join ("\t",
# 				       "", "File", $family{$family_name}->{seq_file},
# 				       "Family", $family_name,
# 				       "member", $current_id)) if ($verbose >= 4);
#        }
#        close $in;
#        my $gene_nb = scalar(@{$family{$family_name}->{members}});
#        &RSAT::message::Info (join ("\t",
# 				   $family_name,
# 				   $family{$family_name}->{seq_file},
# 				   $gene_nb." members")) if ($verbose >= 2);
# 			    }
# }

# ################################################################
# ## Validate the pattern discovery results by comparing discovered patterns to
# ## known sites
# sub Validate {
#    &RSAT::message::Info( ";\n; Validating discovered patterns") if ($verbose >= 1);
#    if ($task{validate_oligos}) {
#        $pattern_type = 'oligo';
#        $pattern_suffix = $oligo_suffix;
#    } elsif ($task{validate_dyads}) {
#        $pattern_type = 'dyad';
#        $pattern_suffix = $dyad_suffix;
#    } elsif ($task{validate_orm}) {
#        $pattern_type = 'orm';
#        $pattern_suffix = $orm_suffix;
#    }

#    if ($noorf eq "-noorf") {
#        $noorf_status = $noorf;
#    } else {
#        $noorf_status = "-orfov";
#    }

#    ## Validation per family file
#    $outfile{validation_per_family} = join ("", "validation_per_family", $pattern_suffix,$from,$noorf_status,"_",$feature_types,".tab");
#    $valid_fam_handle = &OpenOutputFile($outfile{validation_per_family});

#    ## Validation per pattern file
#    $outfile{validation_per_pattern} = join ("", "validation_per_pattern", $pattern_suffix,$from,$noorf_status,"_",$feature_types,".tab");
#    $valid_patt_handle = &OpenOutputFile($outfile{validation_per_pattern});

#    ## Print the column descriptions as comments
#    ## Validation per family
#    $valid_fam_keys{"Fam"} = "Family name";
#    $valid_fam_keys{"Members"} = "Number of sequences";
#    $valid_fam_keys{"Sites"} = "number of annotated sites";
#    $valid_fam_keys{"sig_max"} = "Maximal significance";
#    $valid_fam_keys{"sig_sum"} = "Sum of significances for the discovered patterns";
#    $valid_fam_keys{"sig_m"} = "Mean of significance for the discovered patterns";
#    $valid_fam_keys{"TPsites"} = "Know sites matched by at least one pattern";
#    $valid_fam_keys{"nb_pat"} = "Number of discovered patterns";
#    $valid_fam_keys{"TP_pat"} = "Number of discovered patterns matching at least one site";
#    $valid_fam_keys{"PPV"} = "Positive predictive value: PVV=TP_pat/nb_pat";
#    $valid_fam_keys{"Sn"} = "Sensitivity: Sn=TPsites/Sites";
#    $valid_fam_keys{"Acc.a"} = "Arithmetic accuracy: Acc.a = (Sn + PPV)/2";
#    $valid_fam_keys{"Acc.g"} = "Geometric accuracy: Acc.g = sqrt(Sn*PPV)";
#    $valid_fam_keys{"Acc.h"} = "Harmonic accuracy: Acc.h = 2*(Sn*PPV)/(Sn+PPV)";
#    print $valid_fam_handle "; Column contents\n";
#    foreach my $fam_key (keys(%valid_fam_keys)) {
#        print $valid_fam_handle ";\t", $fam_key, "\t", $valid_fam_keys{$fam_key}, "\n";
#    }

#    ## Print the column descriptions as comments
#    ## Validation per pattern
#    $valid_patt_keys{"Fam"} = "Family name";
#    $valid_patt_keys{"Members"} = "Number of sequences";
#    $valid_patt_keys{"Sites"} = "number of annotated sites";
#    $valid_patt_keys{"Pattern"} = "Pattern sequence";
#    $valid_patt_keys{"Sig"} = "Significance of discovered pattern";
#    $valid_patt_keys{"TPsites"} = "Know sites matched by discovered pattern";
#    $valid_patt_keys{"TP_pat"} = "Discovered pattern is matching at least one annotated site (0 = False, 1 = True)";
#    $valid_patt_keys{"PPV"} = "Positive predictive value: PVV=TP_pat";
#    $valid_patt_keys{"Sn"} = "Sensitivity: Sn=TPsites/Sites";
#    $valid_patt_keys{"Acc.a"} = "Arithmetic accuracy: Acc.a = (Sn + PPV)/2";
#    $valid_patt_keys{"Acc.g"} = "Geometric accuracy: Acc.g = sqrt(Sn*PPV)";
#    $valid_patt_keys{"Acc.h"} = "Harmonic accuracy: Acc.h = 2*(Sn*PPV)/(Sn+PPV)";
#    print $valid_patt_handle "; Column contents\n";
#    foreach my $patt_key (keys(%valid_patt_keys)) {
#        print $valid_patt_handle ";\t", $patt_key, "\t", $valid_patt_keys{$patt_key}, "\n";
#    }

#    ## Print header in validation files
#    print $valid_fam_handle join("\t",
# 			     "# Fam",
# 			     "members",
# 			     "sites",
# 			     "sig_max",
# 			     "sig_sum",
# 			     "sig_m",
# 			     "TPsites",
# 			     "nb_pat",
# 			     "TP_pat",
# 			     "PPV",
# 			     "Sn",
# 			     "Acc.a",
# 			     "Acc.g",
# 			     "Acc.h"), "\n";

#    print $valid_patt_handle join("\t",
# 				 "# Fam",
# 				 "Members",
# 				 "Sites",
# 				 "Pattern",
# 				 "Sig",
# 				 "TPsites",
# 				 "TP_pat",
# 				 "PPV",
# 				 "Sn",
# 				 "Acc.a",
# 				 "Acc.g",
# 				 "Acc.h"), "\n";

#    my $f = 0;
#    foreach my $family_name (@families) {
#        $f++;


#        my @members = @{$family{$family_name}->{members}};
#        $gene_nb = scalar(@members);

#        ## Check minimum number of genes
#        if ($gene_nb < $min_genes) {
# 	   &RSAT::message::Warning("Skipping family",
# 				   $f, $family_name,
# 				   $gene_nb." genes",
# 				   "< min = ".$min_genes), "\n" if ($verbose >= 2);
# 	   next;
#        }


#        ## Write the known sites of the current family to a separate file, used as input for compare-patterns
#        my @sites = @{$known_site{$family_name}};
#        my @sources = @{$known_site_source{$family_name}};

#        &RSAT::message::Info("Validating family ".$f."/".scalar(@families),
# 			    $family_name,
# 			    "known sites",
# 			    $family{$family_name}->{known_sites},
# 			    "Number of sites",
# 			    scalar(@sites)) if ($verbose >= 2);

#        $known_handle = &OpenOutputFile($family{$family_name}->{known_sites});
#        foreach my $s (0..$#sites) {
# 	   my $source = $sources[$s];
# 	   unless ($source) {
# 	       $source = $family_name."known".$s;
# 	   }
# 	   print $known_handle join("\t", $sites[$s], $source), "\n";
#        }
#        close $known_handle;

#        &ValidateOneResult($family_name, scalar(@sites));
#    }
#    close $valid_fam_handle;
#    &RSAT::message::Info(join ("\t", "Validation per family done", $outfile{validation_per_family}));
#    close $valid_patt_handle;
#    &RSAT::message::Info(join ("\t", "Validation per pattern done", $outfile{validation_per_pattern}));
# }

# ################################################################
# ## Compare one result file with annotated motifs
# sub ValidateOneResult {
#     my ($family_name, $number_sites) = @_;

#     my $pattern_file = $family{$family_name}->{$pattern_type."_file"};

#     ## Compare discovered patterns with known sites
#     my $command = "compare-patterns";
#     $command .= " -v 1";
#     $command .= " ".$strands;
#     $command .= " -file1 ".$family{$family_name}->{known_sites};
#     $command .= " -file2 ".$pattern_file;
#     $command .= " -slide";
#     $command .= " -return match,weight,rel_w,id,strand,seq";
#     $command .= " -lth rel_w ".$rel_w;
#     $command .= " -o ".$family{$family_name}->{$pattern_type."_vs_known"};
#     &doit($command, $dry_run, $die_on_error, $verbose);

#     ## Compare discovered patterns with known sites (relw table output type)
#     my $command = "compare-patterns";
#     $command .= " -v 1";
#     $command .= " ".$strands;
#     $command .= " -file1 ".$family{$family_name}->{known_sites};
#     $command .= " -file2 ".$pattern_file;
#     $command .= " -slide";
#     $command .= " -table rel_w";
#     $command .= " -o ".$family{$family_name}->{$pattern_type."_vs_known_relw_table"};
#     &doit($command, $dry_run, $die_on_error, $verbose);

#     ## Compare discovered patterns with known sites (weight table output type)
#     my $command = "compare-patterns";
#     $command .= " -v 1";
#     $command .= " ".$strands;
#     $command .= " -file1 ".$family{$family_name}->{known_sites};
#     $command .= " -file2 ".$pattern_file;
#     $command .= " -slide";
#     $command .= " -table weight";
#     $command .= " -o ".$family{$family_name}->{$pattern_type."_vs_known_weight_table"};
#     &doit($command, $dry_run, $die_on_error, $verbose);

#     my $match_file = $family{$family_name}->{$pattern_type."_vs_known"};
#     my ($pattern_handle) = &OpenInputFile($pattern_file);

#     if ($pattern_type eq "oligo") {
# 	$score_col = 9;
#     } elsif ($pattern_type eq "dyad") {
# 	$score_col = 8;
#     } elsif ($pattern_type eq "orm") {
# 	$score_col = 9;
#     } else {
# 	&FatalError ("Pattern type $pattern_type is not supported yet");
#     }

#     ## Statistics per pattern
#     my $nb_patterns = 0;
#     my $max_sig = 0;
#     my $sum_sig = 0;

#     while (<$pattern_handle>) {
#         next if (/^;/); # Skip comment lines
#         next if (/^\#/); # Skip column header lines
#         next unless (/\S/); # Skip empty lines
#         chomp();
# 	$nb_patterns ++;
# 	    my @fields = split "\t", $_;
# 	    my $pattern = $fields[0];
# 	    my $score = $fields[$score_col -1];
# 	    if ($score > $max_sig) {
# 		$max_sig = $score;
# 	    }
# 	    $sum_sig = $sum_sig + $score;
# 	    if ($number_sites == 0) {
# 		$TPsites_patt = "NA";
# 		$TPpattern = "NA";
# 		$Sn_patt = "NA";
# 		$PPV_patt = "NA";
# 		$acc_a_patt = "NA";
# 		$acc_g_patt = "NA";
# 		$acc_h_patt = "NA";
# 	    } else {
# 		chomp($TPsites_patt = `more $match_file | grep '$pattern' | wc -l`);
# 		if ($TPsites_patt == 0) {
# 		    $TPpattern = 0;
# 		} elsif ($TPsites_patt >= 1) {
# 		    $TPpattern = 1;
# 		}
# 		$Sn_patt = $TPsites_patt/$number_sites;
# 		$Sn_patt = sprintf("%.4f", $Sn_patt);
# 		$PPV_patt = $TPpattern;
# 		$PPV_patt = sprintf("%.4f", $PPV_patt);
# 		$acc_a_patt = ($Sn_patt+$PPV_patt)/2;
# 		$acc_a_patt = sprintf("%.4f", $acc_a_patt);
# 		$acc_g_patt = sqrt($Sn_patt*$PPV_patt);
# 		$acc_g_patt = sprintf("%.4f", $acc_g_patt);
# 		if (($Sn_patt + $PPV_patt) == 0) {
# 		    $acc_h_patt = 0;
# 		} else {
# 		    $acc_h_patt = 2*($Sn_patt*$PPV_patt)/($Sn_patt+$PPV_patt);
# 		    $acc_h_patt = sprintf("%.4f", $acc_h_patt);
# 		}
# 	    }
# 	print $valid_patt_handle join("\t", $family_name,
# 				      scalar(@{$family{$family_name}->{members}}),
# 				      $number_sites,
# 				      $pattern,
# 				      $score,
# 				      $TPsites_patt,
# 				      $TPpattern,
# 				      $PPV_patt,
# 				      $Sn_patt,
# 				      $acc_a_patt,
# 				      $acc_g_patt,
# 				      $acc_h_patt), "\n";
#     }

#     ## Mean significance per family (only the patterns above the threshold of significance)
#     if ($nb_patterns > 0) {
# 	$mean_sig = sprintf("%.3f",  $sum_sig / $nb_patterns);
#     } else {
# 	$max_sig = "NA";
# 	$sum_sig = "NA";
# 	$mean_sig = "NA";
#     }

#     ## Count number of annotated sites matching at least one discovered pattern
#     $TPsites = `more $match_file | grep -v ';' | grep -v '#' | cut -f 6 | sort -u | wc -l`;
#     chomp($TPsites);

#     ## Count number of discovered patterns matching at least one annotated site
#     $TPpatterns = `more $match_file | grep -v ';' | grep -v '#' | cut -f 7 | sort -u | wc -l`;
#     chomp($TPpatterns);

#     ## Calculate sensitivity, positive predictive value and accuracy
#     if ($number_sites == 0) {
# 	$TPsites = "NA";
# 	$TPpatterns = "NA";
# 	$Sn = "NA";
# 	$PPV = "NA";
# 	$acc_a = "NA";
# 	$acc_g = "NA";
# 	$acc_h = "NA";
#     } else {
#         $Sn = $TPsites/$number_sites;
#         $Sn = sprintf("%.4f", $Sn);
# 	if ($nb_patterns == 0) {
# 	    $PPV = 0;
# 	    $acc_a = 0;
#             $acc_g = 0;
#             $acc_h = 0;
# 	} else {
# 	    $PPV = $TPpatterns/$nb_patterns;
# 	    $PPV = sprintf("%.4f", $PPV);
# 	    $acc_a = ($Sn+$PPV)/2;
# 	    $acc_a = sprintf("%.4f", $acc_a);
# 	    $acc_g = sqrt($Sn*$PPV);
# 	    $acc_g = sprintf("%.4f", $acc_g);
# 	    if ($Sn + $PPV == 0) {
# 		$acc_h = 0;
# 	    } else {
# 		$acc_h = 2*($Sn*$PPV)/($Sn+$PPV);
# 		$acc_h = sprintf("%.4f", $acc_h);
# 	    }
# 	}
#     }

#     ## Print results
#     print $valid_fam_handle join("\t", $family_name,
# 			     scalar(@{$family{$family_name}->{members}}),
# 			     $number_sites,
# 			     $max_sig,
# 			     $sum_sig,
# 			     $mean_sig,
# 			     $TPsites,
# 			     $nb_patterns,
# 			     $TPpatterns,
# 			     $PPV,
# 			     $Sn,
# 			     $acc_a,
# 			     $acc_g,
# 			     $acc_h), "\n";

# }

# ################################################################
# ## Database comparison
# sub DatabaseMatch {
#     &RSAT::message::Info( ";\n; Comparison of discovered patterns to database of known sites") if ($verbose >= 1);
#    if ($task{db_match_oligos}) {
#        $pattern_type = 'oligo';
#        $pattern_suffix = $oligo_suffix;
#    } elsif ($task{db_match_dyads}) {
#        $pattern_type = 'dyad';
#        $pattern_suffix = $dyad_suffix;
#    } elsif ($task{db_match_orm}) {
#        $pattern_type = 'orm';
#        $pattern_suffix = $orm_suffix;
#    }

#    if ($noorf eq "-noorf") {
#        $noorf_status = $noorf;
#    } else {
#        $noorf_status = "-orfov";
#    }

#    ## Database comparison synthetic file
# #   $outfile{db_match} = join ("", "database_match", $pattern_suffix,$from,$noorf_status,"_",$feature_types,".tab");
# #   $db_match_handle = &OpenOutputFile($outfile{db_match});

#    foreach my $family_name (@families) {
#        $f++;
#        &RSAT::message::Info("Matching patterns discovered for family ".$f."/".scalar(@families),
# 			    $family_name) if ($verbose >= 2);

#        my $pattern_file = $family{$family_name}->{$pattern_type."_file"};

#        ## Compare discovered patterns with database sites
#        my $command = "compare-patterns";
#        $command .= " -v 1";
#        $command .= " ".$strands;
#        $command .= " -file1 ".$db_site_file;
#        $command .= " -file2 ".$pattern_file;
#        $command .= " -slide";
#        $command .= " -return match,weight,rel_w,id,strand,seq";
#        $command .= " -lth rel_w ".$rel_w;
#        $command .= " -o ".$family{$family_name}->{$pattern_type."_vs_db"};
#        &doit($command, $dry_run, $die_on_error, $verbose);

#        ## Compare discovered patterns with database sites (relw table output type)
#        my $command = "compare-patterns";
#        $command .= " -v 1";
#        $command .= " ".$strands;
#        $command .= " -file1 ".$db_site_file;
#        $command .= " -file2 ".$pattern_file;
#        $command .= " -slide";
#        $command .= " -table rel_w";
#        $command .= " -o ".$family{$family_name}->{$pattern_type."_vs_db_relw_table"};
#        &doit($command, $dry_run, $die_on_error, $verbose);

#        ## Compare discovered patterns with database sites (weight table output type)
#        my $command = "compare-patterns";
#        $command .= " -v 1";
#        $command .= " ".$strands;
#        $command .= " -file1 ".$db_site_file;
#        $command .= " -file2 ".$pattern_file;
#        $command .= " -slide";
#        $command .= " -table weight";
#        $command .= " -o ".$family{$family_name}->{$pattern_type."_vs_db_weight_table"};
#        &doit($command, $dry_run, $die_on_error, $verbose);

# #       my $db_match_file = $family{$family_name}->{$pattern_type."_vs_db"};



#        ## Print results in synthetic file
# #       print $db_match_handle join("\t", $family_name,
# #				      scalar(@{$family{$family_name}->{members}}),
# #				      $number_sites,
# #				      $pattern,
# #				      $score,
# #				      $site,
# #				      $site_id), "\n";

#    }
# #   close $db_match_handle;
# #   &RSAT::message::Info(join ("\t", "Database match done", $outfile{db_match}));
# }

# ################################################################
# ## Distribution of significance
# sub SigDistrib {
#     &RSAT::message::Info( ";\n; Significance distribution of discovered patterns") if ($verbose >= 1);
#     if ($task{sig_distrib_oligos}) {
# 	$pattern_type = 'oligo';
# 	$pattern_suffix = $oligo_suffix;
#     } elsif ($task{sig_distrib_dyads}) {
# 	$pattern_type = 'dyad';
# 	$pattern_suffix = $dyad_suffix;
#     } elsif ($task{sig_distrib_orm}) {
# 	$pattern_type = 'orm';
# 	$pattern_suffix = $orm_suffix;
#     }

#     if ($noorf eq "-noorf") {
# 	$noorf_status = $noorf;
#     } else {
# 	$noorf_status = "-orfov";
#     }

#    ## Significance distribution file
#     $dir{distrib} = "sig_distrib";
#     chdir($dir{main}); chdir($dir{output}); &RSAT::util::CheckOutDir($dir{distrib});
#     $outfile{sig_distrib_file} = join ("", $dir{distrib},"/","sig_distrib", $pattern_suffix,$from,$noorf_status,"_",$feature_types);
#     if ($min_genes > 1) {
# 	$outfile{sig_distrib_file} .= "_mingenes".$min_genes;
#     }
#     $sig_distrib_handle = &OpenOutputFile($outfile{sig_distrib_file}.".tab");

#     ## Getting and printing patterns and their significance
#     my %max_score = ();
#     my %pps = ();
#     my %fps = ();

#     $fc=0;
#     foreach my $family_name (@families) {
# 	$fc++;


# 	my @members = @{$family{$family_name}->{members}};
# 	$gene_nb = scalar(@members);

# 	## Check minimum number of genes
# 	if ($gene_nb < $min_genes) {
# 	    &RSAT::message::Warning("Skipping family",
# 				    $fc, $family_name,
# 				    $gene_nb." genes",
# 				    "< min = ".$min_genes), "\n" if ($verbose >= 2);
# 	    next;
# 	}


# 	my $pattern_file = $family{$family_name}->{$pattern_type."_file"};
# 	my ($pattern_handle) = &OpenInputFile($pattern_file);
# 	my $score_col;
# 	if ($pattern_type eq "oligo") {
# 	    $score_col = 9;
# 	} elsif ($pattern_type eq "dyad") {
# 	    $score_col = 8;
# 	} elsif ($pattern_type eq "orm") {
# 	    $score_col = 9;
# 	} else {
# 	    &FatalError ("Pattern type $pattern_type is not supported yet");
# 	}
# 	$max_score{$family_name} = "NA";
# 	while (<$pattern_handle>) {
# 	    next if (/^;/); # Skip comment lines
# 	    next if (/^\#/); # Skip column header lines
# 	    next unless (/\S/); # Skip empty lines
# 	    chomp();
# 	    my @fields = split "\t", $_;
# 	    my $pattern = $fields[0];
# 	    my $score = $fields[$score_col -1];
# 	    $pps{$score}++;
# 	    if (($max_score{$family_name} eq "NA") || ($score > $max_score{$family_name})) {
# 		$max_score{$family_name} = $score;
# 	    }
# #	    print $sig_distrib_handle join ("\t", $family_name, $pattern, $score), "\n";
# 	}
# 	$fps{$max_score{$family_name}}++;
# 	close $pattern_handle;
#     }

#     ## Output columns
#     @sig_distrib_columns = qw(Sig pps pps_icum ppf_icum fps_icum ffs_icum E_ppf E_ffs);

#     ## Print the column descriptions as comments
#     $sig_distrib_keys{"Sig"} = "Significance of discovered patterns (s)";
#     $sig_distrib_keys{"pps"} = "Patterns per score (number of patterns with score = s)";
#     $sig_distrib_keys{"pps_icum"} = "Patterns per score (number of patterns with score >= s)";
#     #    $sig_distrib_keys{"ppf"} = "Patterns per family (number of patterns with exactly this score per family)";
#     $sig_distrib_keys{"ppf_icum"} = "Patterns per family (number of patterns per family with score >= s)";
#     $sig_distrib_keys{"fps_icum"} = "Number of clusters with score >= s";
#     $sig_distrib_keys{"ffs_icum"} = "Fraction of clusters with score >= s";
#     $sig_distrib_keys{"E_ppf"} = "Expected number of patterns per family with score >= s";
#     $sig_distrib_keys{"E_ffs"} = "Expected fraction of clusters with score >= s";
#     print $sig_distrib_handle "; Column contents\n";
#     foreach my $k (0..$#sig_distrib_columns) {
# 	my $key = $sig_distrib_columns[$k];
# 	print $sig_distrib_handle join ("\t", ";", $k+1, $key,  $sig_distrib_keys{$key}), "\n";
#     }

#     ## Compute inverse cumulative distributions
#     my @sorted_scores_decr = sort {$b <=> $a} keys (%pps);
#     my %pps_icum = ();
#     my %fps_icum = ();
#     my %E_ppf = ();
#     my %E_ffs = ();
#     my $nb_families = scalar(@families);

#     my $pps_icum = 0;
#     my $fps_icum = 0;
#     foreach my $score (@sorted_scores_decr) {
# 	$pps_icum += $pps{$score}; $pps_icum{$score} =$pps_icum;
# 	$fps_icum += $fps{$score}; $fps_icum{$score} = $fps_icum;
# 	$E_ppf{$score} = 10**(-$score);

# 	&RSAT::message::Debug("distrib", $score, $pps{$score}, $pps_icum, $fps{$score}, $fps_icum, $E_ppf{$score})
# 	  if ($main::verbose >= 5);
# 	$E_ffs{$score} = 1- &poisson(0, $E_ppf{$score});
#       }

#    ## Print header row for the distribution table
#     print $sig_distrib_handle "#", join("\t", @sig_distrib_columns), "\n";

#     ## Print distribution table
#     my @sorted_scores = sort {$a <=> $b} keys (%pps);
#     foreach my $score (@sorted_scores) {
# 	print $sig_distrib_handle join("\t",
# 				       $score,
# 				       $pps{$score},
# 				       $pps_icum{$score},
# 				       sprintf("%.3f",$pps_icum{$score}/$nb_families),
# 				       $fps_icum{$score},
# 				       sprintf("%.3f",$fps_icum{$score}/$nb_families), ## ffs
# 				       sprintf("%7.2g",$E_ppf{$score}),
# 				       sprintf("%7.2g",$E_ffs{$score}),
# 				       ), "\n";
#     }

#     close $sig_distrib_handle;
#     &RSAT::message::Info(join ("\t", "Significance distribution done", $outfile{sig_distrib_file}.".tab"));

#     foreach my $log ("", "-ylog") {
# 	if ($log) {
# 	    $log_option = $log." 10";
# 	}
# 	my $sig_distrib_graph = $outfile{sig_distrib_file}.$log;

# 	## Draw a graph with the number of patterns per family, as a function of the sig score
# 	my $command = "XYgraph ".$log_option;
# 	$command .= " -title1 '".$sig_distrib_graph."'";
# 	$command .= " -title2 $organism_name.' (".$nb_families." families; min ".$min_genes." genes)'";
# 	$command .= " -ymin 0 -ysize 400";
# 	$command .= " -xgstep1 1";
# 	$command .= " -i ".$outfile{sig_distrib_file}.".tab";
# 	$command .= " -o ".$sig_distrib_graph."_ppf.jpg";
# 	$command .= " -xcol 1 -ycol 4,7 -lines -header -legend -xsize 800 -xleg1 'score' -yleg1 'patterns per family'";
# 	&doit($command, $dry_run, $die_on_error, $verbose);
# 	&RSAT::message::Info(join ("\t", "Patterns per family curve", $sig_distrib_graph."_ppf.jpg"));

# 	## Draw a graph with the fraction of clusters per score, as a function of the sig score
# 	my $command = "XYgraph ".$log_option;
# 	$command .= " -title1 '".$sig_distrib_graph."'";
# 	$command .= " -title2 $organism_name.' (".$nb_families." families; min ".$min_genes." genes)'";
# 	$command .= " -i ".$outfile{sig_distrib_file}.".tab";
# 	$command .= " -o ".$sig_distrib_graph."_ffs.jpg";
# 	$command .= " -ymin 0 -ymax 1 -ygstep1 0.1 -ygstep2 0.02 -ysize 400";
# 	$command .= " -xgstep1 1";
# 	$command .= " -xcol 1 -ycol 6,8 -lines -header -legend -xsize 800 -xleg1 'score' -yleg1 'fraction of clusters per score'";
# 	&doit($command, $dry_run, $die_on_error, $verbose);
# 	&RSAT::message::Info(join ("\t", "Clusters per score curve", $sig_distrib_graph."_ffs.jpg"));
#     }
# }

# ################################################################
# ## INDEX one sequence length per family
# sub CalcCalibrationLengths {
#    my %calib_lengths = ();
#    &RSAT::message::TimeWarn("Calculating sequence lengths") if ($verbose >= 1);
#    foreach my $family_name (@families) {
# 	$fam_count++;

# #	&CheckSkip($fam_count,$family_name);
# #	&CheckLast($fam_count);

# 	## Calculate sequence lengths
# 	&RSAT::message::Info( join("\t", "", "Calculating sequence lengths for family",
# 				   $fam_count."/".scalar(@families),
# 				   $family_name,
# 				   $family{$family_name}->{seq_file},
# 				   $family{$family_name}->{seq_len_file})
# 			     ) if ($verbose >= 2);
# 	my $command = "sequence-lengths -i ".$family{$family_name}->{seq_file};
# 	$command .= " -o ".$family{$family_name}->{seq_len_file};
# 	&doit($command, $dry_run, $die_on_error, $verbose);

# 	## Index sequence lengths
# 	my ($in, $indir) = &OpenInputFile($family{$family_name}->{seq_len_file});
# 	while (<$in>) {
# 	    chomp;
# 	    @fields = split "\t";	
# 	    my $current_len = $fields[1];
# 	    $family{$family_name}->{total_length} += $current_len;
# 	    ## check that all sequences of one family have the same length

# 	    if ((defined($family{$family_name}->{calib_length}))
# 		&& ($task{calibrate})) {
# 		unless ($family{$family_name}->{calib_length} == $current_len) {
# 		    &RSAT::error::FatalError("Sequences of family $family_name have different lengths.\nThis is not compatible with the option calibrate");
# 		}
# 	    } else {
# 		if ($current_len > $family{$family_name}->{calib_length}) {
# 		    $family{$family_name}->{calib_length} = $current_len;
# 		}
# 	    }
# 	}
# 	close $in;
# 	$calib_lengths{$family{$family_name}->{calib_length}}++;
# 	&RSAT::message::TimeWarn("Calibration length for family ", $fam_count."/".scalar(@families), $family_name,
# 				 $family{$family_name}->{total_length}) if ($verbose >= 2);
#    }
#    my @calib_lengths = sort {$a <=> $b } keys %calib_lengths;
#    &RSAT::message::Info("Calibration lengths", join( ";", @calib_lengths)) if ($verbose >= 1);
#    return @calib_lengths;
# }


# ## ##############################################################
# ## Calculate the prefix of the calibration file
# sub CalibrationPrefix {
#    my ($calib_length, $oligo_len, $N) = @_;

#    my $calib_prefix = "";

#    if ($N) {
# 	## set-based calibration (simulations)

# 	## Directories containing the calibration files
# 	unless ($dir{calibN}) {
# #	    $dir{calibN} = $supported_organism{$organism_name}->{'data'}."/rand_gene_selections";
# #	    ## Temporary
# #	    $dir{calibN} = "~/motif_discovery_competition_2003/results/".$organism_name."/rand_gene_selections/";
# 	    ## Temporary
# #	    $dir{calibN} = $dir{output}."/rand_gene_selections/";
# 	    $dir{calibN} = "calibrations";
# 	}

# 	## directory for the current calibration
# #	$calib_prefix = $dir{calibN}."/";
# 	$calib_prefix = $dir{main}."/".$dir{calibN};
# #	$calib_prefix .= $oligo_len."nt";
# #	$calib_prefix .= $strands;
# #	$calib_prefix .= $noov;
# #	$calib_prefix .= "_N".$N;
# #	$calib_prefix .= "_L".$calib_length;
# #	$calib_prefix .= "_R".$calibN_repet;

# 	## file name
# 	$calib_prefix .= "/";
# 	$calib_prefix .= $organism_name."_";
# 	$calib_prefix .= $oligo_len."nt_";
# 	$calib_prefix .= $strands;
# 	$calib_prefix .= $noov;
# 	$calib_prefix .= "_n".$N;
# 	$calib_prefix .= "_l".$calib_length;
# 	$calib_prefix .= "_r".$calibN_repet;

#    } else {
# 	## single-gene-based calibration (all upstream)
# 	$calib_prefix = $dir{calib1}."/";
# 	$calib_prefix .= $oligo_len."nt";
# 	$calib_prefix .= "_upstream_L".$calib_length;
# 	$calib_prefix .= "_".$organism_name;
# 	$calib_prefix .= $noov;
# 	$calib_prefix .= $strands;
#    }

#    return $calib_prefix;
# }

# ################################################################
# ## Generate report files for the motif discovery competition 2004
# sub MDCreport {
#    chdir($dir{main}); chdir($dir{output}); &RSAT::util::CheckOutDir($dir{mdc_report});
#    &RSAT::message::TimeWarn("Generating MCD report\t".$outfile{results}) if ($verbose >= 1);

#    ################################################################
#    ## Open a file for the report of results
#    ($results) = &OpenOutputFile($outfile{results});
# #      print $results join ("\n",
# #  			 ">name of contact",
# #  			 "Jacques van Helden",
# #  			 ">email",
# #  			 "jvanheld\@bigre.ulb.ac.be",
# #  			 ">program name",
# #  			 "multiple-family-analyis"), "\n";
#    close $results;

#    ################################################################
#    ## Open a file for the report of parameters
#    ($parameters) = &OpenOutputFile($outfile{parameters});
# #      print $parameters join ("\n",
# #  			    ">name of contact",
# #  			    "Jacques van Helden",
# #  			    ">email",
# #  			    "jvanheld\@bigre.ulb.ac.be",
# #  			    ">program name",
# #  			    "multiple-family-analyis"), "\n";

#    ################################################################
#    ## Generate the report for each family
#    my $command = "";
#    foreach $family_name (@families) {
# 	$fam_count++;
# #	if ($fam_count <= $skip) {
# #	    warn "; Skipping family\t$fam_count\t$family_name\n" if ($verbose >= 3);
# #	    next;
# #	}

# 	my $selection_file = $family{$family_name}->{oligo_selection};
# 	unless (-e $selection_file) {
# #	    $selection_file = $family{$family_name}->{oligo_file};
# 	    &RSAT::message::Warning("No manual selection: file does not exist $selection_file") if ($verbose >= 0);
# 	    next;
# 	}
# 	&RSAT::message::Info("Pattern file for the report",  $selection_file) if ($verbose >= 2);

# 	my $selection_ft_prefix =  $family{$family_name}->{oligo_selection};
# 	my $feature_file = $selection_ft_prefix.".ft";
#  	my $matching_options = " -N 0"; ## Make sure flanking sequences are not considered
# 	$matching_options .= " -merge"; ## merge overlapping matches

# #  	## Threshold for reporting a match
# #  	my $threshold = 0;

# #  	### Sliding window options
# #  	if ($sliding_window_size >= 1) {
# #  	    my @patterns = &ReadPatterns(pattern_file=>$selection_file);
# #  	    my @scores = ();
# #  	    foreach my $pattern (@patterns) {
# #  		push @scores, $pattern->get_attribute("score");
# #  	    }
# #  	    if (scalar(@patterns) >= 1) {
# #  		$threshold = &max(@scores) + 0.01;
# #  	    } elsif (scalar(@patterns) == 1) {
# #  		$threshold = &max(@scores);
# #  	    }
# #  	    $matching_options .= " -window ".$sliding_window_size;
# #  	}
# #  	$matching_options .= " -th ".$threshold;



# 	################################################################
# 	## initialize postprocessing and parameter description
# 	my $postprocessing_text = ">postprocessing\n";
# 	my $parameters_text = ">parameters\n";
# 	my $multi_params = join " ", @ARGV;
# 	$multi_params =~ s/\ -/\n-/g;
# 	$parameters_text .= $multi_params."\n";

# 	### Read the pattern file in order to select
# 	### manually-specified parameters threshold
# 	($in) = &OpenInputFile($selection_file);
# 	my $parameters_started = 0;
# 	my $postprocessing_started = 0;
# 	while ($line = <$in>) {
# 	    if ($line =~ />parameters/) {
# 		$parameters_started = 1;
# 		next;
# 	    }
# 	    next unless ($parameters_started);
# 	    if ($line =~ />postprocessing/) {
# 		$postprocessing_started = 1;
# 		next;
# 	    }
# 	    next unless ($parameters_started);
# 	    $line =~ s/^;\s*//;
# 	    if ($postprocessing_started) {
# 		$postprocessing_text .= $line;
# 	    } elsif ($parameters_started) {
# 		$parameters_text .= $line;
# 		$matching_options .= " ".$line;
# 		&RSAT::message::Info("manually specified parameter $line") if ($verbose >= 3);
# 	    }
# 	}
# 	close($in);
# 	$matching_options =~ s/\n/ /g;
# 	$matching_options =~ s/\r/ /g;

# 	&RSAT::message::Info("Family $family_name\tParameters\t", $parameters_text) if ($verbose >= 2);
# 	&RSAT::message::Info("Family $family_name\tPostprocessing\t", $postprocessing_text) if ($verbose >= 2);
# 	&RSAT::message::Info("Matching options\t", $matching_options) if ($verbose >= 2);

# 	## Make sure there is a carriage return after postprocessing and parameters
# 	chomp($parameter_text);
# 	$parameter_text .= "\n";
# 	chomp($postprocessing_text);
# 	$postprocessing_text .= "\n";

# 	## Write parameters and postprocessing
# 	print $parameters ">data set\n";
# 	print $parameters $family_name, "\n";
# 	print $parameters $parameters_text;
# 	print $parameters $postprocessing_text;

# 	## Draw the feature map of the selected oligos
# 	&MatchPatterns($selection_file, $selection_ft_prefix,  $matching_options);
# 	&DrawFeatureMap($selection_ft_prefix, $selection_ft_prefix, " -minfthick 5");

# 	## Convert the features to MDC format
# 	&RSAT::message::Info("\tReporting\t",$feature_file) if ($verbose >=2);
# 	$command = "MDCreport-from-dnapat -d ".$family_name;
# 	$command .= " -i ".$feature_file;
# 	$command .= ">> ".$outfile{results} if ($outfile{results});
# 	&doit($command, $dry_run, $die_on_error, $verbose);
#    }

#    close $parameters;

#    &RSAT::message::TimeWarn("Generated report\t", $outfile{results}) if ($verbose >= 1);
# }


# ## ##############################################################
# ## Extract contigs and isolated patterns from an assembly file for the
# ## synthetic table
# sub ReadAssemblyFile {
#   my ($family_name, $assembly_file, $pattern_type) = @_;
#   my ($pattern_handle, $input_dir) = &OpenInputFile($assembly_file);
#   my @assembled_patterns = ();
#   my $error = "";
#   my %score = ();

#   while (<$pattern_handle>) {
#     chomp;
#     if (/Too many patterns to assemble/i) {
#       $error = $_;
#       while (<$pattern_handle>) {
# 	$error .= "\n", $_;
#       }
#       return($error);
#     }
#     next if (/^;/);
#     next if (/^\#/);
#     next unless (/\S/);

#     if ((/contig/) || (/consensus/) || (/isol/)) {
#       @fields = split "\t";
#       my $pattern_seq = &RSAT::util::trim(shift (@fields));
#       $pattern_seq =~ s/^\.+//g;
#       $pattern_seq =~ s/\.+$//g;
#       $pattern = new RSAT::pattern();
#       $pattern->set_attribute("sequence", $pattern_seq);
#       push @assembled_patterns, $pattern;
#       if ($strands eq "-2str") {
# 	my $rc = &RSAT::util::trim(shift @fields);
# 	$pattern->set_attribute("rc", $rc);
#       }
#       $score{$pattern} = shift @fields;
#       $pattern->set_attribute("score", $score{$pattern});

#       my $type = shift @fields;
#       $pattern->force_attribute("type", $type);
#       $max_score{$family_name} = &max($max_score{$family_name}, $score{$pattern});
#       $max_score{$pattern_type}{$family_name} = &max($max_score{$pattern_type}{$family_name}, $score{$pattern});
#     }
#   }
#   close $pattern_handle;
#   return ($error, @assembled_patterns);
# }



# ################################################################
# ## Calibrate oligonucleotide occurrences (mean and variance) for each
# ## sequence length
# sub CalibrateOligos {
#    my @calib_lengths = @_;
#    &RSAT::message::TimeWarn("Calibrating oligonucleotide occurrences") if ($verbose >= 1);

#    ### Analyse upstream occurrence distributions for each sequence length
#    chdir($dir{main}); chdir($dir{output}); &RSAT::util::CheckOutDir($dir{calib1});
#    &RSAT::message::TimeWarn("Calculating oligonucleotide distributions in all upstream sequences")
#      if ($verbose >= 1);
#    foreach my $calib_length (sort {$a <=> $b} @calib_lengths) {

# 	## retrieve all upstream sequences if required
# 	my $allup_file = $dir{calib1}."/tmp_all_up_".$calib_length.".fasta";
# 	if (-e $allup_file) {
# 	    &RSAT::message::Info("All upstream sequence file already exists. Skipping retrieval", $allup_file) if ($verbose >= 2);
# 	} else {
# 	    $command = "retrieve-seq -imp_pos -org $organism_name -all -from -$calib_length -to -1 -o $allup_file";
# 	    &RSAT::message::TimeWarn("Retrieving all upstream sequences", $allup_file) if ($verbose >= 2);
# ### This should not be done in batch, since the calibrate-oligo commands will be distributed over the nodes of the cluster
# #	    if ($batch) {
# #		push @main::batch_commands, $command;
# #	    } else {
# 		&doit($command, $dry_run, $die_on_error, $verbose);
# #	    }
# 	}

# 	## Calibrate oligonucleotides
# 	for my $oligo_len ($min_oligo_len..$max_oligo_len) {

# 	    ## Names of the calibration files
# 	    my $calib_prefix = &CalibrationPrefix($calib_length, $oligo_len);
# 	    my $distrib_file = $calib_prefix."_distrib.tab";
# 	    my $fitting_file = $calib_prefix."_negbin.tab";

# 	    ## Skip the calibration if the file already exists
# 	    if ((-e $fitting_file.".gz") && !($force_calib)) {
# 		&RSAT::message::Info("Calibration file already exists. Skipping calibration.\t${fitting_file}.gz");
# 		next;
# 	    } else {
# 		&RSAT::message::TimeWarn("Calculating oligo calibration\t${fitting_file}.gz");
# 	    }

# 	    ## Calculate occurrence distributions
# 	    $command = "oligo-analysis -v 3 -l $oligo_len $noov $strands  -i $allup_file -return occ -distrib -o $distrib_file";
# 	    &RSAT::message::TimeWarn( "Analysing ",$oligo_len."nt distributions",$distrib_file) if ($verbose >= 2);
# 	    if ($batch) {
# 		push @main::batch_commands, $command;
# 	    } else {
# 		&doit($command, $dry_run, 0, $verbose);
# 	    }

# 	    ## Calculate stats and fit negbin on the distributions
# 	    $command = "fit-distribution -v 1 -i $distrib_file -distrib negbin -o $fitting_file";
# 	    if ($batch) {
# 		push @main::batch_commands, $command;
# 	    } else {
# 		&doit($command, $dry_run, $die_on_error, $verbose);
# 	    }

# 	    ## Compress distrib and fitting file
# 	    $command = "gzip -f $distrib_file $fitting_file";
# 	    if ($batch) {
# 		push @main::batch_commands, $command;
# 	    } else {
#   	        &doit($command, $dry_run, $die_on_error, $verbose);
#            }
# 	}

# 	## Delete the file with all upstream sequences
# 	$command = "rm -f $allup_file";
# 	&RSAT::message::Info ("Cleaning all upstream sequences", $allup_file) if ($verbose >= 1);
# 	if ($batch) {
# 	    push @main::batch_commands, $command;
# 	} else {
# 	    &doit($command, $dry_run, 0, $verbose);
# 	}

# 	## Send the batch script to the queue
# 	if ($batch) {
# 	    my $batch_script = join "\n\n", @main::batch_commands;
# 	    &doit($batch_script, $dry_run, $die_on_error, $verbose, $batch);
# 	    @main::batch_commands = ();
# 	}
#    }

# }



# ################################################################
# ## Calibrate oligo frequencies by selecting R random sets of N genes,
# ## counting the occurrences of each word in each set, and estimating
# ## the average and standard deviation of the occurrence number.
# #sub CalibrateOligosN {
# sub CalibrateOligosN {
#   &RSAT::message::Info("Calibrating oligo occurrences with random gene selections") if ($verbose >= 1);

#   chdir($dir{main}); ## Calibration directory is specified relative to the main directory
#   &RSAT::message::Info("Chdir to main directory\t".$dir{main});
#   for my $oligo_len ($min_oligo_len..$max_oligo_len) {
#     foreach my $family_name (@families) {
#       my $gene_nb = scalar(@{$family{$family_name}->{members}});
#       my $seq_len = $family{$family_name}->{calib_length};

#       if ($calib_done{$gene_nb}{$seq_len} > 0) {
# 	&RSAT::message::Info($family_name, "Already done a calibration of oligo occurrences with ",$gene_nb,
# 			     "random gene selections","upstream length", $seq_len);
#       } else {

# 	&RSAT::message::Info($family_name, "Calibrating oligo occurrences with ",$gene_nb,
# 			     "random gene selections","upstream length", $seq_len)
# 	  if ($verbose >= 2);
# 	my $command = "calibrate-oligos -v 1";
# 	$command  .= " -r ".$calibN_repet." -sn ".$gene_nb." -ol ".$oligo_len." -sl ".$seq_len;
# 	my $calibN_task = "all,clean_oligos";
# 	$command .= " -task ".$calibN_task;
# 	#		$command .= "-start ${START}";
# 	#		$command .= "${END}";
# 	$command .= " ".$strands;
# 	$command .= " ".$noov;
# 	$command .= " -outdir ".$dir{calibN};
# 	$command .= " -org ".$organism_name;
# 	$calib_done{$gene_nb}{$seq_len}++;
# 	&doit($command, $dry_run, $die_on_error, $verbose, $batch, "multi_calibN");
#       }
#     }
#   }
#   chdir($dir{output});
# }


# ################################################################
# #### display short help message
# sub PrintOptions {
#  open HELP, "| more";
#  print HELP <<End_short_help;
# multiple-family-analysis options
# --------------------------------
# ## General options
# -h		(must be first argument) display full help message
# -help		(must be first argument) display options
# -v		verbose
# -i		family file (incompatible with -seq)
# -mask upper|lower	mask upper- or lowercases, respectively
# -seq_list	sequence file list (incompatible with -i)
# -all_seq	single file containing all the sequences
# -all_seq_format	input sequence format.
# -bg_seq		file containing sequences used to estimate background models
# -maindir	main directory (by default, the working directory is used)
# -outdir		output directory
# -skip #		skip the # first data sets
# -last #		stop after the # first data sets
# -select		fam1[,fam2,fam3,...]. Perform the analysis of selected clusters only.
# -task		selected task (supported: $supported_tasks)
# -htmaps		draw html maps (dynamic map with pointers to the features)
# -n      	dry run: print commands without executing them
# -batch	send time-consuming jobs to a batch queue for a PC cluster
# -nodie	        continue the analysis even if errors are raised by sub-processes

# ## Sequence options
# -org		organism
# -size_names     use the old naming system (obsolete, only for backward compatibility)
# -org_fam	each family corresponds to one organism
# -taxon		taxon (collect upstream sequences of orthologous genes)
# -purge		use purged sequences for pattern discovery (default)
# -nopurge	use non-purged sequences for pattern discovery
# -purge_ml	min matching length for purge-sequence
# -purge_mis	max number of mismatches for purge-sequence
# -mingenes	minimal number of genes per family
# -maxgenes	maximal number of genes per family
# -from		upstream region left limit
# -to		upstream region right limit
# -noorf		exclude upstream ORFs from upstream sequences
# -rm		use repeat masked version of the genome
# -orfov		do not exclude upstream ORFs from upstream sequences
# -feattype	feature type for sequence retrieval (e.g. CDS, mRNA)
# -seq_type       sequence type (upstream, downstream, ORF)

# ## Parameters for oligo-analysis and dyad-analysis
# -1str   	strand-sensitive analysis
# -2str   	strand-insensitive analysis
# -two_tails	perform a two-tails test (detect under- and over-represented motifs)
# -noov		prevent overlapping matches for self-overlapping patterns
# -ovlp		allow overlapping matches for self-overlapping patterns
# -pseudo		pseudo-weight (see oligo-analysis manual)
# -nomap		do not draw feature maps (for saving time and hard disk space)
# -lth param \#	lower threshold on parameter. Supported: $supported_thresholds
# -uth param \#	upper threshold on parameter. Supported: $supported_thresholds
# -thosig		threshold on occurrence significance (obsolete)
# -toppat # 	Maximal number of patterns to assemble (default $toppat).

# ## oligo-analysis specific parameters
# -maxol		maximum oligonucleotide length
# -minol		minimum oligonucleotide length
# -exp		expected frequency file (obsolete)
# -oligo_exp_freq	expected frequencies for oligo-analysis
# -bg		background frequency model
# -calib_dir	calibration directory
# -calibN_repet #	Number of repetitions for the calibration.
# -markov #	exp. freq. calculated with Markov chain model of order #
# -thmsf		threshold on frequency of matching sequences (obsolete)
# -thmssig	threshold on the significance of matching sequences (obsolete)

# ## dyad-analysis specific parameters
# -monad		monad length for dyad-analysis
# -minsp		minimum spacing for dyad-analysis
# -maxsp		maximum spacing for dyad-analysis
# -transp		transpose synthetic result table (columns become rows)
# -sort		sort key (supported: $supported_sort_keys)
# -dyad_exp_freq	expected frequencies for dyad-analysis

# ## feature-map options
# -origin #       origin for dna-pattern, matrix-scan and feature-maps
# -scalestep #	scale steps for the feature maps.

# ## General options for matrix-based pattern discovery
# -width		matrix width (default $matrix_width)
# -sps		expected number of sites per sequence (default $expected_sites_per_seq)
# -nmotifs	number of motifs to discover per sequence set (family)

# ## Gibbs sampler (Neuwald, 1995)
# -seed		seed number for the random generator

# ## AlignACE (Roth, 1998)
# -seed		seed number for the random generator

# ## MotifSampler (Thijs, 2001)
# -MS_b		background file
# -MS_p		prior probability of 1 motif copy (default $MS_p)
# -MS_M   	Maximal number of motif instances per sequence. (default $MS_M; unset=0)
# -MS_n		number of different motifs to search for (default $MS_n).
# -MS_x		allowed overlap between different motifs. (default $MS_x)
# -MS_r		number of times the MotifSampler should be repeated (default = $MS_r).

# ## MEME options (Bailey, 1994)
# -MEME_minw	minimum motif width (default $MEME_minw)
# -MEME_maxw	maximum motif width (default $MEME_maxw)
# -MEME_bfile     name of background Markov model file
# -MEME_XXX	any other MEME option (XXX) can be passed by appending it to the prefix -MEME_

# ## infogibbs options (Defrance, 2008)
# -infogibbs_XXX	any other infogibbs option (XXX) can be passed by appending it to the prefix -infogibbs_

# ## Validation
# -known		file containing a list of known sites/motifs
# -known_max_len 	maximal length for using known sites (default: $known_site_max_len)

# ## Database comparison
# -db             file containing a list of known sites (from a database)

# ## Synthetic table
# -fam_link_pref  prefix to build a HTML link for the family names
# -fam_link_suff  suffix to build a HTML link for the family names
# -gene_link_pref  prefix to build a HTML link for the gene names
# -gene_link_suff  suffix to build a HTML link for the gene names

# ## Database
# -schema		database schema (default: $schema)
# -host		database host (default: $host)
# -user		database user (default: $user)
# -password	database password (default: $password)

# End_short_help

#  close HELP;
#  exit;
# }
