#!/usr/bin/perl -w

## use strict;

=pod

=head1 NAME

position-scan

=head1 VERSION

1

=head1 DESCRIPTION

This program computes the positional binding profile of a set of PSSMs in a set of sequences of the same length.


=head1 AUTHORS

castro@tagc.univ-mrs.fr

=head1 CATEGORY

=over

=item util

=back

=head1 USAGE

position-scan -matrix matrix_file -seq sequence_file (-bgfile background_file | -bginput) -o output_prefix [-v #] [...]

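=head1 INPUT FORMAT

The matrices must be provided in a format supporting several matrices
per file (see option I<-matrix_format>; default: tf).

The sequences are expected in fasta format by default (see option
I<-seq_format>) and should all have the same length.

The background model file is expected in oligos format by default
(see option I<-bg_format>).

=head1 OUTPUT FORMAT

All the output files and folders share the prefix specified with the
option I<-o>. They include the filtered sequences, the matrix-scan
result tables, the motif ID to name correspondence table, the motif
logos and the log files.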

=head1 SEE ALSO

=head1 WISH LIST

=over

=item B<wish 1>

=item B<wish 2>

=back

=cut


BEGIN {
  ## Append the lib/ directory located next to this script to the module
  ## search path. The regular expression matches the last path component
  ## of $0, so the pre-match ($`) holds everything that precedes it (the
  ## directory part, or an empty string when $0 has no directory).
  push(@INC, $`."lib/") if ($0 =~ /([^(\/)]+)$/);
}
require "RSA.lib";
require "RSA2.cgi.lib";
use RSAT::util;
use RSAT::matrix;
use RSAT::MatrixReader;
use RSAT::SeqUtil;
use List::MoreUtils qw(uniq);
use Data::Dumper;
use File::Basename;
use File::Path;


################################################################
## Main package
package main;
{
  ## Main program: initialise the parameters, read and check the
  ## arguments, define the output file names, then run the successive
  ## steps of the analysis (sequence filtering, optional background
  ## model, matrix-scan, parsing, ID/name table, logos, R profiles).

  ################################################################
  ## Initialise parameters
  our $start_time = &RSAT::util::StartScript();
  our $program_version = do { my @r = (q$Revision: 1.48 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r };
  #    $program_version = "0.00";

  ## Input and output file names, indexed by a short key
  ## (e.g. motifs, sequences, bg_file, prefix)
  our %infile = ();
  our %outfile = ();

  our $verbose = 0;
  our $in = STDIN;
  our $out = STDOUT;

  ## Default parameters
  ## Most of these values can be overwritten by the options parsed in
  ## ReadArguments(); $seq_length is recomputed by ProcessSeq().

  local $seq_length = 100;
  local $bin_size = 50;
  local $p_value = 1e-3;
  local $title = "position-scan";
  local $bg_format = "oligos";
  local $seq_format = "fasta";
  local $matrix_format = "tf";
  local $individual_plots = 1;

  ## Optional parameters
  ## Flags set by the options -top_matrices / -skip_matrices
  local $top_matrix_flag = 0;
  local $skip_matrix_flag = 0;

  ## Flags set by the options -top_sequences / -skip_sequences
  local $top_seq_flag = 0;
  local $skip_seq_flag = 0;

  ## BG model input
  ## Flags set by the options -bgfile / -bginput, and Markov order
  ## (option -markov)
  local $bg_file_flag = 0;
  local $bg_input_flag = 0;
  local $markov = 1;

  ## Directory containing the RSAT perl scripts called by this program
  $SCRIPTS = $ENV{RSAT}."/perl-scripts/";

  ## List of files and folders to delete (see Delete_temporal_files)
  %to_delete = ();
  %to_delete_folder = ();



  ## Input formats: only accept formats supporting multiple matrices
  local @supported_matrix_formats = qw(transfac tf tab clusterbuster cb infogibbs meme stamp uniprobe);
  local %supported_matrix_format = ();
  foreach my $format (@supported_matrix_formats) {
    $supported_matrix_format{$format} = 1;
  }
  local $supported_matrix_formats = join ",", @supported_matrix_formats;

  ################################################################
  ## Read argument values
  &ReadArguments();

  ################################################################
  ## Check argument values

  &RSAT::message::TimeWarn("Checking parameter values") if ($main::verbose >= 2);

  ################################################################
  ## Input motif file is mandatory
  unless ($main::infile{motifs}) {
      &RSAT::error::FatalError("The input motif file is mandatory. (-matrix)");
  }

  ################################################################
  ## Input sequences file is mandatory
  unless ($main::infile{sequences}) {
      &RSAT::error::FatalError("The input sequences file is mandatory. (-seq)");
  }

  ################################################################
  ## A background model is mandatory: either a file (-bgfile) or a
  ## model computed from the input sequences (-bginput)
  unless ($main::infile{bg_file} || $bg_input_flag) {
      &RSAT::error::FatalError("The input Background Model file is mandatory. (-bgfile | -bginput)");
  }

  ################################################################
  ## Check that the output prefix has been specified
  unless ($main::outfile{prefix}) {
    &RSAT::error::FatalError("You must define the output prefix (option -o).");
  }

  ################################################################
  ## Non-recognized parameters are ignored
  ## NOTE(review): @args_to_pass is not filled anywhere in the visible
  ## part of this file (ReadArguments raises a fatal error on unknown
  ## options), so this block looks like dead code -- to be confirmed.
  if (scalar(@args_to_pass)) {
    $args_to_pass = join (" ", @args_to_pass);
    &RSAT::message::Info("Unrecognized arguments are ignored", $args_to_pass) if ($main::verbose >= 2);
  }

  ## Split the output prefix into the output directory and the base
  ## name of the output files, and make sure the directory exists
  local $basename;
  ($dir{output}, $basename) = &RSAT::util::SplitFileName($main::outfile{prefix});
  $dir{output} = "." if ($dir{output} eq "");
  &RSAT::util::CheckOutDir($dir{output});

  ## Create a BG model folder and assign a file name
  ## when the '-bginput' option is specified
  if($bg_input_flag == 1){

    ## NOTE: $bg_dir is localized inside this if-block, so its previous
    ## value is restored as soon as the block ends.
    local $bg_dir = $main::outfile{prefix}."_bg_model";
    &RSAT::util::CheckOutDir($bg_dir);

    $main::infile{bg_file} = $bg_dir."/mkv_" . $markov . "_2str.oligos"; 
  }

  ## Output file names (all derived from the output prefix)
  $main::outfile{filtered_sequences} = $main::outfile{prefix}."_filtered_sequences.fasta";
  $main::outfile{motifs_transfac} = $main::outfile{prefix}."_input_matrix_transfac.tf";
  $main::outfile{matches_tab} = $main::outfile{prefix}."_matrix_scan_results.tab";
  $main::outfile{matches_tab_PARSED} = $main::outfile{prefix}."_matrix_scan_results_PARSED.tab";
  $main::outfile{motif_ID_to_name} = $main::outfile{prefix}."_TF_ID_name_correspondence.tab";

  $main::outfile{all_profiles} = $main::outfile{prefix}."_positional_profiles.pdf";
  $main::outfile{profiles_tab} = $main::outfile{prefix}."_profiles.tab";
  $main::outfile{profiles_counts} = $main::outfile{prefix}."_counts_per_bin_profiles.tab";
  $main::outfile{profiles_heatmap} = $main::outfile{prefix}."_profiles_heatmap.pdf";
  $main::outfile{report} = $main::outfile{prefix}."_scan_profile_report.html";
  $main::outfile{report_template} = $ENV{RSAT}."/public_html/templates_html/dynamic_positional_profiles.html";

  ## From here on, $main::out is the stream of the log file
  $main::outfile{log} = $main::outfile{prefix}."_log.txt"; push @outfiles, "log";
  $main::out = &OpenOutputFile($main::outfile{log});

  ## Error file should contain only errors
  $main::outfile{err_log} = $main::outfile{prefix}."_errors.txt"; push @outfiles, "err_log";
  $main::err = &OpenOutputFile($main::outfile{err_log});

  ## Specific log file for R commands
  $main::outfile{Rlog} = $main::outfile{prefix}."_Rlog.txt"; push @outfiles, "Rlog";

  ################################################################
  ## Ensure the access to the JavaScript libraries, which are required
  ## to display the results (logo trees, dynamic tables).
  ##
  ## If $include_js_lib is set to 1, the JavaScript libraries are
  ## included in the output directory, in order to avoid problems with
  ## the links. This solution makes it possible to move the result
  ## (e.g. download a self-contained archive), but costs 500kb of disk
  ## space for each result; we should evaluate alternative solutions.
  ##
  ## The alternative is to point to the libraries on the RSAT server,
  ## but we faced problems on some servers, due to the path
  ## public_html/lib, which includes a soft link (lib ->
  ## ../perl-scripts/lib).
  local $include_js_lib = 1; 

  ## Define the base directories for the JavaScript libraries, which
  ## are required to display logo trees, animations and dynamic tables
  if ($include_js_lib) {

      &RSAT::message::Debug("Copying javascript libraries to output directory") if ($main::verbose >= 3);
      my $js_sync_cmd = "rsync -ruptl";
      $js_sync_cmd .= " ".$ENV{RSAT}."/perl-scripts/lib/js";
      $js_sync_cmd .= " ".$dir{output};
      &doit($js_sync_cmd, 0, 1, $verbose, 0, "", $main::out, $main::err);
      $js_base = $dir{output}."/js";
      &RSAT::message::Info("Javascript libraries copied to output directory", $js_base) if ($main::verbose >= 3);

  }  elsif (defined($ENV{RSA_OUTPUT_CONTEXT}) && ($ENV{RSA_OUTPUT_CONTEXT} eq "cgi")) {
      $js_base = $ENV{rsat_www}."/lib/js";
  } else {

      ## NOTE from JvH to Jaime: This solution does not allow to see the
      ## result on another computer than the RSAT server on which
      ## matrix-clustering ran.
      $js_base = $ENV{RSAT}."/perl-scripts/lib/js";
  }

  ## Paths of the JavaScript/CSS resources, relative to the HTML report
  $c3_base = &RSAT::util::RelativePath($main::outfile{report}, $js_base."/c3-0.4.10/c3.js");
  $c3_css_base = &RSAT::util::RelativePath($main::outfile{report}, $js_base."/c3-0.4.10/c3.css");
  $d3_base = &RSAT::util::RelativePath($main::outfile{report}, $js_base."/d3.v3.min.js");
  $jquery_base = &RSAT::util::RelativePath($main::outfile{report}, $js_base."/DataTables-1.10.4/media/js/jquery.js");
  $datatable_base = &RSAT::util::RelativePath($main::outfile{report}, $js_base."/DataTables-1.10.4/media/js/jquery.dataTables.min.js");
  $datatable_css_base = &RSAT::util::RelativePath($main::outfile{report}, $js_base."/DataTables-1.10.4/media/css/jquery.dataTables.min.css");

  ################################################################
  ## Calculate the sequence length
  ## For the moment the input sequences must have the same length.
  ## The length of the first sequence is taken as reference, and the
  ## sequences of a different length are filtered out.
  &ProcessSeq();      

  ################################################################
  ## Calculate background of the input sequences
  if ($bg_input_flag == 1) {
      &CalculateBGModel();
  }

  ################################################################
  ## Run matrix-scan quick
  &ScanSequences();

  ################################################################
  ## Parse the matrix-scan results table
  &ParseScanResults();

  ################################################################
  ## Produce the ID to Motif name reference table
  &ID_NameRefTable();

  ################################################################
  ## Produce the logos
  &GenerateLogos();

  ################################################################
  ## Compute statistics (X2 pval, eval, qval)
  ## Draw the dynamic profiles
  ## These steps are executed in R
  &DynamicProfilesR();

  #######################################
  ## Delete temporary files and folders (currently disabled)
  #&Delete_temporal_files();

  ################################################################
  ## Print verbose
  &Verbose() if ($main::verbose >= 1);

  ################################################################
  ## Report execution time and close output stream
  &close_and_quit();
}

################################################################
################### SUBROUTINE DEFINITION ######################
################################################################


################################################################
## Report the execution time, close the log streams and quit.
##
## Reads the globals $start_time, $main::verbose, $main::out,
## $main::err and %outfile. This subroutine never returns: the script
## exits with status 0.
sub close_and_quit {

  ## Report execution time
  my $exec_time = &RSAT::util::ReportExecutionTime($start_time); ## This has to be executed by all scripts
  print $main::out $exec_time if ($main::verbose >= 1); ## only report exec time if verbosity is specified

  ## Close the log file. In this script the output stream is registered
  ## under the key "log" of %outfile (the key "output" is never set).
  if ($outfile{log}) {
    close $main::out;
    &RSAT::message::TimeWarn("Output file", $outfile{log}) if ($main::verbose >= 2);
  }

  ## Close the error log, which is opened together with the log file
  if ($outfile{err_log} && $main::err) {
    close $main::err;
  }

  exit(0);
}


################################################################
## Display full help message
##
## The POD documentation of this script is formatted with pod2text,
## then the program exits.
sub PrintHelp {
  ## List form of system(): the script path is passed as a single
  ## argument, without shell interpretation, so that paths containing
  ## spaces or shell metacharacters are handled correctly.
  system("pod2text", "-c", $0);
  exit();
}

################################################################
## Display the help message for the option -help.
## No separate short message is defined in this script: the call is
## delegated to PrintHelp(), which prints the full help and exits.
sub PrintOptions {
  return &PrintHelp();
}

################################################################
## Read arguments
##
## Parses a copy of @ARGV and stores the option values in the global
## parameters (%main::infile, %main::outfile, $main::verbose, flags,
## ...). The compatibility between the background options is checked
## once all the arguments have been read, so that the result does not
## depend on the order of the options on the command line.
##
## A fatal error is raised for invalid values and unknown options.
sub ReadArguments {
  my $arg;
  my @arguments = @ARGV; ## create a copy to shift, because we need ARGV to report command line in &Verbose()

  ## Set when the option -markov is explicitly specified, in order to
  ## check its compatibility with -bgfile after the parsing loop
  my $markov_specified = 0;

  while (scalar(@arguments) >= 1) {
    $arg = shift (@arguments);


=pod

=head1 OPTIONS

=over 4

=item B<-v #>

Level of verbosity (detail in the warning messages during execution)

=cut
    if ($arg eq "-v") {
      if (&IsNatural($arguments[0])) {
        $main::verbose = shift(@arguments);
      } else {
        $main::verbose = 1;
      }


=pod

=item B<-h>

Display full help message

=cut
    } elsif ($arg eq "-h") {
      &PrintHelp();


=pod

=item B<-help>

Same as -h

=cut
    } elsif ($arg eq "-help") {
      &PrintOptions();


=pod

=item B<-title title>

Title displayed on top of the report page.

=cut
    } elsif ($arg eq "-title") {
      $main::title = shift(@arguments);
      $main::title =~ s/\s+/_/g;

=pod

=item B<-matrix input_motif_file>

The input file contains a set of position-specific scoring
matrices.

=cut
    } elsif ($arg eq "-matrix") {
      $main::infile{motifs} = shift(@arguments);


=pod

=item B<-matrix_format matrix_format>

Specify the input matrix format.


B<Supported matrix formats>

Since the program takes several matrices as input, it only accepts
matrices in formats supporting several matrices per file (transfac,
tf, tab, clusterbuster, cb, infogibbs, meme, stamp, uniprobe).

For a description of these formats, see the help of I<convert-matrix>.

=cut
    } elsif ($arg eq "-matrix_format") {
      $matrix_format = shift(@arguments);

      unless ($supported_matrix_format{$matrix_format}) {
        &RSAT::error::FatalError($matrix_format, "Invalid format for input matrices\tSupported: ".$main::supported_matrix_formats);
      }


=pod

=item B<-top_matrices X>

Only analyze the first X motifs of the input file. This option is
convenient for quick testing before starting the full analysis.

=cut
    } elsif ($arg eq "-top_matrices") {
      $top_matrices = shift(@arguments);
      $top_matrix_flag = 1;

      &RSAT::error::FatalError($top_matrices, "Invalid value for option -top_matrices: must be a natural number") 
         unless (&IsNatural($top_matrices));

=pod

=item B<-skip_matrices X>

Skip the first X motifs of the input file. This option is convenient
for testing the program on a subset of the motifs before starting the
full analysis.

=cut
    } elsif ($arg eq "-skip_matrices") {
      $skip_matrices = shift(@arguments);
      $skip_matrix_flag = 1;
      &RSAT::error::FatalError($skip_matrices, "Invalid value for option -skip_matrices: must be a natural number") 
         unless (&IsNatural($skip_matrices));

=pod

=item B<-seq input_sequences_file>

A file containing the input sequences (fasta format by default, see
option I<-seq_format>).

=cut
    } elsif ($arg eq "-seq") {
      $main::infile{sequences} = shift(@arguments);


=pod

=item B<-seq_format sequence_format>

Sequence format.

Default: fasta

=cut
    } elsif ($arg eq "-seq_format") {
      $main::seq_format = shift(@arguments);


=pod

=item B<-top_sequences X>

Only analyze the first X sequences of the input sequence file.

=cut
    } elsif ($arg eq "-top_sequences") {
      $top_sequences = shift(@arguments);
      $top_seq_flag = 1;

      &RSAT::error::FatalError($top_sequences, "Invalid value for option -top_sequences: must be a natural number") 
         unless (&IsNatural($top_sequences));

=pod

=item B<-skip_sequences X>

Skip the first X sequences of the input sequence file.

=cut
    } elsif ($arg eq "-skip_sequences") {
      $skip_sequences = shift(@arguments);
      $skip_seq_flag = 1;
      &RSAT::error::FatalError($skip_sequences, "Invalid value for option -skip_sequences: must be a natural number") 
         unless (&IsNatural($skip_sequences));



=pod

=item B<-bin bin_length>

The length of the bin (in bp).

Default: 50

=cut
    } elsif ($arg eq "-bin") {
      $bin_size = shift(@arguments);
      &RSAT::error::FatalError($bin_size, "Invalid value for option -bin: must be a strictly positive natural number")
         unless (&IsNatural($bin_size) && ($bin_size > 0));


=pod

=item B<-pval p_value>

Select only those matches with a p-value equal or lower than the
specified.

Default: 1e-3

=cut
    } elsif ($arg eq "-pval") {
      $p_value = shift(@arguments);


=pod

=item B<-bgfile background_file>

A file containing the background model used to scan the sequences.

This option is incompatible with the options -bginput and -markov.

=cut
    } elsif ($arg eq "-bgfile") {
      $bg_file_flag = 1;
      $main::infile{bg_file} = shift(@arguments);


=pod

=item B<-bg_format background_format>

Format for the background model file.

        Supported formats: all the input formats supported by
        I<convert-background-model>.

Default: oligos

=cut
    } elsif ($arg eq "-bg_format") {
      $main::bg_format = shift(@arguments);

=pod

=item B<-bginput>

Calculate background model from the input sequence set.

This option is incompatible with the option -bgfile.

=cut

    } elsif ($arg eq "-bginput") {
      $bg_input_flag = 1;

=pod

=item B<-markov order>

Order of the markov chain for the background model.

This option is incompatible with the option -bgfile.

Default: 1

=cut
    } elsif ($arg eq "-markov") {
      $markov = shift(@arguments);
      $markov_specified = 1;
      &RSAT::error::FatalError("Markov order must be a natural number.")
        unless &RSAT::util::IsNatural($markov);

=pod

=item B<-o output_prefix>

Prefix for the output files and folders.

=cut
    } elsif ($arg eq "-o") {
      $main::outfile{prefix} = shift(@arguments);

    } else {
      &RSAT::error::FatalError(join("\t", "Invalid option", $arg));

    }
  }

  ## Check the compatibility between the background options. These
  ## checks are done after the loop because the options can be given
  ## in any order on the command line.
  &RSAT::error::FatalError("Options -bgfile and -bginput are mutually exclusive")
    if (($bg_file_flag) && ($bg_input_flag));
  &RSAT::error::FatalError("The options -markov and -bgfile are mutually exclusive.")
    if (($bg_file_flag) && ($markov_specified));

=pod

=back

=cut

}

################################################################
## Verbose message
##
## Prints on the output stream ($out) the command line arguments, the
## program version, and the input and output files. The files are
## listed in alphabetical order of their keys, so that the report is
## identical from one run to the other.
sub Verbose {
  print $out "; position-scan ";
  &PrintArguments($out);
  printf $out "; %-22s\t%s\n", "Program version", $program_version;
  if (%main::infile) {
    print $out "; Input files\n";
    foreach my $key (sort(keys(%main::infile))) {
      printf $out ";\t%-13s\t%s\n", $key, $main::infile{$key};
    }
  }
  if (%main::outfile) {
    print $out "; Output files\n";
    foreach my $key (sort(keys(%main::outfile))) {
      printf $out ";\t%-13s\t%s\n", $key, $main::outfile{$key};
    }
  }
}

################################################################
## Scan the input sequences with the input matrices, by running the
## RSAT program matrix-scan (quick mode, both strands).
##
## The matches are written in the file $main::outfile{matches_tab}.
## The command is stored in the global variable $matrix_scan_cmd.
sub ScanSequences {

    if ($main::verbose >= 2) {
        &RSAT::message::TimeWarn("Matrices loaded from file", $main::infile{motifs});
        &RSAT::message::TimeWarn("Scanning sequences");
    }

    ## Pieces of the matrix-scan command, concatenated without separator.
    ## Note: the sequence format is hard-coded to fasta in this command,
    ## whatever the value of $main::seq_format.
    my @cmd_parts = (
        $SCRIPTS."/matrix-scan -v 1 -quick ",
        " -m ".$main::infile{motifs},
        " -matrix_format ".$matrix_format,
        " -origin end -n score",
        " -uth pval ".$p_value,
        " -i ".$main::infile{sequences}." -seq_format fasta",
        " -pseudo 0.01 -decimals 2 -2str -bg_pseudo 0.01",
        " -return limits -return pval ",
        " -o ".$main::outfile{matches_tab},
        " -bgfile ".$main::infile{bg_file}." -bg_format ".$bg_format,
        );

    ## Restrict the scan to a subset of the matrices if required.
    ## NOTE(review): the values of -top_matrices and -skip_matrices are
    ## passed unchanged as -last_matrix and -first_matrix; whether this
    ## selects the same matrices as the -top/-skip options used for the
    ## logos should be checked against the matrix-scan documentation.
    push(@cmd_parts, " -last_matrix ".$top_matrices) if ($top_matrix_flag);
    push(@cmd_parts, " -first_matrix ".$skip_matrices) if ($skip_matrix_flag);

    $matrix_scan_cmd = join("", @cmd_parts);
    &doit($matrix_scan_cmd, 0, 1, $verbose, 0, "", $main::out, $main::err);

    &RSAT::message::TimeWarn("matrix-scan file results", $main::outfile{matches_tab}) if ($main::verbose >= 2);
}


################################################################
## Reduce the matrix-scan result table to the columns required to
## draw the profiles.
##
## The table is later loaded in R, which requires a lot of memory, so
## a lighter version is produced with grep + awk: the comment lines
## (starting with ';') and the rows whose second column is "limit" are
## discarded, and only the rows whose 11th column is >= 4 are kept.
## NOTE(review): the meaning of the selected columns depends on the
## matrix-scan output format -- to be confirmed.
##
## The parsed table is registered in %to_delete.
sub ParseScanResults {

    my $scan_table = $main::outfile{matches_tab};
    my $parsed_table = $main::outfile{matches_tab_PARSED};

    &RSAT::message::TimeWarn("Parsing matrix-scan results", $scan_table) if ($main::verbose >= 3);

    ## The awk program is single-quoted in Perl: its "\t" sequences are
    ## interpreted by awk, whereas the field separator given to -F is a
    ## real tabulation.
    $parse_cmd = join("",
                      " grep -v '^;' ", $scan_table,
                      " | awk -F '\t'  ' ",
                      '$2!="limit" && ($11 >= 4) {print $1"\t"$3"\t"($6+$5)/2"\t"$9}',
                      " ' ",
                      " > ", $parsed_table);

    &doit($parse_cmd, 0, 1, $verbose, 0, "", $main::out, $main::err);

    &RSAT::message::TimeWarn("PARSED matrix-scan file results", $parsed_table) if ($main::verbose >= 3);

    $to_delete{'matched_tab_PARSED'} = $parsed_table;
}


################################################################
## Create the table of correspondence motif ID -> motif name.
##
## This is done because there is sometimes redundancy in the motif
## set: the motif names can be repeated, whereas the ID is unique but
## not so informative to display.
##
## The matrices are read in Transfac format. If the input matrices are
## in another format, they are first converted with convert-matrix.
## For each matrix, one row is printed in the file
## $main::outfile{motif_ID_to_name}: the value of the AC field, a
## tabulation, and the value of the ID field.
sub ID_NameRefTable {

    &RSAT::message::TimeWarn("Generating ID -> Name reference table") if ($main::verbose >= 3);

    ## If it is required, convert the input motif file in Transfac format
    if (($matrix_format eq "transfac") || ($matrix_format eq "tf")) {
        $main::outfile{motifs_transfac} = $main::infile{motifs};
    } else {

        &RSAT::message::TimeWarn("Converting input matrices to Tansfac format") if ($main::verbose >= 3);
        $convert_matrix_cmd = $SCRIPTS."/convert-matrix";
        $convert_matrix_cmd .= " -i ".$main::infile{motifs};
        ## This branch is only reached when the input is not in Transfac
        ## format: the actual input format must be passed to convert-matrix
        $convert_matrix_cmd .= " -from ".$matrix_format;
        $convert_matrix_cmd .= " -to transfac";
        $convert_matrix_cmd .= " -o ".$main::outfile{motifs_transfac};
        &doit($convert_matrix_cmd, 0, 1, $verbose, 0, "", $main::out, $main::err);
        $to_delete{'motifs_transfac'} = $main::outfile{motifs_transfac};
    }

    ## Open the ID to Motif name reference table
    my ($ID_to_Motif_name_file) = &RSAT::util::OpenOutputFile($main::outfile{motif_ID_to_name});

    ################################################
    ## Read the Transfac file and print one row per matrix
    ## First column = AC field (unique identifier)
    ## Second column = ID field (motif name)
    my ($transfac_matrix_file) = &OpenInputFile($main::outfile{motifs_transfac});
    my ($AC_unique, $motif_ID);
    while (my $line = <$transfac_matrix_file>) {

        next if ($line =~ /^#/); ## Skip header line
        next if ($line =~ /^;/); ## Skip comment lines
        next unless ($line =~ /\S/); ## Skip empty lines
        chomp($line);

        ## Get the AC (unique). The pattern " " makes split() ignore the
        ## leading spaces, so that indented lines are parsed correctly.
        if ($line =~ /^\s*AC\s+/) {
            (undef, $AC_unique) = split(" ", $line);
        }

        ## Get the ID (motif name)
        if ($line =~ /^\s*ID\s+/) {
            (undef, $motif_ID) = split(" ", $line);

            ## Print the ID -> Name in the File (a missing field is
            ## printed as an empty string)
            print $ID_to_Motif_name_file join("\t",
                                              defined($AC_unique) ? $AC_unique : "",
                                              defined($motif_ID) ? $motif_ID : ""), "\n";

            ## Reset both fields before reading the next matrix
            ($AC_unique, $motif_ID) = ();
        }
    }
    close($transfac_matrix_file);
    close($ID_to_Motif_name_file);

    &RSAT::message::TimeWarn("ID to Motif name correspondence table", $main::outfile{motif_ID_to_name}) if ($main::verbose >= 3);    
}


################################################################
## Generate the logos of the input motifs, by running the RSAT
## program convert-matrix.
##
## The logos are exported in the folder [output_prefix]_logos. The
## command is stored in the global variable $logos_cmd.
sub GenerateLogos {

    &RSAT::message::TimeWarn("Generating logos from input matrices") if ($main::verbose >= 2);

    my @cmd_parts = (
        $SCRIPTS."/convert-matrix",
        " -i ".$main::infile{motifs},
        " -from ".$matrix_format,
        " -to transfac",
        " -return logo -logo_program weblogo",
        " -logo_format png,jpeg -logo_dir ".$main::outfile{prefix}."_logos",
        );

    ## Restrict the logos to a subset of the matrices if required
    push(@cmd_parts, " -top ".$top_matrices) if ($top_matrix_flag);
    push(@cmd_parts, " -skip ".$skip_matrices) if ($skip_matrix_flag);

    $logos_cmd = join("", @cmd_parts);
    &doit($logos_cmd, 0, 1, $verbose, 0, "", $main::out, $main::err);
}


################################################################
## Call the R script where the statistics are calculated
## (Chi-squared, p-val, q-val, e-val)
##
## Within this script the dynamic profiles are drawn and exported
## in a HTML file.
##
## The R script ($RSAT/R-scripts/Scan_profile.R) is sent to R on its
## standard input, and the parameters are passed after --args as a
## list of "name = 'value'" assignments separated by semicolons.
## If R is not available, a warning is issued and nothing is run.
sub DynamicProfilesR {

    &RSAT::message::TimeWarn("Calculating Chi-squared values for positional profiles of TFBSs") if ($main::verbose >= 2);

    ##################################
    ### Identify the path of the R executable
    my $r_path = &RSAT::server::GetProgramPath("R");

    my $profiles_script  = $ENV{RSAT}."/R-scripts/Scan_profile.R";
    &RSAT::error::FatalError("Cannot read Motif scan profile script", $profiles_script) unless (-r $profiles_script);

    ## Parameters passed to the R script (name, value), in the order
    ## in which they appear on the command line
    my @r_params = (
        ["matrix.scan.file", $main::outfile{matches_tab_PARSED}],
        ["p.val", $p_value],
        ["bin", $bin_size],
        ["prefix", $main::outfile{prefix}],
        ["logo.folder", $basename."_logos/"],
        ["results.folder", $dir{output}],
        ["basename", $basename],
        ["ID.to.names.correspondence.tab", $main::outfile{motif_ID_to_name}],
        ["seq.length", $seq_length],
        ["individual.plots", $individual_plots],
        ["html.template.file", $main::outfile{report_template}],
        ["c3.css.base", $c3_css_base],
        ["c3.base", $c3_base],
        ["d3.base", $d3_base],
        ["jquery.base", $jquery_base],
        ["datatable.base", $datatable_base],
        ["datatable.css.base", $datatable_css_base],
        );

    ## Build the command
    my $profile_cmd = "";
    $profile_cmd .= " cat ".$profiles_script;
    $profile_cmd .= " | ".$r_path;
    $profile_cmd .= " --slave --no-save --no-restore --no-environ";
    $profile_cmd .= " --args \"";
    $profile_cmd .= " ".join("; ", map { $_->[0]." = '".$_->[1]."'" } @r_params);
    $profile_cmd .= " \"";

    if ($r_path) {
        &doit($profile_cmd, 0, 1, $verbose, 0, "", $main::out, $main::err);
    } else {
        &RSAT::message::Warning("Could not compute the positional profiles because the program R is not available") if ($main::verbose >= 1);
    }
    return();
}


################################################################
## Calculate the sequence length and filter the input sequences.
##
## The length of the first input sequence (computed with
## sequence-lengths) is taken as reference and stored in $seq_length.
## The input sequences are then filtered with convert-seq (options
## -skip_short, -skip_long, and optionally -top / -skip), and exported
## in fasta format in the file $main::outfile{filtered_sequences},
## which replaces the original sequence file for the next steps.
##
## Returns the reference sequence length. A fatal error is raised if
## this length cannot be computed.
sub ProcessSeq {

    &RSAT::message::TimeWarn("Calculating Sequence length") if ($main::verbose >= 2);

    $seq_length_cmd = $SCRIPTS."/sequence-lengths";
    $seq_length_cmd .= " -i ".$main::infile{sequences};
    $seq_length_cmd .= " -unit bp -in_format ".$main::seq_format;
    $seq_length_cmd .= " | cut -f2 | uniq | head -n 1";

    chomp($seq_length = `$seq_length_cmd`);

    ## Check that a length was actually obtained: an empty or non-numeric
    ## value would silently be treated as 0 in the filtering thresholds.
    $seq_length =~ s/^\s+|\s+$//g if (defined($seq_length));
    unless (defined($seq_length) && ($seq_length =~ /^\d+$/) && ($seq_length > 0)) {
        &RSAT::error::FatalError("Could not compute the sequence length from file", $main::infile{sequences});
    }

    my $seq_length_up = $seq_length + 1;
    my $seq_length_dw = $seq_length - 1;
    
    &RSAT::message::TimeWarn("Sequence length: ", $seq_length, "bp") if ($main::verbose >= 2);

    &RSAT::message::TimeWarn("Filtering sequences shorter or longer than", $seq_length) if ($main::verbose >= 2);

    ## The filtered sequences are always exported in fasta format: the
    ## output file has the extension .fasta and is scanned by matrix-scan
    ## with the option -seq_format fasta.
    $convert_seq_cmd = $SCRIPTS."/convert-seq";
    $convert_seq_cmd .= " -i ".$main::infile{sequences};
    $convert_seq_cmd .= " -from ".$main::seq_format." -to fasta";
    $convert_seq_cmd .= " -skip_long ".$seq_length_up;
    $convert_seq_cmd .= " -skip_short ".$seq_length_dw;
    $convert_seq_cmd .= " -top ".$top_sequences if($top_seq_flag == 1);
    $convert_seq_cmd .= " -skip ".$skip_sequences if($skip_seq_flag == 1);
    $convert_seq_cmd .= " -o ".$main::outfile{filtered_sequences};

    ## Same arguments as the other calls to doit() in this script: die
    ## on error and report in the log and error files
    &doit($convert_seq_cmd, 0, 1, $verbose, 0, "", $main::out, $main::err);

    $main::infile{sequences} = $main::outfile{filtered_sequences};

    $to_delete{'filtered_sequences'} = $main::outfile{filtered_sequences};

    return($seq_length);
}


##############################################################
## Delete the temporary files and folders.
##
## The files registered as values of the hash %to_delete are removed
## with unlink(), and the folders registered as values of the hash
## %to_delete_folder are removed with rmtree().
sub Delete_temporal_files {

  foreach my $tmp_file (values(%to_delete)) {
    unlink($tmp_file);
  }

  foreach my $tmp_folder (values(%to_delete_folder)) {
    rmtree($tmp_folder);
  }

}


##############################################################
## Calculate the background model from the input sequence set
## (option -bginput).
##
## The model is computed with oligo-analysis, on both strands, and
## stored in the file $main::infile{bg_file}. The background format
## ($bg_format) is then set to "oligos".
sub CalculateBGModel {

    ## Folder of the background model file (only used for the message)
    my $bg_model_dir = dirname($main::infile{bg_file});
    &RSAT::message::TimeWarn("Calculating background model by sequence set", $bg_model_dir) if ($main::verbose >= 3);

    ## A Markov model of order m is estimated from the frequencies of
    ## oligonucleotides of length m + 1
    my $oligo_length = $markov + 1;

    my $bg_comand = $SCRIPTS."/oligo-analysis -v 1 -quick -2str " ;
    $bg_comand .= " -i ".$main::infile{sequences}." -l ".$oligo_length." -noov ";
    $bg_comand .= " -o ".$main::infile{bg_file};

    ## Same arguments as the other calls to doit() in this script: die
    ## on error and report in the log and error files
    &doit($bg_comand, 0, 1, $verbose, 0, "", $main::out, $main::err);
    &RSAT::message::TimeWarn("Background for sequence set", $main::infile{sequences}, $main::infile{bg_file}) if ($main::verbose >= 3);

    $bg_format = "oligos";
}
        
__END__



