#!/usr/bin/perl -w
############################################################
#
# $Id: variation-scan,v 1.15 2013/09/23 14:23:36 rsat Exp $
#
############################################################

## use strict;


=pod

=head1 NAME

variation-scan

=head1 VERSION

$program_version

=head1 DESCRIPTION

Scan variant sequences with position specific scoring matrices (PSSM)
and report variations that affect the binding score, in order to
predict regulatory variants.


=head1 AUTHORS

=over

=item Jeremy.Delerce@univ-amu.fr

=item Jacques.van-Helden@univ-amu.fr

=item Alejandra Medina-Rivera  <amedina@lcg.unam.mx>

=back


=head1 CATEGORY

=over

=item util

=back

=head1 USAGE

 variation-scan [-i sequence_file] -m matrix_file -bg background_file [-calc_distrib] [-o outputfile] [-v #] [...]

=head2 Example


=head1 INPUT FORMAT

=head2 Sequence file

I<variation-scan> takes as input a variation file in the format
produced by I<retrieve-variation-seq>. For details about this format,
see the output format section of I<retrieve-variation-seq>.

=head2 Matrix file

A list of matrices in transfac format.

=head2 Background file

oligo-analysis format

=head1 OUTPUT FORMAT

A tab delimited file with the following column content.

=over

=item 1. matrix

Name of the matrix.

=item 2. variation

Name of the variation

=item 3. SO

SO term of the variation.

=item 4. var_coord

Coordinate of the variation.

=item 5. B_weight

Best max weight.

=item 6. W_weight

Worst max weight.

=item 7. Diff

Difference between the two max weights.

=item 8. variant

Variant of the variation in the sequence.

=item 9. B_pval

P-value of the best max weight.

=item 10. W_pval

P-value of the worst max weight.

=item 11. sigma

Log10 difference between the two p-values.

=item 12. B_var

Variant(s) in the sequence with the best max weight.

Multiple variants are returned, comma-separated, if the highest max
weight is the same in multiple sequences.

=item 13. W_var

Variant(s) in the sequence with the worst max weight.

Multiple variants are returned, comma-separated, if the lowest max
weight is the same in multiple sequences.

=item 14. B_offset

Offset of the site with the best max weight.

=item 15. W_offset

Offset of the site with the worst max weight.

=item 16. B_seq

Sequence with the highest max weight.

Multiple sequences are returned, comma-separated, if the best max
weight is the same in multiple sequences.

=item 17. W_seq

Sequence with the lowest max weight.

Multiple sequences are returned, comma-separated, if the worst max
weight is the same in multiple sequences.

=back

=head1 SEE ALSO

=head2 download-ensembl-genome

I<retrieve-variation-seq> uses the sequences downloaded
from Ensembl using the tool I<download-ensembl-genome>.

=head2 download-ensembl-variations

I<retrieve-variation-seq> uses variation coordinates downloaded
from Ensembl using the tool I<download-ensembl-variations>.

=head2 variation-scan

Scan variation sequences with one or several position-specific scoring
matrices.

=head1 WISH LIST

=cut

## Make the "lib/" directory located next to this script visible to the
## "require"/"use" statements that follow.
BEGIN {
  ## The pattern matches the file name at the end of $0, so the prematch
  ## variable holds the directory prefix of the script (empty when the
  ## script is invoked without any path).
  if ($0 =~ /([^(\/)]+)$/) {
    my $script_dir = $`;
    push(@INC, $script_dir."lib/");
  }
}

require "RSA.lib";
use RSAT::matrix;
use RSAT::MatrixReader;

################################################################
## Main package
package	main;
{

  ###############################################################
  ## Initialise parameters
  ##
  ## All the variables below are package globals ("our" or undeclared):
  ## they are read and modified by the main block and by the subroutines
  ## defined at the end of the file.

  ## Start time, passed to ReportExecutionTime at the end of the run
  our $start_time = &RSAT::util::StartScript();
  ## Version string built from the digits of the CVS "Revision" keyword
  our $program_version = do { my @r = (q$Revision: 1.15 $ =~ /\d+/g); sprintf"%d."."%02d" x $#r, @r };
  ## Compared to 5000 to decide whether the HTML export is chunked.
  ## NOTE(review): presumably incremented where result rows are printed
  ## (not visible in this part of the file) - confirm.
  our $output_lines = 0;

  ## Input files, indexed by role (keys used in this block: input, bg,
  ## matrixfile, distrib_list)
  our %infile	= ();
  $infile{'distrib_list'} = "";
  ## Output files, indexed by role (keys used in this block: output,
  ## distrib_list)
  our %outfile = ();

  our $verbose = 0;
  ## Default input/output streams; $out is replaced by the handle
  ## returned by OpenOutputFile once the arguments have been read
  our $in = STDIN;
  our $out = STDOUT;

  ## NOTE(review): %matrix_PWM, $only_biggest and $pval_limit are not
  ## used in the main block; presumably set by ReadArguments and/or used
  ## by the analysis subroutines - confirm.
  our %matrix_PWM = ();
  ## Matrices wider than $flank_len+1 columns are skipped, and the same
  ## value is used to trim the variant sequences before scanning
  our $flank_len = 29; ## Default -mml value
  our $only_biggest = 0;
  our $pval_limit = 1;
  ## Matrix name (ID) indexed by matrix accession (AC), filled while
  ## looping over the matrices
  our %matrices_id=();

  ## Upper thresholds and the fields on which they are supported
  our %uth = ();
  our %supported_uth = ();
  $supported_uth{'pval'} = 1;

  ## Supported thresholds
  ## Lower thresholds and the fields on which they are supported
  our %lth = ();
  our %supported_lth = ();
  $supported_lth{'score'} = 1;
  $supported_lth{'w_diff'} = 1;
  $supported_lth{'pval_ratio'} = 1;

  ## When $calc_distrib is true, the script only computes the score
  ## distributions of the matrices in $distrib_dir, then exits
  our $calc_distrib = 0;
  our $distrib_dir = "";

  ## Export the result as HTML in addition to the tab-delimited file
  our $html = 0;
  ## NOTE(review): $top_matrix is only referenced by commented-out code
  ## in the main block; the active limit is $top_matrices (below).
  our $top_matrix =-1;
  ## When positive, reading of the variations stops once the variation
  ## counter exceeds this value
  our $top_variation = -1;
  ## When true (and all the variants have the same length),
  ## Analyse_variation compares the scores position by position
  our $no_offset = 1;


  ## Counters reported at the end of the run (verbosity >= 1).
  ## NOTE(review): $nb_rvar is not used in the main block.
  our $nb_matrix = 0;
  our $nb_variation =0;
  our $nb_seq = 0;
  our $nb_rvar =0;


  ################
  ## Matrix format

#  %supported_input_format = %RSAT::MatrixReader::supported_input_format;
  ## Matrix formats accepted as input, and the same list as a
  ## comma-separated string
  %supported_input_format = &RSAT::MatrixReader::ListInputMatrixFormats();
  $supported_input_formats = join ",", sort keys %supported_input_format;

  ## Default matrix format and list of matrix files to read
  $main::matrix_format="transfac";
  local @matrix_files = ();
  ## Maximal number of matrices to collect; only enforced when the
  ## value is a natural number, so "none" means no limit
  $top_matrices = "none";

  ################################################################
  ## Read argument values and open the output stream
  &ReadArguments();
  $out = &OpenOutputFile($outfile{output});

  ## The variation file is mandatory.
  ## NOTE(review): the message mentions STDIN, but as written the script
  ## dies whenever -i is not specified, so the copy of STDIN to a
  ## temporary file done before the scan can never be reached. Confirm
  ## which behaviour is intended.
  &RSAT::error::FatalError("No sequences were given as input use -i or STDIN ") unless ($main::infile{input});

  ## Check that the matrix-scan-quick executable is present. The path is
  ## kept in $quick_cmd_test because it is reused to build the scanning
  ## command.
  my $quick_cmd_test = &RSAT::server::GetProgramPath("matrix-scan-quick");
  unless ($quick_cmd_test) {
    ## FatalError is expected not to return, so nothing follows it. The
    ## former "return(0)" was removed: "return" is not allowed outside a
    ## subroutine and would itself be a runtime error.
    &RSAT::error::FatalError("matrix-scan-quick is not installed. variation-scan requires matrix-scan-quick.");
  }

  ## In addition, check that the command actually works before any
  ## calculation is done: run it with -h and inspect its exit status.
  `$quick_cmd_test -h`;
  if ($?) {
    &RSAT::error::FatalError("matrix-scan-quick is installed but not working. variation-scan requires matrix-scan-quick to be fully functional");
  }

  ###############################################################
  ## Check arguments: the background model and at least one matrix file
  ## are mandatory, and the background file must exist.
  &RSAT::error::FatalError("Background file is mandatory. Use -bg option to specify it") unless ($infile{'bg'});
  &RSAT::error::FatalError("Matrix files in mandatory. Use -m option to specify it") unless (@matrix_files>0);
  &RSAT::error::FatalError("Background file",$infile{'bg'},"does not exist") unless (-f $infile{'bg'});

  ################
  ## Pre-compute the score distribution of each matrix (-calc_distrib).
  ##
  ## The matrix file is split into one file per matrix with
  ## convert-matrix, matrix-distrib is run on each of them, and a list
  ## file (matrix ID, distribution file, DB, background prefix) is
  ## written in the distribution directory. The script exits once the
  ## distributions have been computed: no scanning is done in this mode.

  ## NOTE FROM ALE, I think we can change this and put it within the matrix reading section
  if ($calc_distrib) {
    &RSAT::error::FatalError("Specify distribution directory. Use -distrib_dir option") unless ($distrib_dir);
    &RSAT::message::TimeWarn("Calculing distribution") if ($main::verbose >= 2);

    ## Prefix used to name the output files: base name of the background
    ## file, up to its first dot. The dot has to be escaped, because the
    ## first argument of split is a pattern; an unescaped "." matches
    ## any character and returns an empty list.
    ## NOTE(review): the part before the first dot is assumed to be the
    ## intended prefix (the last part is only the file extension).
    my @bg_path = split('/', $infile{'bg'});
    my $bg_basename = $bg_path[-1];
    my @bg_basename_parts = split(/\./, $bg_basename);
    my $bg_name = $bg_basename;
    if (defined($bg_basename_parts[0]) && $bg_basename_parts[0] ne "") {
      $bg_name = $bg_basename_parts[0];
    }
    $outfile{'distrib_list'} = $bg_name."_list.tab";

    &Verbose() if ($main::verbose >= 1);

    ## "tmp" is used as output prefix for the split matrices; the list of
    ## split matrices is read from "<prefix>_matrix_list.tab"
    my $split_dir = $distrib_dir."/split_dir/tmp";
    &RSAT::util::CheckOutDir($split_dir);

    ## convert-matrix command, split the matrix file into separate ones
    my $convert_matrix_cmd = $ENV{'RSAT'}."/perl-scripts/convert-matrix -v 1 -from ".$main::matrix_format." -to tf -split ";
    $convert_matrix_cmd .= " -i ".$infile{'matrixfile'};
    $convert_matrix_cmd .= " -o ".$split_dir;
    &doit($convert_matrix_cmd, 0, 0, 0);

    ## Open the file that keeps record of all the matrix distribution
    ## files. The header must end with a newline, otherwise the first
    ## matrix is appended to the header line and ignored by the reader.
    my $outD = &OpenOutputFile($distrib_dir.'/'.$outfile{'distrib_list'});
    print $outD "#MATRIX_ID\tDISTRIB_FILE\tDB\tBG_PREFIX\n";

    ## Read the list of split matrices and, for each of them, compute
    ## the distribution of scores with matrix-distrib
    my ($file) = &OpenInputFile($split_dir.'_matrix_list.tab');
    while (<$file>) {
      next if (/^#/); ## Skip comment lines
      next if (/^;/); ## Skip RSAT-like comment lines
      next unless (/\S/); ## Skip empty lines
      chomp();
      my @field = split("\t");

      ## Distribution file of the current matrix. Only the file name is
      ## stored in the list; the files are written in the same directory
      ## as the list itself.
      my $distrib_file = $field[1]."_".$bg_name.".tab";

      ## Same program path and background options as the on-the-fly
      ## computation done when scanning without pre-computed
      ## distributions
      my $matrix_distrib_cmd = $ENV{'RSAT'}."/perl-scripts/matrix-distrib ";
      $matrix_distrib_cmd .= " -m ".$field[2]." -matrix_format tf -decimals 1 ";
      $matrix_distrib_cmd .= " -bgfile ".$infile{'bg'};
      $matrix_distrib_cmd .= " -bg_pseudo 0.01 -bg_format oligos -pseudo 1 ";
      $matrix_distrib_cmd .= " -o ".$distrib_dir."/".$distrib_file;
      &doit($matrix_distrib_cmd, 0, 0, 0);

      print $outD $field[1],"\t",$distrib_file,"\t.\t",$bg_name,"\n"; ## Print distrib file
    }
    close($file);
    close($outD);
    &RSAT::message::Info("Distrib_list :", $distrib_dir.'/'.$outfile{'distrib_list'}) if ($main::verbose >= 2);

    ################################################################
    ## Report execution time and close output stream
    my $exec_time = &RSAT::util::ReportExecutionTime($start_time); ## This has to be executed by all scripts
    print $out $exec_time  if ($main::verbose >= 1); ## only report exec time if verbosity is specified
    close $out;

    exit(0);
  }

  ## If a list of pre-computed distributions was given as input, check
  ## that the file exists
  if ($infile{'distrib_list'}) {
    &RSAT::error::FatalError("Distrib list",$infile{'distrib_list'},"file does not exist") unless (-f $infile{'distrib_list'});
  }


  ################################################################
  ## Print the column descriptions if verbosity was required
  if ($main::verbose >= 1) {
    &Verbose();

    ## Name and description of each output column, in column order. The
    ## column number is derived from the position in this list.
    my @column_descriptions = (
      ['matrix_ac',         'Accession number of the positions-pecific scoring matrix'],
      ['matrix_name',       'Name of the matrix (generally the transcription factor name)'],
      ['var_id',            'ID of the variation'],
      ['var_class',         'Variation type, according to SNP Ontology (SO) nomenclature'],
      ['var_coord',         'Coordinates of the variation'],
      ['best_w',            'Best weigth for the putative site'],
      ['worst_w',           'Worst weigth for the putative site'],
      ['w_diff',            'Difference between best and worst weigth'],
      ['best_pval',         'P_value of the best putative site'],
      ['worst_pval',        'P_value of the worst putative site'],
      ['pval_ratio',        'Ratio between worst and best pval ( pval_ratio = worst_pval/best_pval )'],
      ['best_variant',      'Variant in the best putative site'],
      ['worst_variant',     'Variant in the worst putative site'],
      ['best_offset',       'Offset of the best putative site'],   ## was misspelled "best_offest"
      ['worst_offset',      'Offset of the worst putative site'],
      ['min_offset_diff',   'Difference minimal between best and worst putative site'],
      ['best_strand',       'Strand of the best putative site'],
      ['worst_strand',      'Strand of the worst putative site'],
      ['str_change',        'Indicate if strand have change between the offset of min_offset_diff'],
      ['best_seq',          'Sequence of the best putative site'], ## was described as the worst site
      ['worst_seq',         'Sequence of the worst putative site'],
      ['reference_allele',  'Reference allele'],
      ['is_ref_better',     'Flag if the reference allele is the allele with the best score'],
      ['minor_allele_freq', 'Minor allele frequency'],             ## was misspelled "monir_alle_freq"
    );

    print $out "; column content","\n";
    my $column_nb = 0;
    foreach my $column (@column_descriptions) {
      $column_nb++;
      my ($column_name, $column_description) = @{$column};
      print $out ";\t",$column_nb,"\t",$column_name,"\t",$column_description,"\n";
    }
  }

  ## Print header. @out_fields is kept as a package global, as in the
  ## original code.
  @out_fields = qw(
                      ac_motif
                      motif
                      var_id
                      var_class
                      var_coord
                      best_w
                      worst_w
                      w_diff
                      best_pval
                      worst_pval
                      pval_ratio
                      best_variant
                      worst_variant
                      best_offset
                      worst_offset
                      min_offset_diff
                      best_strand
                      worst_strand
                      str_change
                      best_seq
                      worst_seq
                      reference_allele
                      is_ref_better
                      minor_allele_freq);
  print $out "#",join("\t", @out_fields),"\n";

  ################################################################
  ## ALE'S NOTE= Read Matrix File and store it in a hash ? 201412

  ################################################################
  ## Read the matrix files and collect the matrix objects in @matrices.
  ##
  ## The locals $length, $need_count and $ac of the previous matrix
  ## parser were removed: they were only used by commented-out code.
  &RSAT::message::TimeWarn("Reading Matrix File") if ($main::verbose >= 2);
  &RSAT::message::TimeWarn("Reading matrices") if ($main::verbose >= 2);

  if (scalar(@matrix_files) >= 1) {
    ## The limit on the number of matrices only applies when
    ## $top_matrices is a natural number (its default value is "none")
    my $limit_matrices = &IsNatural($top_matrices);

    ## $m counts the matrices seen so far; it is also passed to
    ## SetMatrixName as the matrix number
    $m = 0;
    foreach my $matrix_file (@matrix_files) {
      ## Stop reading files if we already collected the specified number of matrices
      last if ($limit_matrices && ($m > $top_matrices));

      my @matrices_from_file = &RSAT::MatrixReader::readFromFile($matrix_file, $matrix_format);
      foreach my $matrix (@matrices_from_file) {
        $m++;
        ## Stop holding matrices if we already collected the specified number of matrices
        last if ($limit_matrices && ($m > $top_matrices));
        &RSAT::MatrixReader::SetMatrixName($matrix, $m, $matrix_file, $matrix_format);
        push @matrices, $matrix;
      }
    }
  }
  $nb_matrix = scalar(@matrices);
# else {  ## NOTE code to be removed
  #   my @matrices_from_file = &RSAT::MatrixReader::readFromFile($infile{input}, $input_format, %args);
  #   my $m = 0;
  #   foreach my $matrix (@matrices_from_file) {
#	  $m++;
#	  last if ((&IsNatural($top_matrices)) && ($m > $top_matrices)); ## Stop holding matrices if we already collected the specified number of matrices
#	  &SetMatrixName($matrix, $m, "", $input_format);
#	  push @matrices, $matrix;
  #     }
  # }





  ## NOTE FROM ALE: old readin matrices version
  # my ($mat_file) = &OpenInputFile($main::infile{'matrixfile'});
  # while (<$mat_file>) {
  #   next if (/^#/); ## Skip comment lines
  #   next if (/^;/); ## Skip RSAT-like comment lines
  #   next unless (/\S/); ## Skip empty lines
  #   chomp();

  #   ## Identify parts of the transfac matrix
  #   my @field = split (" ",$_);

  #   if ( $field[0] eq "AC") {
  #     $ac = $field[-1];
  #     $length = 0;
  #   }

  #   if ( $field[0] eq "ID" ) {
  #     $matrix_list{$ac}{'id'} = $field[-1];
  #   }

  #   $need_count = 0 if ( $field[0] eq "XX" );

  #   if ($need_count) {
  #     $length += 1;

  #     for (my $i = 1;$i < scalar(keys(%{$matrix_list{$ac}{'tab'}})+1);$i++ ) {
  #       push ( @{ $matrix_list{$ac}{'tab'}{$i} }, $field[$i] );
  #     }
  #   }

  #   if ( $field[0] eq "P0" ||$field[0] eq "PO" ) {
  #     $need_count = 1;

  #     for (my $i = 1;$i < scalar(@field);$i++ ) {
  #       push ( @{ $matrix_list{$ac}{'tab'}{$i} }, lc($field[$i]) );
  #     }
  #   }

  #   if ( $field[0] eq "//") {
  #     $matrix_list{$ac}{'length'} = $length;
  #     $nb_matrix++;
  #   }

  #   if ($top_matrix) {
  #     last if ($top_matrix == $nb_matrix && $top_matrix > 0);
  #   }

  # }
  # $matrix_list{$ac}{'length'} = $length; ## ALE'S NOTE=Is this line correct or is it redundant with line 401
  # $nb_matrix++;
  # close $mat_file;

  ## Get distrib files.
  ##
  ## The list of pre-computed distributions is a tab-delimited file whose
  ## first column is used as key (it is looked up with the matrix
  ## accession during the scan) and whose second column is the name of
  ## the distribution file. The distribution files are expected in the
  ## same directory as the list: $distrib_dir is overwritten with that
  ## directory.
  my %distrib_file_list = ();
  if ( -f $infile{'distrib_list'} ) {
    my $distrib_file_name = "";
    ($distrib_dir, $distrib_file_name) = &SplitFileName($infile{'distrib_list'});

    ($file) = &OpenInputFile($infile{'distrib_list'});
    while (<$file>) {
      next if (/^#/); ## Skip comment lines
      next if (/^;/); ## Skip RSAT-like comment lines
      next unless (/\S/); ## Skip empty lines, which would create an undefined key
      chomp();
      my @field = split("\t");
      next unless (defined($field[1])); ## Skip rows without distribution file
      $distrib_file_list{$field[0]} = $field[1];
    }
    close($file);
  }

  ################################################################
  ## Scan variations

  ## Temporary files used for the scanning: sequences in fasta format,
  ## converted background model, current matrix in tab format, and copy
  ## of the variant sequences read from the standard input
  my $fasta_file      = &RSAT::util::make_temp_file("","variation-scan_fasta_seq", 1);
  my $bg_file         = &RSAT::util::make_temp_file("","variation-scan_bg_file", 1);
  my $matrix_file_tab = &RSAT::util::make_temp_file("","variation-scan_matrices_tab", 1);
  my $variation_seq   = &RSAT::util::make_temp_file("","variation-scan_variant_seq_tab", 1);

  ## matrix-scan-quick only accepts background models in inclusive
  ## format: convert the oligo-analysis formatted background file
  my $convert_bg_cmd = $ENV{'RSAT'}."/perl-scripts/convert-background-model "
    ." -i ".$main::infile{'bg'}." -from oligos -to inclusive "
    ." -o ".$bg_file." -bg_pseudo 0.01 ";
  &doit($convert_bg_cmd, 0, 0, 0);

  ## Without input file, copy the variant sequences from the input
  ## stream to a temporary file, which then becomes the input file
  if (!$main::infile{input}) {
    my $outstream_var_seq = &OpenOutputFile($variation_seq);
    while (my $var_line = <$main::in>) {
      next if ($var_line =~ /^[#;]/);   ## Skip comment lines (# or RSAT-like ;)
      next unless ($var_line =~ /\S/);  ## Skip empty lines
      next unless ($var_line =~ /\t/);  ## Skip lines containing no tab (likely to be starting comment lines)
      print $outstream_var_seq $var_line;
    }
    close $outstream_var_seq;
    $main::infile{input} = $variation_seq; ## Define internally the input file
  }


  ## Scan variants using one matrix at a time.
  ## matrix-scan-quick is used for the scanning; each matrix is exported
  ## to a tab-delimited file before being scanned.
  ##
  ## $matrix_length is a package global because getLineInfo uses it to
  ## compute the offset of the sites.
  our $matrix_length = 0;

  ## Load a matrix-distrib table into %matrix_info.
  ##
  ## Arguments: the matrix accession, followed by the lines of the table.
  ## The header line (starting with "#") gives the column names; the
  ## columns "weight" and "Pval" are stored in
  ## $matrix_info{ac}{'pval'}{weight}. The entry 'sigma_max' is then set
  ## to the log10 ratio between the p-values of the lowest and of the
  ## highest weight.
  ##
  ## The same parser serves the pre-computed distribution files and the
  ## distributions computed on the fly. RSAT comment lines (";") and
  ## empty lines are skipped in both cases.
  my $load_distrib = sub {
    my ($distrib_ac, @distrib_lines) = @_;
    my %legend_col = ();
    my $pval_of = ($matrix_info{$distrib_ac}{'pval'} ||= {});

    foreach my $distrib_line (@distrib_lines) {
      chomp($distrib_line);
      next if ($distrib_line =~ /^;/);
      next unless ($distrib_line =~ /\S/);

      ## Header line: index the column names
      if ($distrib_line =~ /^#/) {
        my @legends = split("\t", substr($distrib_line, 1));
        for my $i (0..$#legends) {
          $legend_col{$legends[$i]} = $i;
        }
        next;
      }

      ## Data line: p-value associated to each weight
      next unless (defined($legend_col{"weight"}) && defined($legend_col{"Pval"}));
      my @value = split("\t", $distrib_line);
      my $weight = $value[$legend_col{"weight"}];
      next unless (defined($weight));
      $pval_of->{$weight} = $value[$legend_col{"Pval"}];
    }

    ## Define pval values for score 0 to match matrix-scan-quick number notation
    $pval_of->{'0'} = $pval_of->{'0.0'};
    $pval_of->{'-0.0'} = $pval_of->{'0.0'};

    ## variation-scan calculates the pvalue ratio (sigma) between
    ## variants. Define the maximal ratio based on the p-values of the
    ## lowest and of the highest weights.
    my @sorted_weights = sort {$a <=> $b} keys(%{$pval_of});
    my $pval_lowest_weight = $pval_of->{$sorted_weights[0]};
    my $pval_highest_weight = $pval_of->{$sorted_weights[-1]};
    unless (defined($pval_lowest_weight) && defined($pval_highest_weight)
            && ($pval_lowest_weight > 0) && ($pval_highest_weight > 0)) {
      ## The ratio below would die on a division by zero or on log(0):
      ## report the problem explicitly instead
      &RSAT::error::FatalError("Invalid score distribution for matrix", $distrib_ac);
    }
    $matrix_info{$distrib_ac}{'sigma_max'} = log10($pval_lowest_weight/$pval_highest_weight);
  };

  foreach $mtx ( @matrices){
    $matrix_length = $mtx->get_attribute("ncol");
    $ID = $mtx->get_attribute("name");
    $AC = $mtx->get_attribute("id");

    ## We prefer to keep separately the matrix accession (AC) and
    ## the matrix/factor name (ID), because the AC permits to
    ## recover the matrix.
    $matrix_ac = $AC;

    next if ($matrix_length == 0);
    if ($matrix_length > $flank_len+1) { ## Matrices wider than the sequences flanking the variant are not used for scanning
      &RSAT::message::Info("Matrix is wider than varian flank region:",$matrix_ac ) if ($main::verbose >= 3);
      next;
    }

    $matrices_id{$matrix_ac} = $ID;

    ## Save the count matrix in tab format. matrix-scan-quick only takes
    ## as input matrices in tab delimited format
    my $outM = &OpenOutputFile($matrix_file_tab);
    print $outM $mtx->toString(sep=>"\t",
                               type=>"counts",
                               format=>"tab" );
    close($outM);

    ################################################################
    ## Get the probability distribution of weight scores for the
    ## current matrix, in order to associate a p-value to each
    ## possible weight. The resulting distribution table is used to
    ## assign p-values to the weight scores returned by matrix-scan-quick.
    &RSAT::message::TimeWarn("\tGet distrib") if ($main::verbose >= 3);

    %matrix_info = ();

    if ($distrib_file_list{$matrix_ac}) {
      ## Use the pre-calculated distribution file of the matrix
      my $distrib_file = $distrib_dir."/".$distrib_file_list{$matrix_ac};
      my ($d_file) = &OpenInputFile($distrib_file);
      my @distrib_lines = <$d_file>;
      close($d_file);
      $load_distrib->($matrix_ac, @distrib_lines);

    } else {
      ## No pre-calculated distribution: compute it with matrix-distrib
      my $mtx_distrob_cmd = " $ENV{'RSAT'}/perl-scripts/matrix-distrib " ;
      $mtx_distrob_cmd .= " -m ".$matrix_file_tab ." -matrix_format tab";
      $mtx_distrob_cmd .= " -decimals 1 -pseudo 1";
      $mtx_distrob_cmd .= " -bgfile ".$main::infile{'bg'}." -bg_format oligos";
      $mtx_distrob_cmd .= " -bg_pseudo 0.01";
      &RSAT::message::Debug("matrix-distrib command:", $mtx_distrob_cmd ) if ($main::verbose >= 2);

      my @result_distrib = qx{$mtx_distrob_cmd };
      $load_distrib->($matrix_ac, @result_distrib);
    }

    ## Control variables to match lines representing the same variant (alleles)
    my $last_id = "";

    ## The sequences are scanned by batches: a scan is triggered each
    ## time the number of variations read for the current matrix reaches
    ## the next multiple of the batch size
    my $batch_size = 2000;
    my $last_nb = $batch_size;

    ## Number of variations read for the current matrix. The global
    ## counter $nb_variation accumulates over all the matrices (it is
    ## divided by the number of matrices in the final report), so it
    ## cannot be compared to -top_variation or to the batch threshold:
    ## from the second matrix on, the scan would stop after the first
    ## variation.
    my $nb_variation_mtx = 0;

    ## Scan command
    my $scan_cmd = $quick_cmd_test ;
    $scan_cmd .= " -i $fasta_file";
    $scan_cmd .= " -m $matrix_file_tab";
    $scan_cmd .= " -pseudo 1 -decimals 1 -2str -origin start";
    $scan_cmd .= " -bgfile $bg_file";
    $scan_cmd .= " -name $matrix_ac";

    &RSAT::message::TimeWarn("\tScan matrix", $matrix_ac,$scan_cmd  ) if ($main::verbose >= 3);

    ## Scan the current content of the fasta file and analyse the result
    my $scan_fasta = sub {
      &RSAT::message::Info("matrix-scan command", $scan_cmd ) if ($main::verbose >= 3);
      my @scanning = qx{$scan_cmd};
      &Analyse_scanning(@scanning) unless (scalar(@scanning) == 0);
    };

    ################
    ## Read the variant sequences and write them in fasta format, to be
    ## scanned with matrix-scan-quick using the current matrix.
    ## ALE'S NOTE: not sure this should be here, maybe this should be done outside the matrix loop and stored to be used inside it?
    &RSAT::message::Info("Open Sequence File", $main::infile{input} ) if ($main::verbose >= 3);
    &RSAT::message::Debug("Open Empty fasta file to store sequences", $fasta_file) if ($main::verbose >= 3);
    my $out_fas = &OpenOutputFile($fasta_file);

    my ($var_seq) = &OpenInputFile($main::infile{input});
    while (<$var_seq>) {
      next if (/^#/); ## Skip comment lines
      next if (/^;/); ## Skip RSAT-like comment lines
      next unless (/\S/); ## Skip empty lines
      next unless (/\t/); ## Skip lines containing no tab (likely to be starting comment lines)
      chomp();

      ## Split line in rsat_var format
      my ($chrom, $start, $end,$strand,$id,$type,$ref,$variant,$ma_freq,$seq) = split(/\t/);
      &RSAT::message::Debug("Reading in one variation from rsat-var file", join("//",$chrom, $start, $end,$strand,$id,$type,$ref,$variant,$ma_freq,$seq)) if ($main::verbose >= 10);
      $ref = "-" if ($ref eq "");

      ## First line of a new variation: all the alleles of the previous
      ## variation have been written
      if ($last_id ne $id && $last_id ne "") {
        $nb_variation ++;
        $nb_variation_mtx ++;

        ## NOTE(review): with ">" the scan stops after top_variation+1
        ## variations; kept as in the original code - confirm whether
        ## ">=" is intended.
        last if ($nb_variation_mtx > $top_variation && $top_variation>0);

        if ($nb_variation_mtx >= $last_nb) {
          ## Close the fasta file before scanning it, so that its content
          ## is complete on disk, then start a new (empty) batch
          close($out_fas);
          $scan_fasta->();
          $out_fas = &OpenOutputFile($fasta_file);
          $last_nb += $batch_size;
        }
      }

      ## The header carries the description of the variant, which is
      ## parsed back from the scanning result by getLineInfo. The
      ## sequence is trimmed on both sides according to the matrix width.
      print $out_fas ">$id;$ref;$variant;$type;$ma_freq|$chrom:$start-$end\_$strand\n";
      print $out_fas substr($seq,$flank_len-$matrix_length+1,length($seq)-($flank_len-$matrix_length+1)*2)."\n";
      $nb_seq++;
      $last_id = $id;
    }
    close ($var_seq);
    close($out_fas);

    ## Scan the last batch
    $nb_variation ++;
    $scan_fasta->();

    ## Erase the content of the temporary fasta file
    $out_fas = &OpenOutputFile($fasta_file);
    close($out_fas);

  }

  ################################################################
  ## Report execution time and close output stream
  my $exec_time = &RSAT::util::ReportExecutionTime($start_time); ## This has to be executed by all scripts

  if ($main::verbose >= 1) {
    ## $nb_variation is accumulated over all the matrices: report the
    ## number of variations per matrix. No matrix at all would otherwise
    ## lead to a division by zero.
    my $nb_scanned_variations = ($nb_matrix > 0) ? $nb_variation/$nb_matrix : 0;
    print $out "; Nb matrices\t",$nb_matrix,"\n";
    print $out "; Nb scanned variations\t",$nb_scanned_variations,"\n";
    print $out "; Nb scanned sequences\t",$nb_seq,"\n";
    print $out $exec_time; ## only report exec time if verbosity is specified
  }

  close $out;

  ################################################################
  ## Make html output
  if ($outfile{'output'} && $html) {
    ## Replace the ".tab" extension by ".html". The substitution is
    ## anchored at the end of the name, and the extension is appended
    ## when the output file has no ".tab" extension, so that the HTML
    ## file can never be the tab-delimited file itself.
    $html_file = $outfile{'output'};
    unless ($html_file =~ s/\.tab$/.html/) {
      $html_file .= ".html";
    }

    $command = $ENV{'RSAT'}."/perl-scripts/text-to-html -i ".$outfile{'output'};
    $command .= " -no_sort -chunk 1000" if ($output_lines >= 5000); ## Big tables are split in chunks
    $command .= " -o ".$html_file;
    &doit($command, 0, 0, $verbose);
  }

  exit(0);
}




################################################################
################### SUBROUTINE DEFINITION ######################
################################################################

################################################################
## Display full help message: format the POD documentation of this
## script with pod2text, then exit.
##
## The command is run in list form, without shell, so that a script
## path containing spaces or shell metacharacters is passed unchanged
## to pod2text.
sub PrintHelp {
  system("pod2text", "-c", $0);
  exit(0);
}

################################################################
## Display short help message. There is no separate short help for this
## script: the full help is displayed.
sub PrintOptions {
  return &PrintHelp();
}
################################################################
## Return the base 10 logarithm of the number given as argument
## (natural logarithm divided by the natural logarithm of 10).
sub log10 {
  my ($value) = @_;
  my $ln_of_value = log($value);
  return $ln_of_value/log(10);
}

################################################################
## Parse one line of the scanning output.
##
## Argument: one tab-delimited line with the fields
##   var_info, site, matrix, site_strand, site_start_offset,
##   site_end_offset, seq, score
## where var_info is the fasta header written before the scan:
##   id;ref;alt;type;minor_allele_freq|chrom:start-end_strand
##
## Returns a hash (as a list of key/value pairs) describing the site
## and the variant: matrice, site_strand, seq, score, coord, var_strand,
## id, variant_ref, variant_alt, type, minor_allele_freq and
## site_offset_start.
##
## Uses the global $matrix_length and sets the global $diff_pos.
sub getLineInfo {
    my ($scan_line) = @_;

    ## Fields of the scanning result
    my @scan_field = split("\t", $scan_line);
    my ($seq_header, $matrix_name, $site_strand, $site_start, $site_seq, $raw_score) = @scan_field[0, 2, 3, 4, 6, 7];

    ## The score is truncated (not rounded) to one decimal: rounding with
    ## sprintf can produce a score which is absent from the distribution
    ## file, when the score is close to the highest possible one
    my $score;
    if ($raw_score =~ /\./) {
	$score = substr($raw_score, 0, index($raw_score, '.') + 2);
    } else {
	$score = $raw_score.".0";
    }

    ## Sequence header: variant description | coordinates
    my @header_part = split(/\|/, $seq_header);
    my $coord = $header_part[1];
    my @coord_part = split(/_/, $coord);             ## chr:start-end , strand
    my @variant_field = split(/\;/, $header_part[0]); ## id,ref,alt,type,minor_allele_freq

    my %line_info = (
	'matrice'           => $matrix_name,
	'site_strand'       => $site_strand,
	'seq'               => $site_seq,
	'score'             => $score,
	'coord'             => $coord,
	'var_strand'        => $coord_part[1],
	'id'                => $variant_field[0],
	'variant_ref'       => $variant_field[1],
	'variant_alt'       => $variant_field[2],
	'type'              => $variant_field[3],
	'minor_allele_freq' => $variant_field[4],
	);

    ## The offset is shifted by one position when the reference allele is
    ## empty ("-"), and expressed relative to the matrix width
    $diff_pos = ($line_info{'variant_ref'} eq "-") ? 1 : 0;
    $line_info{'site_offset_start'} = $site_start + $diff_pos - $matrix_length;

    return (%line_info);
}

################################################################
## Analyse matrix-scan results.
##
## Arguments: the lines returned by the scanning command. The lines are
## expected to be grouped by variation: each time the variation ID
## changes, the scores collected for the previous variation are passed
## to Analyse_variation, which is also called after the last line.
##
## For each variation, two structures are collected:
##  - %scanning_score_pos: variants and sequences indexed by offset,
##    strand and score of the site;
##  - %scanning_score_var: for each variant, its best score and the
##    offsets, strands and sequences of all the sites having that score.
sub Analyse_scanning {
    my @scan_lines = @_;

    my %scanning_info = ();
    my %scanning_score_pos = ();
    my %scanning_score_var = ();
    my $same_variant_len = 1;

    foreach my $line (@scan_lines) {
	next if ($line =~ m/^#/);
	next if ($line =~ m/^;/);
	next unless ($line =~ m/\S/); ## Skip empty lines
	chomp($line);

	my %line_info = &getLineInfo($line);

	## New variation: analyse the previous one and reset the
	## collected data. This is done before inspecting the length of
	## the alleles of the current line, so that the first line of a
	## variation is taken into account for the same-length flag.
	my $same_variation = !$scanning_info{'variation'} || $line_info{'id'} eq $scanning_info{'variation'};
	unless ( $same_variation ) {
	    &Analyse_variation(\%scanning_info,\%scanning_score_pos,\%scanning_score_var );
	    %scanning_score_pos = ();
	    %scanning_score_var = ();
	    %scanning_info = ();
	    $same_variant_len = 1;
	}

	## Variants of different lengths (or empty alleles) cannot be
	## compared position by position
	$same_variant_len = 0 if ( length($line_info{'variant_alt'}) != length($line_info{'variant_ref'}) );
	$same_variant_len = 0 if ( $line_info{'variant_alt'} eq '-' || $line_info{'variant_ref'} eq '-' );

	## Index the variant and the sequence by position, strand and score
	my $variant_alt = $line_info{'variant_alt'};
	my $site = ($scanning_score_pos{$line_info{'site_offset_start'}}{$line_info{'site_strand'}}{$line_info{'score'}} ||= {});
	push ( @{ $site->{'variant_alt'} }, $variant_alt );
	push ( @{ $site->{'seq'} },         $line_info{'seq'} );

	## Keep the best score of each variant, with all the sites having
	## this score
	my $best = $scanning_score_var{$variant_alt};
	if ( !$best || !$best->{'score'} || $best->{'score'} < $line_info{'score'} ) {
	    ## First or better score: restart the list of sites
	    $scanning_score_var{$variant_alt} = {
		'score'             => $line_info{'score'},
		'site_offset_start' => [ $line_info{'site_offset_start'} ],
		'site_strand'       => [ $line_info{'site_strand'} ],
		'seq'               => [ $line_info{'seq'} ],
	    };

	} elsif ( $best->{'score'} == $line_info{'score'} ) {
	    ## Same score as the current best: add the site. The offset
	    ## and the strand were previously pushed to an unrelated
	    ## global hash (%scanning_score), so that only the sequence
	    ## of the tied sites was recorded.
	    push ( @{ $best->{'site_offset_start'} }, $line_info{'site_offset_start'} );
	    push ( @{ $best->{'site_strand'} },       $line_info{'site_strand'} );
	    push ( @{ $best->{'seq'} },               $line_info{'seq'} );
	}

	## Description of the current variation
	$scanning_info{'original_ref'} = $line_info{'variant_ref'};
	$scanning_info{'matrice'} = $line_info{'matrice'};
	$scanning_info{'variation'} = $line_info{'id'};
	$scanning_info{'type'} = $line_info{'type'}; ## Type of variant
	$scanning_info{'minor_allele_freq'} = $line_info{'minor_allele_freq'}; ## Minor allele frequency of the variant
	$scanning_info{'coord'} = $line_info{'coord'};
	$scanning_info{'same_len'} = $same_variant_len;
	$scanning_info{'var_strand'} = $line_info{'var_strand'};
    }

    ## Analyse the last variation
    &Analyse_variation(\%scanning_info,\%scanning_score_pos,\%scanning_score_var );
}

################################################################
## Analyse score for each variant of a variation
##
## Arguments (three hash references):
##  - $scanning_info       descriptive fields of the variation
##  - $scanning_score_pos  {offset}{strand}{score} => {'variant_alt' => [...], 'seq' => [...]}
##  - $scanning_score_var  {variant} => {'score', 'site_offset_start' => [...],
##                         'site_strand' => [...], 'seq' => [...]}
##
## Two modes, each delegating the comparison to &Compare():
##  - if $no_offset is set and 'same_len' is true in $scanning_info, the
##    scores are compared separately for each (offset, strand) position;
##  - otherwise the best score of each variant is compared, whatever its
##    position.
sub Analyse_variation {
  my ($scanning_info, $scanning_score_pos, $scanning_score_var) = @_;

  ## Compare all positions
  if ($no_offset && $scanning_info->{'same_len'}) {
    &RSAT::message::Debug("Comparing all offset positions") if ($main::verbose >= 10);
    foreach my $offset (keys( %{ $scanning_score_pos } )) {
      &RSAT::message::Debug("Offset",$offset) if ($main::verbose >= 10);
      foreach my $site_strand (keys( %{ $scanning_score_pos->{$offset} } )) {
        ## Scores observed at this position: score => {'variant_alt', 'seq'}
        my %scanning_score = %{ $scanning_score_pos->{$offset}{$site_strand} };
        my @scores = keys( %scanning_score );

        ## Attach the position to each score ($score is lexical, so that
        ## no package variable is touched).
        foreach my $score ( @scores ) {
          push (@{ $scanning_score{$score}{'site_offset_start'} }, $offset);
          push (@{ $scanning_score{$score}{'site_strand'} }, $site_strand);
        }

        ## With a single score, &Compare() takes the last element of each
        ## list as "best" and the remaining ones as "worst": the position
        ## is stored a second time so that both sides get it.
        if (scalar( @scores ) == 1) {
          push (@{ $scanning_score{$scores[0]}{'site_offset_start'} }, $offset);
          push (@{ $scanning_score{$scores[0]}{'site_strand'} }, $site_strand);
        }

        &Compare($scanning_info, \%scanning_score);
      }
    }

    ## Compare best score for each variant.
  } else {
    &RSAT::message::Debug("Comparing best score for each variant") if ($main::verbose >= 10);

    ## Group the variants by best score. The sites of one variant are
    ## joined with "," (&Compare() joins the variants with ";").
    my %scanning_score = ();
    foreach my $variant (keys( %{ $scanning_score_var } )) {
      &RSAT::message::Debug("Comparing  scores for one variant", $variant ) if ($main::verbose >= 10);
      my $variant_sites = $scanning_score_var->{$variant};
      my $score = $variant_sites->{'score'};
      push (@{ $scanning_score{$score}{'variant_alt'} }, $variant);
      push (@{ $scanning_score{$score}{'site_offset_start'} }, join(",", @{ $variant_sites->{'site_offset_start'} }) );
      push (@{ $scanning_score{$score}{'site_strand'} }, join(",", @{ $variant_sites->{'site_strand'} }) );
      push (@{ $scanning_score{$score}{'seq'} }, join(",", @{ $variant_sites->{'seq'} }) );
    }
    &Compare($scanning_info, \%scanning_score);
  }
}

################################################################
## Compare best and worst scores
##
## Arguments (two hash references):
##  - $scanning_info   descriptive fields of the variation (only a local
##                     copy is modified)
##  - $scanning_score  {score} => {'variant_alt' => [...], 'site_offset_start' => [...],
##                     'site_strand' => [...], 'seq' => [...]}
##
## The distinct scores are sorted by decreasing value. Each score except
## the lowest one is paired either with the highest score (the current
## score then plays the role of "worst") or with the lowest score (the
## current score then plays the role of "best"). When there is a single
## distinct score, the last variant of the list is reported as "best" and
## the other ones as "worst". Each pair passing the thresholds (%lth, %uth)
## is printed with &Printline(). With $only_biggest, the loop stops after
## the first pair that was not rejected by the thresholds.
##
## P-values are read from $matrix_info{matrix}{'pval'}{score}.
## NOTE(review): the P-value ratios are plain divisions, so a missing or
## null P-value raises a division by zero error.
sub Compare {
    my ($scanning_info, $scanning_score) = @_;

    my %scanning_info = %{ $scanning_info };
    my %scanning_score = %{ $scanning_score };

    ## Output field suffix => key of the corresponding list in %scanning_score
    my %list_key = (
        'variant' => 'variant_alt',
        'offset'  => 'site_offset_start',
        'strand'  => 'site_strand',
        'seq'     => 'seq',
    );

    &RSAT::message::Debug("Comparing scores" ) if ($main::verbose >= 10);

    my @sorted_score = sort {$b <=> $a} ( keys( %scanning_score ) );
    my $nb_scores = scalar(@sorted_score);
    my $l_count = 0;

    foreach my $score (@sorted_score) {
        $l_count++;
        &RSAT::message::Debug("In score loop", $l_count ) if ($main::verbose >= 10);

        ## The lowest score is never the current score, except if it is the only one
        next if ($score == $sorted_score[-1] && $nb_scores > 1);

        my $best_score = $sorted_score[0];
        my $best_pval = $matrix_info{$scanning_info{'matrice'}}{'pval'}{$best_score};

        &RSAT::message::Debug("Best score and pval", $best_score, $best_pval ) if ($main::verbose >= 10);

        ## Thresholds on the best score and its P-value
        next if ( $lth{'score'} && $lth{'score'} > $best_score );
        next if ( $uth{'pval'} && $uth{'pval'} < $best_pval);

        my $worst_score = $sorted_score[-1];
        my $worst_pval = $matrix_info{$scanning_info{'matrice'}}{'pval'}{$worst_score};
        my $score_pval = $matrix_info{$scanning_info{'matrice'}}{'pval'}{$score};

        my $diff_1 = $best_score-$score;  ## distance to the best score
        my $diff_2 = $score-$worst_score; ## distance to the worst score
        my $pval_ratio_1 = $score_pval/$best_pval;
        my $pval_ratio_2 = $worst_pval/$score_pval;

        ## The current score is treated as the "worst" one only if the
        ## corresponding threshold is set and it is farther from the best
        ## score than from the worst one.
        my $nearest_score_worst = 0;
        $nearest_score_worst = 1 if ( $lth{'score'} && $diff_1 > $diff_2);
        $nearest_score_worst = 1 if ( $uth{'pval'} && $pval_ratio_1 > $pval_ratio_2);

        ## Select the pair of scores to report
        my ($high_score, $high_pval, $low_score, $low_pval, $w_diff, $pval_ratio);
        my $single_score = 0;
        if ( $nearest_score_worst ) {
            ## Best score versus current score
            ($high_score, $high_pval) = ($best_score, $best_pval);
            ($low_score, $low_pval) = ($score, $score_pval);
            ($w_diff, $pval_ratio) = ($diff_1, $pval_ratio_1);

        } elsif ( $nb_scores == 1 ) {
            ## All the variants have the same score. This case is detected
            ## from the number of distinct scores rather than from
            ## $diff_1 == $diff_2, which is also true for a score located
            ## exactly half-way between the best and the worst ones.
            $single_score = 1;
            ($high_score, $high_pval) = ($best_score, $best_pval);
            ($low_score, $low_pval) = ($best_score, $best_pval);
            ($w_diff, $pval_ratio) = ($diff_1, $pval_ratio_1);

        } else {
            ## Current score versus worst score
            ($high_score, $high_pval) = ($score, $score_pval);
            ($low_score, $low_pval) = ($worst_score, $worst_pval);
            ($w_diff, $pval_ratio) = ($diff_2, $pval_ratio_2);
        }

        ## Thresholds on the score difference and on the P-value ratio
        next if ( $lth{'w_diff'} && $lth{'w_diff'} > $w_diff );
        next if ( $lth{'pval_ratio'} && $lth{'pval_ratio'} > $pval_ratio);

        $scanning_info{'best_score'} = $high_score;
        $scanning_info{'worst_score'} = $low_score;
        $scanning_info{'w_diff'} = $w_diff;
        $scanning_info{'best_pval'} = $high_pval;
        $scanning_info{'worst_pval'} = $low_pval;
        $scanning_info{'pval_ratio'} = $pval_ratio;

        ## Variants, offsets, strands and sequences of both sides
        foreach my $field (keys(%list_key)) {
            ## Work on a copy, in order to leave the input lists untouched
            my @high_values = @{ $scanning_score{$high_score}{$list_key{$field}} };
            if ($single_score) {
                ## Last element on the "best" side, the other ones on the "worst" side
                $scanning_info{'best_'.$field} = pop(@high_values);
                $scanning_info{'worst_'.$field} = join(";", @high_values);
            } else {
                $scanning_info{'best_'.$field} = join(";", @high_values);
                $scanning_info{'worst_'.$field} = join(";", @{ $scanning_score{$low_score}{$list_key{$field}} });
            }
        }

        &Printline(&GetMinOff(%scanning_info));

        ## The line printed above is the only one reported in this mode.
        ## (A leftover debugging statement dumping the whole hash on the
        ## currently selected filehandle has been removed from here.)
        if ($only_biggest) {
            &RSAT::message::Debug("Reporting only biggest score difference", $only_biggest) if ($main::verbose >= 10);
            last;
        }
    }
    return();
}

################################################################
## Get the min_offset_diff and if the strand change
##
## Argument: the %scanning_info hash (passed as a list), with the fields
## 'best_offset', 'worst_offset', 'best_strand' and 'worst_strand'. In these
## fields, the values of different variants are separated by ";" and the
## values of different sites of one variant by ",".
##
## Returns the same hash with two additional fields:
##  - 'min_offset_diff'  the (signed) difference worst offset - best offset
##                       having the smallest absolute value among all the
##                       pairs of sites; initialised to $flank_len and only
##                       replaced by strictly smaller absolute differences
##  - 'strand_change'    1 if the strands differ for the pair of sites
##                       giving 'min_offset_diff', 0 otherwise
sub GetMinOff {
    my %scanning_info = @_;

    $scanning_info{'min_offset_diff'} = $flank_len;
    $scanning_info{'strand_change'} = 0;

    ## Split each field once: one array reference per variant, containing
    ## one value per site.
    my @best_offsets = map { [ split(",", $_) ] } split(";", $scanning_info{'best_offset'});
    my @worst_offsets = map { [ split(",", $_) ] } split(";", $scanning_info{'worst_offset'});
    my @best_strands = map { [ split(",", $_) ] } split(";", $scanning_info{'best_strand'});
    my @worst_strands = map { [ split(",", $_) ] } split(";", $scanning_info{'worst_strand'});

    ## Lexical indices are used on purpose: the package variable $b is
    ## reserved for sort().
    foreach my $best_var (0 .. $#best_offsets) {
        foreach my $best_site (0 .. $#{ $best_offsets[$best_var] }) {
            my $best_offset = $best_offsets[$best_var][$best_site];

            foreach my $worst_var (0 .. $#worst_offsets) {
                foreach my $worst_site (0 .. $#{ $worst_offsets[$worst_var] }) {
                    my $offset_diff = $worst_offsets[$worst_var][$worst_site] - $best_offset;
                    next unless ( abs($offset_diff) < abs($scanning_info{'min_offset_diff'}) );

                    $scanning_info{'min_offset_diff'} = $offset_diff;

                    ## The flag describes the pair retained for the minimum,
                    ## so it is reset when this pair has identical strands.
                    if ($worst_strands[$worst_var][$worst_site] ne $best_strands[$best_var][$best_site]) {
                        $scanning_info{'strand_change'} = 1;
                    } else {
                        $scanning_info{'strand_change'} = 0;
                    }
                }
            }
        }
    }
    return %scanning_info;
}

################################################################
## Print
##
## Print one result line (tab-delimited) on the output stream $out and
## increment the counter $output_lines.
##
## Argument: the %scanning_info hash (passed as a list), as completed by
## &Compare() and &GetMinOff().
##
## The flag printed before the minor allele frequency tells whether the
## reference allele ('original_ref') is one of the variants giving the best
## score ("Y") or not ("N").
sub Printline {
  my (%scanning_info) = @_;
  $output_lines++;

  ## Reference allele, expressed on the same strand as the scanned variants
  my $local_ref = $scanning_info{'original_ref'};

  ## Take into account the strand information of the variation.
  ## The allele is reversed AND complemented, as announced by the message
  ## below (a complement alone is only right for single-nucleotide alleles).
  ## NOTE(review): this assumes that the variants of a "-" strand variation
  ## are reverse-complemented upstream; confirm against retrieve-variation-seq.
  if ($scanning_info{'var_strand'} eq "-") {
    $local_ref = scalar reverse($local_ref);
    $local_ref =~ tr/ACGTacgt/TGCAtgca/;
    &RSAT::message::Info("Transforming ref variant to revers complement for comparison", $scanning_info{'variation'}) if ($main::verbose>=5);
  }

  ## Flag to specify if the best score was obtained from the major allele
  ## (reference variant) or from the minor allele. 'best_variant' can
  ## contain several variants separated by ";" (variants having the same
  ## score), so each of them is compared to the reference.
  my $is_ref_better = "N";
  foreach my $best_variant (split(";", $scanning_info{'best_variant'})) {
    if ($best_variant eq $local_ref) {
      $is_ref_better = "Y";
      last;
    }
  }

  # ## JvH: TO BE FIXED.This does not work because the best_seq and
  # ## worst_seq are sometimes multiple sequences, separated by commas
  # ## !!!
  # my $best_offset = -$scanning_info{'best_offset'};
  # my $best_seq = lc(substr($scanning_info{'best_seq'}, 0, $best_offset-1));
  # $best_seq .= uc(substr($scanning_info{'best_seq'},$best_offset,1));
  # $best_seq .= lc(substr($scanning_info{'best_seq'},$best_offset+1));

  # my $worst_offset = -$scanning_info{'worst_offset'};
  # my $worst_seq = lc(substr($scanning_info{'worst_seq'}, 0, $worst_offset-1));
  # $worst_seq .= uc(substr($scanning_info{'worst_seq'},$worst_offset,1));
  # $worst_seq .= lc(substr($scanning_info{'worst_seq'},$worst_offset+1));

  ## Fields of the output line, in column order
  my $str = join ("\t",
		  $scanning_info{'matrice'},
		  $matrices_id{$scanning_info{'matrice'}},
		  $scanning_info{'variation'},
		  $scanning_info{'type'},
		  $scanning_info{'coord'},
		  $scanning_info{'best_score'},
		  $scanning_info{'worst_score'},
		  sprintf("%.2f",$scanning_info{'w_diff'}),
		  $scanning_info{'best_pval'},
		  $scanning_info{'worst_pval'},
		  sprintf("%.2f",$scanning_info{'pval_ratio'}),
		  $scanning_info{'best_variant'},
		  $scanning_info{'worst_variant'},
		  $scanning_info{'best_offset'},
		  $scanning_info{'worst_offset'},
		  $scanning_info{'min_offset_diff'},
		  $scanning_info{'best_strand'},
		  $scanning_info{'worst_strand'},
		  $scanning_info{'strand_change'},
		  $scanning_info{'best_seq'},
		  $scanning_info{'worst_seq'},
		  $scanning_info{'original_ref'}, ## Major allele (reference allele), as found in the input (not strand-corrected)
		  $is_ref_better, ## Flag if the best score comes from the major allele (reference allele)
		  $scanning_info{'minor_allele_freq'});

  print $out $str."\n";
}

################################################################
## Read arguments
##
## Parse a copy of @ARGV and set the corresponding global parameters.
## Options expecting a value take it from the next argument. An unknown
## option, or an invalid value for a checked option, raises a fatal error.
## The POD blocks interleaved with the code document each option next to
## the code handling it.
sub ReadArguments {
  my $arg;
  my @arguments = @ARGV; ## create a copy to shift, because we need ARGV to report command line in &Verbose()
  while (scalar(@arguments) >= 1) {
    $arg = shift (@arguments);

=pod

=head1 OPTIONS

=over 4

=item B<-v #>

Level of verbosity (detail in the warning messages during execution)

=cut
    if ($arg eq "-v") {
      ## The level is optional: -v alone sets the verbosity to 1
      if (&IsNatural($arguments[0])) {
        $main::verbose = shift(@arguments);
      } else {
        $main::verbose = 1;
      }

=pod

=item B<-h>

Display full help message

=cut
    } elsif ($arg eq "-h") {
      &PrintHelp();

=pod

=item B<-help>

Same as -h

=cut
    } elsif ($arg eq "-help") {
      &PrintOptions();

=pod

=item B<-i #>

Variation file RSAT format

=cut
    } elsif ($arg eq "-i") {
      $main::infile{input} = shift(@arguments);

=pod

=item B<-m #>

The matrix file transfac format

This option can be used several times to specify several matrix files.

=cut
    } elsif ($arg eq "-m") {
	push @matrix_files,  shift(@arguments);

=pod

=item B<-top_matrices #>

Number of top matrices. The value must be a strictly positive natural
number.

=cut
    } elsif ($arg eq "-top_matrices") {
	$top_matrices = shift(@arguments);
	&FatalError($top_matrices, "Invalid value for the option -top_matrices. Should be a strictly positive Natural number.")
	    unless ((&IsNatural($top_matrices)) && ($top_matrices >= 1));

=pod

=item B<-bg>

Background file

=cut
    } elsif ($arg eq "-bg") {
      $main::infile{bg} = shift(@arguments);

=pod

=item B<-m_format matrix_format>

Matrix file format

=cut
    } elsif ($arg eq "-m_format") {
      ## The format name is case-insensitive
      $main::matrix_format = lc(shift(@arguments));
	    unless ($supported_input_format{$main::matrix_format }) {
		&RSAT::error::FatalError("$main::matrix_format \tInvalid input format for matrix\tSupported:",$supported_input_formats);
	    }

=pod

=item B<-mml #>

Length of the longest matrix. This value has to be consistent with the
one used for retrieving the variant sequences (see
I<retrieve-variation-seq>).

=cut
    } elsif ($arg eq "-mml") {
      if (&IsNatural($arguments[0])) {
        ## The flank length is the matrix length minus one
        $main::flank_len = shift(@arguments)-1;
      } else {
        &RSAT::error::FatalError("-mml argument : ",shift(@arguments)," is not natural");
      }

=pod

=item B<-top_matrix #>

Only work with the # top matrix

=cut
    } elsif ($arg eq "-top_matrix") {
      if (&IsNatural($arguments[0])) {
        $main::top_matrix = shift(@arguments);
      } else {
        &RSAT::error::FatalError("-top_matrix argument : ",shift(@arguments)," is not natual");
      }

=pod

=item B<-top_variation #>

Only work with the # top variation

=cut
    } elsif ($arg eq "-top_variation") {
      if (&IsNatural($arguments[0])) {
        $main::top_variation = shift(@arguments);
      } else {
        &RSAT::error::FatalError("-top_variation argument : ",shift(@arguments)," is not natural");
      }

=pod

=item B<-lth type #>

Lower threshold: only return variations for which the value of the
given type is >= #. The threshold is a real number.

=cut
    } elsif ($arg eq "-lth") {
	my $type =  shift(@arguments);
	if ( $supported_lth{$type} ) {
	    ## Thresholds are real numbers (not only natural ones)
	    if (&RSAT::util::IsReal($arguments[0])) {
		$lth{$type} = shift(@arguments);
	    } else {
		&RSAT::error::FatalError("-lth argument : ",shift(@arguments)," is not a real number");
	    }
	} else {
	    &RSAT::error::FatalError("type $type not supported with lth. Supperted type :",join(", ",keys(%supported_lth)));
	}

=pod

=item B<-uth type #>

Upper threshold: only return variations for which the value of the
given type is <= #. The threshold is a real number.

=cut
    } elsif ($arg eq "-uth") {
        my $type =  shift(@arguments);
        if ( $supported_uth{$type} ) {
            ## Thresholds are real numbers (not only natural ones)
            if (&RSAT::util::IsReal($arguments[0])) {
                $uth{$type} = shift(@arguments);
            } else {
                &RSAT::error::FatalError("-uth argument : ",shift(@arguments)," is not a real number");
            }
        } else {
	    &RSAT::error::FatalError("type $type not supported with uth. Supperted type :",join(", ",keys(%supported_uth)));
        }

=pod

=item B<-html>

Convert the tab-delimited file into an HTML file, which facilitates the inspection of the results with a Web browser.
The HTML file has the same name as the output file, but the extension (.tab, .txt) is replaced by the .html extension

=cut
    } elsif ($arg eq "-html") {
      $main::html = 1;

=pod

=item B<-calc_distrib>

Calculate and save distribution of matrices

=cut
    } elsif ($arg eq "-calc_distrib") {
      $main::calc_distrib = 1;

=pod

=item B<-distrib_dir #>

Directory to store the distribution files.
Mandatory if -calc_distrib is being used.

=cut
    } elsif ($arg eq "-distrib_dir") {
      $main::distrib_dir = shift(@arguments);

=pod

=item B<-distrib_list #>

Name of the file containing the list of matrix distrib file name

/!\ This file must be in the same directory as the distrib file

=cut
    } elsif ($arg eq "-distrib_list") {
      $main::infile{'distrib_list'} = shift(@arguments);

=pod

=item B<-only_biggest>

Only return the biggest difference of score between two alleles of a
variation regardless of the window. This option is useful for
insertions and deletions.

=cut
    } elsif ($arg eq "-only_biggest") {
      $main::only_biggest = 1;

=pod

=item	B<-o outputfile>

The output file is a tab-delimited text file (see the section OUTPUT
FORMAT).

If no output file is specified, the standard output is used.  This
allows to use the command within a pipe.

=cut
    } elsif ($arg eq "-o") {
      $outfile{output} = shift(@arguments);

=pod

=back

=cut

    } else {
      &FatalError(join("\t", "Invalid option", $arg));
    }
  }
}

################################################################
## Verbose message
##
## Print on the output stream $out a header made of comment lines
## (starting with ";"): the program name followed by the arguments
## (printed by &PrintArguments()), the program version, then the input
## files and the output files, each as a "key / value" list. A file
## section is only printed if the corresponding hash is not empty.
sub Verbose {
  print $out "; variation-scan ";

  &PrintArguments($out);

  printf $out "; %-22s\t%s\n", "Program version", $program_version;

  ## Title line and hash of files for each section, in printing order
  my @file_sections = (
    [ "; Input files\n",  \%main::infile ],
    [ "; Output files\n", \%main::outfile ],
  );

  foreach my $section (@file_sections) {
    my ($title_line, $files) = @{ $section };

    ## Skip the section when no file of this kind has been specified
    next unless (%{ $files });

    print $out $title_line;
    foreach my $key (keys(%{ $files })) {
      printf $out ";\t%-13s\t%s\n", $key, $files->{$key};
    }
  }
}
