#!/usr/bin/perl -w
############################################################
#
# $Id: get-genes2reactions,v 1.1 2011/12/02 11:53:32 jvanheld Exp $
#
############################################################

## use strict;

=pod

=head1 NAME

get-genes2reactions

=head1 VERSION

$program_version

=head1 DESCRIPTION

Retrieve a table with gene - reaction association from external
data sources.

=head1 DATA SOURCES


=head2 CEA

This first version supports only one data source: the Web service of
the CEA (developed by Francois Le Fevre and David Vallenet).

=head3 CEA WSDL

 http://www.genoscope.cns.fr/microcyc-webservice/services/fr.genoscope.agc.microcyc.business.MicrocycService.wsdl


=head1 AUTHORS

=over

=item Didier.Croes@ulb.ac.be

=item Jacques.van.Helden@ulb.ac.be

=back

=head1 CATEGORY

=over

=item metabolic pathways

=back

=head1 USAGE


=head2 Getting the list of supported organisms

 get-genes2reactions -supported_organisms

Returns the list of supported organisms at CEA. Note that when this
option is called, gene-reaction data is not retrieved.


=head2 Getting the gene-reaction links for one or several organisms

 get-genes2reactions -org organism1 -org organism2 ....

 get-genes2reactions -all_organisms

 get-genes2reactions -org_file organism_file.txt

=head1 INPUT FORMAT

=head2 Organism file

A list of organisms can be provided in a text file.

The first word of each row is taken as organism ID, the rest of the row is ignored.

=head1 OUTPUT FORMAT

=head2 List of supported organisms

The list of supported organisms is returned as a text with one row per
organism.

=head2 Gene - reaction links

A tab-delimited text file with one row per link.

Note that the gene - reaction link is typically n-to-n for several reasons: 
 - a given gene can be linked to several EC numbers (multidomain
   enzymes);
 - a given EC number can be related to several reactions.

=head1 SEE ALSO

=over

=item I<pathway-extractor>

The primary goal of the script I<get-genes2reactions> is to obtain the
Gene-EC-Reaction (GER) files required as input for the RSAT script
I<pathway-extractor>.

=back

=head1 WISH LIST

=cut


BEGIN {
  ## Add the lib/ directory located next to the script to the module
  ## search path, so that RSA.lib can be found whatever the directory
  ## from which the script is called.
  ##
  ## The pattern matches the last component of the script path ($0);
  ## everything that precedes the match is taken as the script directory.
  if ($0 =~ /([^(\/)]+)$/) {
    my $script_dir = substr($0, 0, $-[0]); ## text preceding the match
    push(@INC, $script_dir."lib/");
  }
}
require "RSA.lib";

use SOAP::Lite; ## Required to connect external Web services (e.g. CEA)

################################################################
## Main package
package main;
{
  ################################################################
  ## Main block of the script.
  ##
  ## 1. Initialise the package variables shared with the subroutines
  ##    (&ReadArguments() and &Verbose() read and write them, so they
  ##    are declared with "local" rather than "my").
  ## 2. Read and check the command-line arguments.
  ## 3. Query the CEA Web service, either for the list of supported
  ##    organisms or for the gene-reaction (GPR) data of each selected
  ##    organism, and print the result to the output stream.

  ################################################################
  ## Initialise parameters
  local $start_time = &RSAT::util::StartScript();
  local $program_version = do { my @r = (q$Revision: 1.1 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r };
  #    $program_version = "0.00";

  local %main::infile = ();
  local %main::outfile = ();

  local $main::verbose = 0;
  local $main::in = STDIN;
  local $out = STDOUT;

  local @supported_return_fields = ("supported_organisms", ## list of organisms supported on the data source
				   );
  local %supported_return_field = ();
  foreach my $field (@supported_return_fields) {
    $supported_return_field{$field} = 1;
  }
  local $supported_return_fields = (join ",", @supported_return_fields);
  local %return_field = ();

  local @selected_organisms = ();
  local $current_organism ="";

  ################################################################
  ## Read argument values
  &ReadArguments();

  ################################################################
  ## Check argument values

  ## The script needs either a return field (e.g. the list of supported
  ## organisms) or at least one organism for which the gene-reaction
  ## links have to be retrieved.
  if ((scalar(keys(%return_field)) < 1) && (scalar(@selected_organisms) < 1)) {
    &RSAT::error::FatalError("At least one organism (option -org) or one return field (option -return) must be specified");
  }

  ## The request for a list of supported organisms is incompatible with other fields
  if ($return_field{supported_organisms}) {
    if (scalar(keys(%return_field)) > 1) {
      &RSAT::message::Warning("The option -return supported_organism is incompatible with other return fields. Other fields will be ignored.") if ($main::verbose >= 1);
    }
  } else {
    ## At least one organism should be specified
    if (scalar(@selected_organisms) < 1) {
      &RSAT::error::FatalError("At least one organism should be specified (option -org)");
    }
  }

  ################################################################
  ## Open output stream
  $out = &OpenOutputFile($outfile{output});

  ################################################################
  ## Open the connection to the CEA Web service. The service object is
  ## declared here because both branches below need it.
  my $CEA_WSDL='http://www.genoscope.cns.fr/microcyc-webservice/services/fr.genoscope.agc.microcyc.business.MicrocycService.wsdl';
  my $service = SOAP::Lite->service($CEA_WSDL);

  if ($return_field{supported_organisms}) {
    ################################################################
    ## Return list of supported organisms
    my $org_list_result = $service->getListingPgdbFrameVersion();

    &RSAT::message::Debug($org_list_result) if ($main::verbose >= 5);

    unless ($org_list_result) {
      &RSAT::error::FatalError("The CEA Web service did not return any list of organisms");
    }

    ## Print header
    print $out "#", join("\t", "ORGID_DBVERS", "ORGID", "DBVERS"), "\n";

    ## Each value of the result is a list of organism identifiers of the
    ## form ORGID-DBVERS (e.g. ACIAD31-150).
    my %org_list_hash = %$org_list_result;
    foreach my $key (sort(keys(%org_list_hash))) {
      my $value = $org_list_hash{$key};
      &RSAT::message::Debug($key, $value) if ($main::verbose >= 5);
      my @orgid_dbvers = @$value;
      foreach my $orgid_dbvers (@orgid_dbvers) {
        my ($orgid, $dbvers) = split('\-', $orgid_dbvers);
        print $out join("\t", $orgid_dbvers, $orgid, $dbvers), "\n";
      }
    }

    ## When the list of organisms is requested, gene-reaction data is
    ## not retrieved.
    exit(0);

  } else {
    ################################################################
    ## Retrieve the gene-reaction (GPR) data for each selected organism
    foreach my $orgid_dbvers (@selected_organisms) {
      &RSAT::message::TimeWarn("Getting GPR for organism", $orgid_dbvers);
      my $gpr_result = $service->getGPR($orgid_dbvers); ## example: ACIAD31-150
      &RSAT::message::Debug($gpr_result) if ($main::verbose >= 5);

      ## Skip organisms for which the Web service returned nothing
      my $gprs = $gpr_result ? $gpr_result->{'gprs'} : undef;
      unless ($gprs) {
        &RSAT::message::Warning(join("\t", "No GPR returned for organism", $orgid_dbvers)) if ($main::verbose >= 1);
        next;
      }

      ## Dump the content of the result: each entry of the "gprs" hash
      ## is traversed as a list of hashes, whose key-value pairs are
      ## printed one per row.
      print $out "gprs => $gprs\n";
      while (my ($gpr_key, $gpr_list) = each %$gprs) {
        print $out "$gpr_key => $gpr_list\n";
        print $out "$gpr_list\n";
        foreach my $gpr (@$gpr_list) {
          last unless ($gpr); ## stop at the first empty element
          while (my ($key, $val) = each %$gpr) {
            print $out "$key => $val\n";
          }
        }
      }
    }
  }

#   ################################################################
#   ## Read input
#   ($main::in) = &OpenInputFile($main::infile{input});
#   while (<$main::in>) {

#   }
#   close $main::in if ($main::infile{input});

  ################################################################
  ## Print verbose
  &Verbose() if ($main::verbose >= 1);

  ################################################################
  ## Execute the command

  ################################################################
  ## Insert here output printing

  ################################################################
  ## Report execution time and close output stream
  my $exec_time = &RSAT::util::ReportExecutionTime($start_time); ## This has to be executed by all scripts
  print $out $exec_time if ($main::verbose >= 1); ## only report exec time if verbosity is specified
  close $out if ($outfile{output});

  exit(0);
}

################################################################
################### SUBROUTINE DEFINITION ######################
################################################################


################################################################
## Display full help message 
sub PrintHelp {
  ## Format the POD documentation embedded in this script with pod2text
  ## (option -c: colored output), then stop the program.
  ##
  ## The command is run with the list form of system(), so that the
  ## script path is passed as a single argument without going through
  ## the shell (a path containing spaces or shell metacharacters would
  ## otherwise break the command).
  system("pod2text", "-c", $0);
  exit();
}

################################################################
## Display short help message
sub PrintOptions {
  ## There is no separate short help for this script: the full help
  ## message is displayed instead.
  PrintHelp();
}

################################################################
## Read arguments 
sub ReadArguments {
  ## Parse the command-line arguments and store the values in the
  ## package variables initialised by the main block ($main::verbose,
  ## %main::infile, %outfile, %return_field, @selected_organisms).
  ##
  ## Invalid options and invalid return fields stop the program with a
  ## fatal error.
  my $arg;
  my @arguments = @ARGV; ## create a copy to shift, because we need ARGV to report command line in &Verbose()
  while (scalar(@arguments) >= 1) {
    $arg = shift (@arguments);
    ## Verbosity

=pod

=head1 OPTIONS

=over 4

=item B<-v #>

Level of verbosity (detail in the warning messages during execution)

=cut
    if ($arg eq "-v") {
      if (&IsNatural($arguments[0])) {
        $main::verbose = shift(@arguments);
      } else {
        $main::verbose = 1;
      }


=pod

=item B<-h>

Display full help message

=cut
    } elsif ($arg eq "-h") {
      &PrintHelp();


=pod

=item B<-help>

Same as -h

=cut
    } elsif ($arg eq "-help") {
      &PrintOptions();


=pod

=item B<-i inputfile>

If no input file is specified, the standard input is used.  This
allows the command to be used within a pipe.

=cut
    } elsif ($arg eq "-i") {
      $main::infile{input} = shift(@arguments);


=pod

=item B<-return return_field>

Field(s) to be returned.
Several return fields can be provided, separated by commas.

The option can also be used iteratively on the same command line to
specify multiple return fields.

B<Supported return fields>

=over 4

=item I<supported_organisms>

List of organisms supported on the data source.

=back

=cut
    } elsif ($arg eq "-return") {
      my $fields_to_return = shift(@arguments);
      my @fields_to_return = split (",", $fields_to_return);
      foreach my $field (@fields_to_return) {
        if ($supported_return_field{$field}) {
          $return_field{$field} = 1;
        } else {
          &RSAT::error::FatalError(join("\t", $field, "Invalid return field. Supported:", $supported_return_fields));
        }
      }


=pod

=item B<-supported_organisms>

Return the list of supported organisms. Same as I<-return
supported_organisms>.

=cut
    } elsif ($arg eq "-supported_organisms") {
      $return_field{supported_organisms} = 1;


=pod

=item	B<-org organism>

Organism.

The argument can be used iteratively on the same command line to
specify multiple organisms.

=cut
    } elsif ($arg eq "-org") {
      push @selected_organisms, shift(@arguments);


=pod

=item	B<-org_file organism_file>

Text file containing a list of organisms. The first word of each row
is taken as organism ID, the rest of the row is ignored. Empty rows
and rows starting with # or ; are skipped.

=cut
    } elsif ($arg eq "-org_file") {
      my $org_file = shift(@arguments);
      unless (defined($org_file)) {
        &RSAT::error::FatalError("The option -org_file requires a file name");
      }
      $main::infile{org_file} = $org_file;
      open(my $org_handle, "<", $org_file)
        || &RSAT::error::FatalError(join("\t", "Cannot read organism file", $org_file, $!));
      while (my $line = <$org_handle>) {
        next if ($line =~ /^\s*[#;]/); ## skip header and comment rows
        next unless ($line =~ /^\s*(\S+)/); ## skip empty rows
        push @selected_organisms, $1;
      }
      close($org_handle);


=pod

=item	B<-o outputfile>

If no output file is specified, the standard output is used.  This
allows the command to be used within a pipe.

=cut
    } elsif ($arg eq "-o") {
      $outfile{output} = shift(@arguments);

    } else {
      &RSAT::error::FatalError(join("\t", "Invalid option", $arg));

    }
  }

=pod

=back

=cut

}

################################################################
## Verbose message
sub Verbose {
  ## Print the verbose header on the output stream: command line,
  ## program version, then the input and output files (one row per
  ## file, only for the non-empty lists).
  print $out "; get-genes2reactions ";
  &PrintArguments($out);
  printf $out "; %-22s\t%s\n", "Program version", $program_version;

  ## Both file lists are reported with the same layout
  my @file_sections = (
    ["; Input files\n", \%main::infile],
    ["; Output files\n", \%main::outfile],
  );
  foreach my $section (@file_sections) {
    my ($title, $files) = @$section;
    next unless (%$files);
    print $out $title;
    foreach my $file_type (keys(%$files)) {
      printf $out ";\t%-13s\t%s\n", $file_type, $files->{$file_type};
    }
  }
}


__END__
