#!/usr/bin/perl -w
############################################################
#
# $Id: gene-info-from-galileo,v 1.4 2013/10/03 17:23:59 jvanheld Exp $
#
############################################################

## use strict;

=pod

=head1 NAME

gene-info-from-galileo

=head1 VERSION

$program_version

=head1 DESCRIPTION

Get information about all genes of a user-specified organism in the
Galileo (CEA/Genoscope/MICROSCOPE/MAGE) database.

=head1 AUTHORS

Aurelie.Bergon@inserm.fr

Jacques.van-Helden@univ-amu.fr

=head1 CATEGORY

=over

=item genome management

=item metabolic pathways

=back

=head1 USAGE

gene-info-from-galileo -taxid taxID [-username username -pass password] [-o outputfile] [-v #]

=head1 OUTPUT FORMAT

The program returns a tab-delimited text file with one row per
gene, and one column per attribute. When the verbosity is at least 1,
the content of each column is listed in a commented header.

Gene attributes

=over

=item B<gene_name>

Name of the gene.

=item B<micrid>

ID of the gene in the Galileo database.

=item B<ensid>

ID of the gene in the EnsemblGenomes database.

=item B<RefseqID>

ID of the gene in Refseq database.

=item B<ECs>

EC numbers associated with the gene product.  For multi-EC associations,
the EC numbers are concatenated with a semicolon (;) as separator.

=back

=head1 SEE ALSO

=head1 WISH LIST

=over

=item B<wish 1>

=item B<wish 2>

=back

=cut


BEGIN {
  ## Take the part of $0 that precedes the script file name and append its
  ## lib/ sub-directory to the module search path (presumably so that the
  ## libraries shipped next to the script can be loaded).
  if ($0 =~ m{([^(/)]+)$}) {
    ## $-[0] is the offset where the match starts, i.e. the length of the
    ## leading directory part of $0.
    my $script_dir = substr($0, 0, $-[0]);
    push(@INC, $script_dir . "lib/");
  }
}
require "RSA.lib";
use REST::Client;
use JSON;
use MIME::Base64;
use JSON qw( decode_json );     # From CPAN


################################################################
## Main package
##
## Workflow:
##  1. read and check the command-line arguments;
##  2. convert the query NCBI taxid into a MICROSCOPE organism ID;
##  3. fetch the list of genomic objects of that organism (JSON array);
##  4. export one tab-delimited row per genomic object.
package main;
{

  ################################################################
  ## Initialise parameters
  our $start_time = &RSAT::util::StartScript();
  our $program_version = do { my @r = (q$Revision: 1.4 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r };
  #    $program_version = "0.00";

  our %infile = ();
  our %outfile = ();

  our $verbose = 0;
  our $in = STDIN;
  our $out = STDOUT;

  ## Credentials (optional), REST resources and organism identifiers
  our $username = '';
  our $password = '';
  our $qtaxid = 'organisms/n2m/';
  our $query = 'genomes/genomic-objects/list/';
  our $taxid = '';
  our $micrid ='';

  our $host = "http://mgalileo.genoscope.cns.fr";

  ## Description of the output columns (printed in the verbose header)
  our %field_description = (
    GeneName => "gene name ('goGeneName' in microscope).",
    micrID   => "gene ID (goOriId) in the microscope database (http://www.genoscope.cns.fr/agc/microscope/).",
    Refseq   => "gene ID into RefSeq ('refSeq' in microscope).",
    Uniprot  => "uniprot ID ('uniprot' in microscope).",
    trembl   => "",
    protID   => "",
    Label    => "",
    Synonyms => "",
    Product  => "",
    EcNumber => "EC number ('goEc' in microscope).",
    Reaction => "",
    Function => "",
    Process  => "",
    Location => "",
    Begin    => "",
    End      => "",
    Frame    => "",
    Type     => "",
    Note     => "",
  );

  ## Origin of each output column, in output order:
  ##   [output field, JSON key, 1 if the key belongs to the nested
  ##    'genomicObject' record / 0 if it is a top-level key]
  ## EnsemblID and NCBI id are not exported: this information is not
  ## available in microscope.
  my @field_sources = (
    ['GeneName', 'goGeneName', 1],
    ['micrID',   'goOriId',    1],
    ['Refseq',   'refSeq',     0],
    ['Uniprot',  'uniprot',    0],
    ['trembl',   'trembl',     0],
    ['protID',   'protId',     0],
    ['Label',    'goLabel',    1],
    ['Synonyms', 'goSynonyms', 1],
    ['Product',  'goProduct',  1],
    ['EcNumber', 'goEc',       1],
    ['Reaction', 'goReaction', 1],
    ['Function', 'goFunction', 1],
    ['Process',  'goProcess',  1],
    ['Location', 'goLocation', 1],
    ['Begin',    'goBegin',    1],
    ['End',      'goEnd',      1],
    ['Frame',    'goFrame',    1],
    ['Type',     'goType',     1],
    ['Note',     'goNote',     1],
  );

  ## The header is derived from the table above, so that the header and the
  ## rows cannot get out of sync.
  our @out_fields = map { $_->[0] } @field_sources;
  our $null = "<NA>";

  ## Multi-valued fields: values are exported with ";" as separator
  ## instead of ", ".
  my %multi_valued = (EcNumber => 1, Synonyms => 1);

  ################################################################
  ## Read argument values
  &ReadArguments();

  ################################################################
  ## Check argument values
  unless ((defined($taxid)) && ($taxid ne '')) {
    &FatalError("You must specify the NCBI taxid of the query organism (option -taxid)");
  }

  ################################################################
  ## Open output stream
  $out = &OpenOutputFile($outfile{output});

  ## decode_json() returns character strings: without an encoding layer,
  ## non-ASCII characters are written incorrectly and print() issues
  ## "Wide character" warnings (observed with Bacillus subtilis, taxid 224308).
  binmode($out, ":encoding(UTF-8)");

  ################################################################
  ## Execute the command

  ## Convert query taxid (from NCBI) to MICROSCOPE-specific taxonomic ID (from microscope)
  &RSAT::message::TimeWarn("Getting MICROSCOPE species ID corresponding to NCBI taxid", $taxid) if ($main::verbose >= 2);
  $micrid = &GalileoGet($qtaxid . $taxid);
  unless ((defined($micrid)) && ($micrid ne '')) {
    &FatalError(join("\t", "No MICROSCOPE organism ID returned for taxid", $taxid));
  }
  &RSAT::message::Info("Taxid", $taxid, "corresponds to MICROSCOPE organism ID", $micrid) if ($main::verbose >= 2);

  ################################################################
  ## Print verbose
  &Verbose() if ($main::verbose >= 1);

  #######################################################
  ## Get the genomic objects of the organism (json output, decoded once)
  &RSAT::message::TimeWarn("Getting gene information for an organism from galileo") if ($main::verbose >= 2);
  my $content = &GalileoGet($query . $micrid);
  my $genes = eval { decode_json($content) };
  if ($@) {
    &FatalError(join("\t", "Cannot decode the JSON response from Galileo", $@));
  }
  unless (ref($genes) eq 'ARRAY') {
    &FatalError("Unexpected response from Galileo: a JSON array of genomic objects was expected");
  }

  ## Column descriptions
  if ($main::verbose >= 1) {
    print $out "; Column content\n";
    my $f = 0;
    foreach my $field (@out_fields) {
      $f++;
      print $out join ("\t", ";", $f, $field, $field_description{$field}), "\n";
    }
  }

  ## Header line
  print $out "#", join("\t", @out_fields), "\n";

  ################################################################
  ## One row per genomic object
  foreach my $gene (@{$genes}) {
    next unless (ref($gene) eq 'HASH');

    my $genomic_object = $gene->{'genomicObject'};
    $genomic_object = {} unless (ref($genomic_object) eq 'HASH');

    my @row = ();
    foreach my $source (@field_sources) {
      my ($field, $key, $in_genomic_object) = @{$source};
      my $value = $in_genomic_object ? $genomic_object->{$key} : $gene->{$key};
      if (defined($value)) {
	## Replace every ", " separator (not only the first one)
	$value =~ s/, /;/g if ($multi_valued{$field});
      } else {
	$value = $null;
      }
      push @row, $value;
    }
    print $out join("\t", @row), "\n";
  }

  ################################################################
  ## Report execution time and close output stream
  my $exec_time = &RSAT::util::ReportExecutionTime($start_time); ## This has to be exectuted by all scripts
  print $out $exec_time if ($main::verbose >= 1); ## only report exec time if verbosity is specified
  close $out if ($outfile{output});

  &RSAT::message::TimeWarn("Job finished") if ($main::verbose >= 2);
  exit(0);
}

################################################################
## Send a GET request to the Galileo REST API and return the response body.
##
## Argument: the resource path, relative to /api/microscope/.
## The request carries a basic authorization header built from
## $main::username and $main::password. The program is stopped with
## &FatalError() if the HTTP status is not 2xx.
sub GalileoGet {
  my ($resource) = @_;
  my $path = "/api/microscope/" . $resource;

  &RSAT::message::TimeWarn("Opening connection to Galileo host", $main::host) if ($main::verbose >= 2);
  my $client = REST::Client->new();
  $client->setHost($main::host);

  ## The second argument of encode_base64() suppresses the newline that is
  ## appended by default, and which must not appear in a header value.
  $client->addHeader("Authorization", "Basic " . encode_base64($main::username . ":" . $main::password, ""));

  $client->GET($path);
  my $code = $client->responseCode();
  unless ((defined($code)) && ($code =~ /^2\d\d$/)) {
    &FatalError(join("\t", "Galileo request failed", $main::host . $path,
		     "HTTP status", (defined($code) ? $code : "undefined")));
  }
  return $client->responseContent();
}

################################################################
################### SUBROUTINE DEFINITION ######################
################################################################


################################################################
## Display full help message
##
## Render the POD documentation embedded in this script with pod2text
## (option -c: coloured output), then terminate the program.
sub PrintHelp {
  ## List form of system(): the script path is passed as one argument and is
  ## not interpreted by a shell, so paths containing spaces or shell
  ## metacharacters are handled correctly.
  system("pod2text", "-c", $0);
  exit(0);
}

################################################################
## Display short help message
##
## No condensed option summary is implemented here: the call is simply
## delegated to &PrintHelp().
sub PrintOptions {
  return &PrintHelp();
}

################################################################
## Read arguments
##
## Parse a copy of @ARGV and store the option values in the package
## variables of main ($verbose, %infile, %outfile, $taxid, $username,
## $password). Unknown options, and options given without their value, are
## reported with &FatalError().
sub ReadArguments {
  my @arguments = @ARGV; ## create a copy to shift, because we need ARGV to report command line in &Verbose()

  ## Return the value that follows an option, or stop with an explicit
  ## message when the command line ends right after the option.
  my $option_value = sub {
    my ($option) = @_;
    unless (scalar(@arguments) >= 1) {
      &FatalError(join("\t", "Option", $option, "requires a value"));
    }
    return shift(@arguments);
  };

  while (scalar(@arguments) >= 1) {
    my $arg = shift(@arguments);
    ## Verbosity

=pod

=head1 OPTIONS

=over 4

=item B<-v #>

Level of verbosity (detail in the warning messages during execution)

=cut
    if ($arg eq "-v") {
      ## The level is optional: -v alone means verbosity 1
      if ((scalar(@arguments) >= 1) && (&IsNatural($arguments[0]))) {
	$main::verbose = shift(@arguments);
      } else {
	$main::verbose = 1;
      }


=pod

=item B<-h>

Display full help message

=cut
    } elsif ($arg eq "-h") {
      &PrintHelp();


=pod

=item B<-help>

Same as -h

=cut
    } elsif ($arg eq "-help") {
      &PrintOptions();


=pod

=item B<-i inputfile>

If no input file is specified, the standard input is used.  This
allows to use the command within a pipe.

Note: this program does not read any input; the file name is only
reported in the verbose header.

=cut
    } elsif ($arg eq "-i") {
      $main::infile{input} = $option_value->($arg);


=pod

=item B<-o outputfile>

If no output file is specified, the standard output is used.  This
allows to use the command within a pipe.

=cut
    } elsif ($arg eq "-o") {
      $main::outfile{output} = $option_value->($arg);


=pod

=item B<-taxid taxID>

NCBI taxonomy ID of the query organism. It is converted to the
corresponding MICROSCOPE organism ID, which is required to access gene
information in galileo.

=cut
    } elsif ($arg eq "-taxid") {
      $main::taxid = $option_value->($arg);


=pod

=item B<-username username>

(optional)

A username is occasionally required to access some unpublished genomes
in galileo.

=cut
    } elsif ($arg eq "-username") {
      $main::username = $option_value->($arg);

=pod

=item B<-pass password>

(optional)

A password is occasionally required to access some unpublished genomes
in galileo.

=cut
    } elsif ($arg eq "-pass") {
      $main::password = $option_value->($arg);

    } else {
      &FatalError(join("\t", "Invalid option", $arg));

    }


=pod

=back

=cut

  }
}

################################################################
## Verbose message
##
## Print a commented header (lines starting with ";") on the output stream
## $main::out: program name and command-line arguments, program version,
## input/output files, query taxid and the MICROSCOPE organism ID.
sub Verbose {
  print $main::out "; gene-info-from-galileo ";
  &PrintArguments($main::out);
  printf $main::out "; %-22s\t%s\n", "Program version", $main::program_version;

  if (%main::infile) {
    print $main::out "; Input files\n";
    ## Sorted keys: reproducible order, and no reliance on the each() iterator
    foreach my $key (sort keys %main::infile) {
      printf $main::out ";\t%-13s\t%s\n", $key, $main::infile{$key};
    }
  }
  if (%main::outfile) {
    print $main::out "; Output files\n";
    foreach my $key (sort keys %main::outfile) {
      printf $main::out ";\t%-13s\t%s\n", $key, $main::outfile{$key};
    }
  }

  printf $main::out ";\t%-13s\t%s\n", "Query taxid", $main::taxid;
  printf $main::out ";\t%-13s\t%s\n", "MICROSCOPE taxid", $main::micrid;
}


__END__
