#!/usr/bin/env perl
############################################################
#
# $Id: supported-organisms-ensembl,v 1.8 2013/08/18 10:00:33 jvanheld Exp $
#
# Time-stamp
#
############################################################

BEGIN {
  ## Make the "lib/" directory located next to this script loadable.
  ## The pattern matches the trailing file name in $0 (a run of
  ## characters containing no slash or parenthesis); $` is whatever
  ## precedes that match, i.e. the directory part of the invocation
  ## path (empty when the script is called without a directory).
  push(@INC, $` . "lib/") if ($0 =~ /([^(\/)]+)$/);
}
require "RSA.lib";

use strict;
use DBI();
use Bio::EnsEMBL::LookUp;

package main;
{
  ## Main program: print the names of the supported organisms, one
  ## per line, on the output stream.
  ##
  ## Two modes:
  ## - with -branch_id, the species are obtained from
  ##   Bio::EnsEMBL::LookUp for the given taxonomic branch;
  ## - otherwise, the databases of the MySQL server are listed and
  ##   one organism is reported per distinct "<organism>_core_<version>"
  ##   database name.
  ##
  ## With verbosity >= 1, a summary and the execution time are
  ## appended to the output.

  ## Initialize parameters
  our $verbose = 0;

  ## By default, connect to the main ensembl
  our $host_db = "ensembldb.ensembl.org";
  our $port = 5306;
  our $db = "ensembl";
  our $org_counter = 0;
  our $branch_id;
  our $dbversion; ## remains undefined unless a "_core_" database is seen

  our %outfile;
  $outfile{output} = "";

  my $start_time = &RSAT::util::StartScript();

  ## Read arguments
  &ReadArguments();

  ################################################################
  ## Open output stream
  our $out = &OpenOutputFile($outfile{output});


  ## STRANGE: when I select ensemblgenomes here, I only get 284 organisms
  ## Note: selecting ensemblgenomes replaces the host and the port,
  ## including a host given with -host.
  if ($db eq "ensemblgenomes") {
    $host_db = 'mysql-eg-publicsql.ebi.ac.uk';
    $port = '4157';
  }

  if ($branch_id) {
    ## Select species for a given taxon
    my $lookup = Bio::EnsEMBL::LookUp->new();
    my @dbas = @{$lookup->get_all_by_taxon_branch($branch_id)};
    $org_counter = scalar(@dbas);
    foreach my $dba (@dbas) {
      print $out ucfirst($dba->species()), "\n";
    }

  } else {
    ## List the databases of the server and derive the organisms from
    ## the names of the core databases
    &RSAT::message::TimeWarn("Opening connection to DB", $host_db) if ($main::verbose >= 2);
    our $dbh = DBI->connect("DBI:mysql:host=".$host_db.":port=".$port, "anonymous", "", {'RaiseError' => 1});

    our $sth = $dbh->prepare("SHOW DATABASES");
    $sth->execute();

    ## An organism is printed only when it differs from the one of the
    ## previous row, so consecutive releases of the same organism are
    ## reported once.
    our $previous_org = "bogus";
    $org_counter = 0;
    while (my $ref = $sth->fetchrow_hashref()) {
      my $db_name = $ref->{'Database'};
      next unless ($db_name =~ /_core_\d+/);

      ## Version: what follows "_core_", truncated at the next
      ## underscore (e.g. homo_sapiens_core_72_37 -> 72).
      ## NOTE(review): the value is overwritten at each core database,
      ## so the summary reports the version of the last one listed.
      $dbversion = $db_name;
      $dbversion =~ s/.+_core_//;
      $dbversion =~ s/_.+//;

      ## Organism: what precedes "_core_"
      my $organism = $db_name;
      $organism =~ s/_core_.+//;
      next if ($organism eq $previous_org);

      $org_counter++;
      print $out ucfirst($organism), "\n";
      $previous_org = $organism;
    }
    $sth->finish();
    $dbh->disconnect();
  }

  ## Summary of the results
  if ($main::verbose >= 1) {
    ## NOTE(review): in -branch_id mode the lookup is created without
    ## $host_db, so this line may not describe the server actually
    ## queried -- confirm against the Bio::EnsEMBL::LookUp defaults.
    print $out "; EnsEMBL host database : ", $host_db, "\n";

    ## No version is detected in -branch_id mode (or when the server
    ## has no core database): skip the line rather than print an
    ## empty value.
    if (defined($dbversion)) {
      print $out "; EnsEMBL database version : ", $dbversion, "\n";
    }
    print $out "; Organisms: ", $org_counter, "\n";
  }

  ## Report execution time and close output stream
  my $exec_time = &RSAT::util::ReportExecutionTime($start_time);
  print $out $exec_time if ($main::verbose >= 1);
  close $out if ($outfile{output});

  exit(0);
}

################################################################
##################### SUBROUTINE DEFINITION ####################
################################################################

################################################################
#### Display full help message
sub PrintHelp {
  ## Print the full help message, then exit with status 0.
  ##
  ## The text is paged through "less". If the pager cannot be
  ## started, the text is written to STDOUT instead, so that the help
  ## is never silently lost.
  my $paged = open(my $help, "|-", "less");
  $help = \*STDOUT unless ($paged);

  print $help <<End_of_help;
NAME
	supported-organisms-ensembl

        2008 by Olivier Sand (oly\@bigre.ulb.ac.be)

USAGE
        supported-organisms-ensembl [-db database] [-branch_id taxid]
            [-host hostname] [-o outputfile] [-v #]

AUTHOR
	Olivier Sand <oly\@bigre.ulb.ac.be>

DESCRIPTION

	Get the list of organisms supported on the Ensembl database.

CATEGORY
	genomics
	administration

OPTIONS
	-h	(must be first argument) display full help message

	-help	(must be first argument) display options

	-v #	Verbosity level (default: 0). If no number is given,
		the level is set to 1. From level 1, a summary is
		appended to the output. From level 2, the connection
		to the database server is reported.

	-db database
		Supported: ensembl | ensemblgenomes
		Default: ensembl

	-host hostname
		Database server to query.
		Default: ensembldb.ensembl.org
		The host is replaced by the Ensembl Genomes server
		when -db ensemblgenomes is selected.

	-branch_id
		Select only species belonging to a given phylogenetic
		branch, given its taxonomic ID.

	-o file	Output file. If not specified, the result is printed on the
	   	standard output.


End_of_help
  ## Closing the pipe waits for the pager to terminate
  close($help) if ($paged);
  exit(0);
}

################################################################
#### Display short help message #####
sub PrintOptions {
  ## Print the short list of options, then exit.
  ##
  ## The text is paged through "less". If the pager cannot be
  ## started, the text is written to STDOUT instead, so that the
  ## message is never silently lost.
  my $paged = open(my $help, "|-", "less");
  $help = \*STDOUT unless ($paged);

  print $help <<End_short_help;
supported-organisms-ensembl options
-----------------------------------
-h	(must be first argument) display full help message
-help	(must be first argument) display options
-v	verbosity level
-db     ensembl | ensemblgenomes
-host	database server (default: ensembldb.ensembl.org)
-branch_id   taxonomic ID of selected branch
-o 	output file
End_short_help
  ## Closing the pipe waits for the pager to terminate
  close($help) if ($paged);
  exit;
}


################################################################
#### Read arguments 
sub ReadArguments {
  ## Parse @ARGV and set the corresponding variables of package main:
  ##   -v [#]       $main::verbose (1 when no natural number follows)
  ##   -host        $main::host_db
  ##   -db          $main::db
  ##   -branch_id   $main::branch_id
  ##   -o           $main::outfile{output}
  ## -h and -help call PrintHelp() and PrintOptions() respectively.
  ##
  ## Every position of @ARGV is examined, so an option value that
  ## looks like an option is itself interpreted as an option.
  ## Unknown arguments are ignored.
  ##
  ## An option that requires a value but is the last argument is
  ## reported on STDERR and ignored, so that the current setting is
  ## not overwritten with an undefined value.

  ## Options taking a value, with the variable each of them sets
  my %value_target = (
    "-host"      => \$main::host_db,        ## database server
    "-db"        => \$main::db,             ## ensembl or ensemblgenomes
    "-branch_id" => \$main::branch_id,      ## taxonomic ID of the branch
    "-o"         => \$main::outfile{output}, ## output file
  );

  ## The index is not named $a, which would mask the variable used by sort
  foreach my $i (0..$#ARGV) {
    my $arg = $ARGV[$i];
    my $value = $ARGV[$i+1]; ## undefined when $arg is the last argument

    if ($arg eq "-v") {
      ## Verbosity
      ## IsNatural() is not defined in this file (presumably in RSA.lib)
      if (&IsNatural($value)) {
	$main::verbose = $value;
      } else {
	$main::verbose = 1;
      }

    } elsif ($arg eq "-h") {
      ## Detailed help
      &PrintHelp();

    } elsif ($arg eq "-help") {
      ## List of options
      &PrintOptions();

    } elsif (exists($value_target{$arg})) {
      ## Option followed by a value
      if (defined($value)) {
	${$value_target{$arg}} = $value;
      } else {
	warn "WARNING: option ", $arg, " requires a value; ignored\n";
      }
    }
  }
}
