#!/usr/bin/perl -w

############################################################
#
# $Id: download-ensembl-features,v 1.40 2013/10/13 08:16:28 jvanheld Exp $
#
############################################################

use warnings;

=pod

=head1 NAME

download-ensembl-features

=head1 VERSION

$program_version

=head1 DESCRIPTION

Download the genomic features of a user-specified organism from the
Ensembl server, and convert the original format (dat) into the tab
files required for RSAT use.

=head1 AUTHORS

Jeremy.Delerce@univ-amu.fr

Revised by Jacques.van-Helden@univ-amu.fr

=head1 CATEGORY

=over

=item util

=back

=head1 USAGE

 download-ensembl-features -species # [-version #] [-no_name] [-o #] [-dir genomesDirectory] [-available_species] [-v #]

=head2 Examples

Get the list of species with features available at Ensembl

 download-ensembl-features -available_species

Get genomic feature coordinates for the human genome (Homo sapiens)

 download-ensembl-features -species Homo_sapiens

=head1 OUTPUT FORMAT

Multiple tab-formatted files (gene, CDS, mRNA, lincRNA, ...).

=head1 SEE ALSO

=head2 download-ensembl-genome.pl

Downloads genome for a species

=head1 WISH LIST

=cut

## Make the library folder loadable with "require": the path "<prefix>lib/"
## is appended to @INC, where <prefix> is the part of $0 that precedes the
## trailing run of characters other than "(", ")" and "/".
BEGIN {
  ## The prefix is captured explicitly instead of being read from the
  ## prematch variable, whose mere presence slows down every regular
  ## expression of the program on perl releases older than 5.20 (see perlvar).
  if ($0 =~ m/\A(.*?)[^()\/]+$/s) {
    push(@INC, $1."lib/");
  }
}

require "RSA.lib";
require "RSAT_to_ensembl.lib.pl";
use Bio::EnsEMBL::Registry;

################################################################
## Main package
package main;
{

  ###############################################################
  ## Initialise parameters
  ##
  ## All the parameters are package variables ("our") of package main.
  ## They receive their default value here, before &ReadArguments() is
  ## called further down in the script.
  our $start_time = &RSAT::util::StartScript();
  our $program_version = do { my @r = (q$Revision: 1.40 $ =~ /\d+/g); sprintf"%d."."%02d" x $#r, @r };

  ## Default output handle. The original code contained the misspelled
  ## bareword SDTOUT, which left $out holding a plain string rather than a
  ## usable handle. $out is re-assigned once the output file is opened.
  our $out = \*STDOUT;
  our %outfile = ();

  our $verbose = 0;
  our $data_dir = &Get_data_dir();
  our $registry = 'Bio::EnsEMBL::Registry';   ## class name, used for class-method calls
  our $db = "ensembl";
  our $safe_ensembl_version = &Get_ensembl_version_safe($db);
  our $latest_ensembl_version = &Get_ensembl_version($db);
  our $ensembl_version = $safe_ensembl_version;   ## default version = "safe" version
  our $species = "";
  our $assembly_version = "";

  our $only_api = 0;                ## if true, features are collected via the API rather than by SQL
  our $get_available_species = 0;   ## if true, only list the available species

  our $max_genes = 0; ## For testing only

  ## Transcript biotypes for which features are exported
  our @features_to_download = qw(
    protein_coding
    processed_transcript
    lincRNA
    sRNA
    snRNA
    miRNA
    misc_RNA
    snoRNA
  );


  ################################################################
  ## Read argument values
  &ReadArguments();

  ## Resolve the requested Ensembl version: the keywords "safe" and
  ## "latest" are replaced by the corresponding version numbers, any
  ## other value must be an integer between 70 and the latest version.
  if ($ensembl_version eq "safe") {
    $ensembl_version = $safe_ensembl_version;

  } elsif ($ensembl_version eq "latest") {
    $ensembl_version = $latest_ensembl_version;

  } else {
    ## The value is checked to be a plain integer before any numeric
    ## comparison, so that a value such as "70abc" is rejected instead
    ## of being silently treated as the number 70.
    unless (($ensembl_version =~ /^\d+$/) && ($ensembl_version >= 70)) {
      &RSAT::error::FatalError($ensembl_version, "is not a valid Ensembl version. Minimun supported version is 70.");
    }
    if ($ensembl_version > $latest_ensembl_version) {
      &RSAT::error::FatalError("$ensembl_version is not supported as Ensembl version. Can't be superior to latest ensembl_version",$latest_ensembl_version);
    }
  }

  ## Connection parameters of the database server
  my ($host,$port) = &Get_host_port($db);

  ################################################################
  ## Open the main output stream. The verbose report is only printed at
  ## this stage when the list of available species was requested.
  $out = &OpenOutputFile($outfile{output});
  if ($get_available_species && ($main::verbose >= 1)) {
    &Verbose();
  }

  ################################################################
  ## Print available species
  ##
  ## When the list of available species is requested, print one line per
  ## "core" database adaptor of the registry (species name with the first
  ## letter in upper case, followed by the taxon when one is known), then
  ## exit without downloading any feature.
  if ($get_available_species) {
    &RSAT::message::TimeWarn("Getting the list of available species (download-ensembl-features)") if ($main::verbose >= 1);
    $registry->load_registry_from_db(
      -host => $host,
      -port => $port,
      -user => 'anonymous',
      -db_version => $ensembl_version
    );

    my @db_adaptors = @{ $registry->get_all_DBAdaptors() };

    ## The species -> taxon table is only loaded for Ensembl Genomes
    my %species_taxon = ();
    if ($db eq "ensembl_genomes") {
      my $ens_version = &Get_ensembl_version_safe($db);
      %species_taxon = &Get_species_taxon($db,$ens_version);
    }

    ## Keep the adaptors of the "core" group only, sorted by species name
    ## (lexical variable; the original code filled an undeclared global).
    my @core_adaptors =
      sort { $a->species() cmp $b->species() }
      grep { $_->group() eq "core" } @db_adaptors;

    foreach my $core_adaptor (@core_adaptors) {
      my $species_name = $core_adaptor->species();
      print $out ucfirst($species_name);
      print $out "\t",$species_taxon{$species_name} if ($species_taxon{$species_name});
      print $out "\n";
    }
    $registry->disconnect_all();

    my $exec_time = &RSAT::util::ReportExecutionTime($start_time); ## This has to be executed by all scripts
    print $out $exec_time if ($main::verbose >= 1); ## only report exec time if verbosity is specified
    close $out;
    exit(0);
  }

  ## From here on, a species is mandatory
  &RSAT::error::FatalError("No species indicate. Use -species option") unless ($species);

  ################################################################
  ## Connecting to ensembl (registry restricted to the requested species)
  &RSAT::message::TimeWarn("Loading registry from Ensembl") if ($main::verbose >= 2);
  $registry->load_registry_from_db(
    -host => $host,
    -port => $port,
    -user => 'anonymous',
    -db_version => $ensembl_version,
    -species => $species
  );

  ################################################################
  ## Get adaptors
  &RSAT::message::TimeWarn("Getting species slices from Ensembl") if ($main::verbose >= 2);
  my $slice_adaptor = $registry->get_adaptor($species, 'core', 'slice');
  my $mca = $registry->get_adaptor( $species, 'Core', 'MetaContainer' );
  my $tla = $registry->get_adaptor( $species, 'Core', 'Translation' );

  my $dba = $registry->get_DBAdaptor( $species, 'Core' );

  ## Stop with an explicit message when the adaptors used below could not
  ## be obtained, rather than failing later on a method call on an
  ## undefined value.
  unless ($slice_adaptor && $mca && $dba) {
    &RSAT::error::FatalError("Cannot get the Ensembl core adaptors for species", $species, "in Ensembl version", $ensembl_version);
  }

  ## Direct SQL access to the core database
  my $dbc = $dba->dbc();
  my $sql_helper = $dbc->sql_helper();

  ################################################################
  ## Get all top-level slices
  &RSAT::message::TimeWarn("Fetching all slices for species",$species) if ($verbose >= 2);
  my @slices = @{$slice_adaptor->fetch_all('toplevel')};

  ## Fix an Ensembl error with the two Y human chromosomes: on the
  ## API, the Human chromosome covers two slices, because a large
  ## fragment (covering 2.6Mb) at the beginning of chromosome Y is
  ## homologous to the X chromosome. The Ensembl dataset presents two
  ## separated slices to avoid redundancy in the annotations. However
  ## this is not compatible with RSAT data model -> we merge them in a
  ## single contig covering the whole Y chromosome.
  ##
  ## In addition, there are apparently some inconsistencies between
  ## some feature annotations and the limits of the Y chromosome
  ## (feature coordinates are provided relative to the chromosome
  ## start, but the slice only starts 2.6Mb further).
  ##
  ## The species name is compared case-insensitively, so that the merge
  ## is also applied when the species is given as "Homo_sapiens" (the
  ## spelling shown in the usage example of this script).
  if (lc($species) eq "homo_sapiens") {
    my $whole_y_slice = $slice_adaptor->fetch_by_region( 'chromosome', 'Y' );
    if ($whole_y_slice) {
      ## Replace the partial Y slices by the slice covering the whole chromosome
      my @slice_tmp = ();
      foreach my $slice (@slices) {
        push (@slice_tmp,$slice) unless ($slice->name() =~ /:Y:/);
      }
      push (@slice_tmp,$whole_y_slice);
      @slices = @slice_tmp;
    } else {
      ## Never store an undefined slice: keep the original slices
      &RSAT::message::Warning("Cannot fetch the slice of the whole chromosome Y; keeping the original Y slices");
    }
  }

  ## Index the full slice names by sequence region name. This table is
  ## also used below to discard the features located on sequences that
  ## do not belong to the selected slices.
  my %slice_names = ();
  foreach my $slice (@slices) {
    $slice_names{$slice->seq_region_name()} = $slice->name();
  }


  ################################################################
  ## Get genome_dir
  ##
  ## The assembly version is the second colon-separated field of the
  ## name of the first slice.
  &RSAT::error::FatalError("No top-level slice found for species", $species) unless (@slices);
  my @token = split(":",$slices[0]->name());
  $assembly_version = $token[1];
  my $genome_dir = &Get_genome_dir($data_dir,$species, $assembly_version,$ensembl_version);

  &RSAT::util::CheckOutDir($genome_dir);

  ################################################################
  ## Open the output streams
  ##
  ## &OpenOutputStreams() returns a hash indexed by stream name. The
  ## streams with a fixed name are copied into dedicated variables; the
  ## streams indexed by transcript biotype are looked up in %out_stream
  ## where they are needed.
  my %out_stream = &OpenOutputStreams(@features_to_download);

  my (
    $outstream_organism,
    $outstream_organism_names,
    $outstream_gene,
    $outstream_gene_names,
    $outstream_gene_to_reaction,
    $outstream_exon,
    $outstream_intron,
    $outstream_coding_exon,
    $outstream_cds,
    $outstream_utr,
    $outstream_cds_names,
    $outstream_aa_seq,
  ) = @out_stream{qw(
    organism
    organism_names
    gene
    gene_names
    gene_to_reaction
    exon
    intron
    coding_exon
    cds
    utr
    cds_names
    aa_seq
  )};

  if ($verbose >= 1) {
    &Verbose();
  }

  ################################################################
  ## Index the transcript biotypes to download in a hash (biotype => 1),
  ## to test quickly whether a given biotype has to be exported.
  my %feat_to_download = map { $_ => 1 } @features_to_download;

  ################################################################
  ## Version number used for the FTP download. By default this is the
  ## Ensembl version. For Ensembl Genomes it is replaced by the
  ## second-to-last "_"-separated field of the name of the "multi"
  ## database.
  my $ftp_ensembl_version = $ensembl_version;
  if ($db eq "ensembl_genomes") {
    if ($main::verbose >= 2) {
      &RSAT::message::TimeWarn("Converting Ensembl version to EnsemblGenomes version");
    }
    $registry->load_registry_from_db(
      -host       => $host,
      -port       => $port,
      -user       => 'anonymous',
      -species    => 'multi',
      -db_version => $ensembl_version
    );

    my @multi_adaptors = grep { $_->species() eq "multi" } @{ $registry->get_all_DBAdaptors() };
    foreach my $multi_adaptor (@multi_adaptors) {
      @token = split("_", $multi_adaptor->dbc()->dbname());
      $ftp_ensembl_version = $token[-2];
    }
  }

  ################################################################
  ## Downloading features


  ## Organism: one line with the taxonomy ID, the classification (in
  ## reverse order of the list returned by the meta container, followed
  ## by the first two words of the scientific name) and the scientific name.
  if ($verbose >= 2) {
    &RSAT::message::TimeWarn("Getting Organism");
  }

  my ($c1,$c2) = split(" ",$mca->get_scientific_name());

  print {$outstream_organism}
    $mca->get_taxonomy_id(), "\t",
    join(";", reverse(@{ $mca->get_classification() })), ";", $c1, ";", ucfirst($c2), "\t",
    $mca->get_scientific_name(), "\n";



  ## Organism names: the species name given by the user is the primary
  ## name, all its registry aliases are alternate names.
  if ($verbose >= 2) {
    &RSAT::message::TimeWarn("Getting Organism Names");
  }

  print {$outstream_organism_names} $mca->get_taxonomy_id(), "\t", $species, "\t", "primary", "\n";
  my @alias = @{$registry->get_all_aliases($species)};
  foreach my $alternate_name (@alias) {
    print {$outstream_organism_names} $mca->get_taxonomy_id(), "\t", $alternate_name, "\t", "alternate", "\n";
  }

  ################################################################
  ## By default, we collect features by SQL, much faster than via the
  ## object-oriented API.
  if (! $only_api) {

    ## Genes
    ##
    ## One SQL query returns all the genes of the species (selected by its
    ## production name in the meta table). Each row is converted into a
    ## hash reference by the callback.
    if ($verbose >= 2) {
      &RSAT::message::TimeWarn("Getting Gene");
    }

    my $gene_list = $sql_helper->execute(
      -SQL      => "select g.stable_id, g.status, g.biotype, xref.display_label, g.seq_region_start, g.seq_region_end, g.seq_region_strand, g.description, s.name from gene g join xref on (g.display_xref_id = xref.xref_id) join seq_region s on (g.seq_region_id=s.seq_region_id) join coord_system cs using (coord_system_id) join meta using (species_id) where meta.meta_key='species.production_name' and meta_value=?",
      -CALLBACK => sub {
        my ($columns) = @_;
        ## Field names, in the order of the selected columns
        my %gene = ();
        @gene{qw(gene_id gene_status gene_biotype gene_external_name gene_start gene_end gene_strand gene_description seq_name)} = @{$columns};
        return \%gene;
      },
      -PARAMS => [$species]
    );

    foreach my $gene (@{$gene_list}) {
      ## Skip the genes located on a sequence that is not a selected slice
      my $slice_name = $slice_names{ $gene->{'seq_name'} };
      next unless ($slice_name);

      print {$outstream_gene}
        $gene->{'gene_id'}, "\t", $gene->{'gene_status'}, "\t", $gene->{'gene_biotype'}, "\t", $gene->{'gene_external_name'}, "\t",
        $slice_name, "\t",
        $gene->{'gene_start'}, "\t", $gene->{'gene_end'}, "\t", &PrintStrand($gene->{'gene_strand'}), "\t", &PrintDescription($gene->{'gene_description'}), "\n";
    }
    $gene_list = "";   ## drop the reference to the query result


    ## Gene names
    ##
    ## Each row of the query associates a gene with one external reference
    ## (xref). For each row located on a selected slice, the gene ID is
    ## printed as primary name, then the accession and the display label of
    ## the xref are printed as alternate names, unless they repeat a name
    ## already printed for the same row.
    if ($verbose >= 2) {
      &RSAT::message::TimeWarn("Getting Gene Names");
    }

    my $gene_names_list = $sql_helper->execute(
      -SQL      => "select g.stable_id, e1.db_name, x1.dbprimary_acc, x1.display_label, s.name from gene g join object_xref ox1 on (g.gene_id=ox1.ensembl_id and ox1.ensembl_object_type='Gene') join xref x1 on (x1.xref_id=ox1.xref_id) join external_db e1 on (e1.external_db_id=x1.external_db_id) join seq_region s on (g.seq_region_id=s.seq_region_id) join coord_system using (coord_system_id) join meta using (species_id) where meta.meta_key='species.production_name' and meta_value=?",
      -CALLBACK => sub {
        my ($columns) = @_;
        my %xref = ();
        @xref{qw(gene_id source synonym1 synonym2 seq_name)} = @{$columns};
        return \%xref;
      },
      -PARAMS => [$species]
    );

    foreach my $xref (@{$gene_names_list}) {
      next unless ($slice_names{ $xref->{'seq_name'} });

      my $gene_id = $xref->{'gene_id'};
      print {$outstream_gene_names} $gene_id, "\t", $gene_id, "\t", 'primary', "\n";

      ## Names already printed for this row
      my %seen = ($gene_id => 1);
      foreach my $synonym ($xref->{'synonym1'}, $xref->{'synonym2'}) {
        next if ($seen{$synonym});
        print {$outstream_gene_names} $gene_id, "\t", $synonym, "\t", 'alternate', "\t", $xref->{'source'}, "\n";
        $seen{$synonym} = 1;
      }
    }
    $gene_names_list = "";   ## drop the reference to the query result


    ## Gene to reaction
    ##
    ## Cross-references attached to the translations of each gene,
    ## restricted to the external databases Uniprot/SWISSPROT,
    ## Uniprot/SPTREMBL, IntEnz and Rhea. Unlike the other tables, the rows
    ## are not filtered on the selected slices.
    if ($verbose >= 2) {
      &RSAT::message::TimeWarn("Getting Gene_to_reaction");
    }

    my $gene_reaction_list = $sql_helper->execute(
      -SQL      => "select  g.stable_id, gx.display_label, g.description, e1.db_display_name, x1.display_label from gene g  join xref gx on (g.display_xref_id=gx.xref_id) join transcript tr using (gene_id)  join translation tl using (transcript_id)  join object_xref ox1 on (tl.translation_id=ox1.ensembl_id and ox1.ensembl_object_type='Translation')  join xref x1 on (x1.xref_id=ox1.xref_id)  join external_db e1 on (e1.external_db_id=x1.external_db_id) join seq_region s on (g.seq_region_id=s.seq_region_id)  join coord_system using (coord_system_id)  join meta using (species_id)  where meta.meta_key='species.production_name' and meta_value=? and e1.db_name in ('Uniprot/SWISSPROT','Uniprot/SPTREMBL','IntEnz','Rhea')",
      -CALLBACK => sub {
        my ($columns) = @_;
        my %reaction = ();
        @reaction{qw(gene_id gene_external_name gene_description db db_label)} = @{$columns};
        return \%reaction;
      },
      -PARAMS => [$species]
    );

    foreach my $reaction (@{$gene_reaction_list}) {
      print {$outstream_gene_to_reaction}
        join("\t",
             $reaction->{'gene_id'},
             $reaction->{'gene_external_name'},
             &PrintDescription($reaction->{'gene_description'}),
             $reaction->{'db'},
             $reaction->{'db_label'}),
        "\n";
    }
    @{$gene_reaction_list} = ();   ## all rows are consumed

	## Transcripts
    ##
    ## Each transcript is written to the output stream of its biotype.
    ## Transcripts are skipped when they are located on a sequence that is
    ## not a selected slice, or when their biotype is not one of the
    ## biotypes to download.
    if ($verbose >= 2) {
      &RSAT::message::TimeWarn("Getting Transcript");
    }

    my $transcript_list = $sql_helper->execute(
      -SQL      => "select g.stable_id, g.description,  tr.stable_id,tr.status, tr.biotype, xref.display_label, tr.seq_region_start, tr.seq_region_end, tr.seq_region_strand, s.name from gene g join transcript tr using (gene_id) join xref on (g.display_xref_id = xref.xref_id) join seq_region s on (g.seq_region_id=s.seq_region_id) join coord_system cs using (coord_system_id) join meta using (species_id) where meta.meta_key='species.production_name' and meta_value=?",
      -CALLBACK => sub {
        my ($columns) = @_;
        my %transcript = ();
        @transcript{qw(gene_id gene_description transcript_id transcript_status transcript_biotype transcript_external_name transcript_start transcript_end transcript_strand seq_name)} = @{$columns};
        return \%transcript;
      },
      -PARAMS => [$species]
    );

    foreach my $transcript (@{$transcript_list}) {
      my $slice_name = $slice_names{ $transcript->{'seq_name'} };
      next unless ($slice_name);

      my $biotype = $transcript->{'transcript_biotype'};
      next unless ($feat_to_download{$biotype});

      ## One output stream per transcript biotype
      my $outstream_transcript = $out_stream{$biotype};

      print {$outstream_transcript}
        $transcript->{'transcript_id'}, "\t", $transcript->{'transcript_status'}, "\t", &PrintTrBiotype($biotype), "\t", $transcript->{'transcript_external_name'}, "\t",
        $slice_name, "\t",
        $transcript->{'transcript_start'}, "\t", $transcript->{'transcript_end'}, "\t", &PrintStrand($transcript->{'transcript_strand'}), "\t",
        &PrintDescription($transcript->{'gene_description'}), "\t", $transcript->{'gene_id'}, "\n";
    }
    $transcript_list = undef;   ## drop the reference to the query result


    ## Transcript names
    ##
    ## Each row of the query associates a transcript with one external
    ## reference (xref). For each retained row, the transcript ID is the
    ## primary name; the gene ID, the xref accession and the xref display
    ## label are alternate names, printed unless they repeat a name already
    ## printed for the same row. The names are written to the stream
    ## "<biotype>_names" of the transcript biotype.
    if ($verbose >= 2) {
      &RSAT::message::TimeWarn("Getting Transcript Names");
    }
    my $transcript_names_list = $sql_helper->execute(
      -SQL      => "select tr.stable_id,tr.biotype, g.stable_id, e1.db_name, x1.dbprimary_acc, x1.display_label, s.name from gene g join transcript tr using (gene_id) join object_xref ox1 on (tr.transcript_id=ox1.ensembl_id and ox1.ensembl_object_type='Transcript') join xref x1 on (x1.xref_id=ox1.xref_id) join external_db e1 on (e1.external_db_id=x1.external_db_id) join seq_region s on (g.seq_region_id=s.seq_region_id) join coord_system using (coord_system_id) join meta using (species_id) where meta.meta_key='species.production_name' and meta_value=?",
      -CALLBACK => sub {
        my ($columns) = @_;
        my %xref = ();
        @xref{qw(transcript_id transcript_biotype gene_id source synonym1 synonym2 seq_name)} = @{$columns};
        return \%xref;
      },
      -PARAMS => [$species]
    );

    foreach my $xref (@{$transcript_names_list}) {
      next unless ($slice_names{ $xref->{'seq_name'} });
      next unless ($feat_to_download{ $xref->{'transcript_biotype'} });

      my $outstream_transcript_names = $out_stream{ $xref->{'transcript_biotype'}."_names" };
      my $transcript_id = $xref->{'transcript_id'};

      print {$outstream_transcript_names} $transcript_id, "\t", $transcript_id, "\t", 'primary', "\t", 'transcript_id', "\n";

      ## Alternate names, each with the label printed in the last column
      my %seen = ($transcript_id => 1);
      my @alternates = (
        [ $xref->{'gene_id'},  'gene_id' ],
        [ $xref->{'synonym1'}, $xref->{'source'} ],
        [ $xref->{'synonym2'}, $xref->{'source'} ],
      );
      foreach my $alternate (@alternates) {
        my ($name, $origin) = @{$alternate};
        next if ($seen{$name});
        print {$outstream_transcript_names} $transcript_id, "\t", $name, "\t", 'alternate', "\t", $origin, "\n";
        $seen{$name} = 1;
      }
    }
    $transcript_names_list = undef;   ## drop the reference to the query result



    ## Exons and introns
    ##
    ## Every exon located on a selected slice and belonging to a transcript
    ## of a requested biotype is written to the exon stream. An intron is
    ## written each time two consecutive retained rows belong to the same
    ## transcript: it spans from the end of the previous exon + 1 to the
    ## start of the current exon - 1.
    ##
    ## The rows are processed in a single loop. The original code first
    ## searched the first retained row with a "while (1)" loop that had no
    ## end-of-list test, and thus never terminated properly when no row
    ## passed the filters.
    ##
    ## NOTE(review): the intron coordinates rely on the rows of a transcript
    ## being returned consecutively and by increasing position; the query
    ## has no "order by" clause -- to be confirmed.
    &RSAT::message::TimeWarn("Getting exons and introns") if ($verbose >= 2);
    my $exon_list = $sql_helper->execute(
      -SQL      => "select g.stable_id, tr.stable_id, tr.biotype, ex.stable_id, ex.seq_region_start, ex.seq_region_end, ex.seq_region_strand, s.name, xref.display_label from gene g join transcript tr using (gene_id) join exon_transcript cod_ex on (tr.transcript_id=cod_ex.transcript_id) join exon ex on (ex.exon_id=cod_ex.exon_id) join xref on (g.display_xref_id = xref.xref_id) join seq_region s on (g.seq_region_id=s.seq_region_id) join coord_system cs using (coord_system_id) join meta using (species_id) where meta.meta_key='species.production_name' and meta_value=?",
      -CALLBACK => sub {
        my @row = @{ shift @_ };
        return {
          gene_id => $row[0],
          transcript_id => $row[1], transcript_biotype => $row[2], transcript_external_name => $row[8],
          exon_id => $row[3], exon_start => $row[4], exon_end => $row[5], exon_strand => $row[6],
          seq_name => $row[7]
        };
      },
      -PARAMS => [$species]
    );

    my $previous_exon;   ## last retained row (undefined before the first one)
    while ( my $exon = shift @{$exon_list} ) {
      my $slice_name = $slice_names{ $exon->{'seq_name'} };
      next unless ($slice_name);
      next unless ($feat_to_download{ $exon->{'transcript_biotype'} });

      ## Exon
      print {$outstream_exon}
        $exon->{'exon_id'}, "\t", "exon", "\t", $exon->{'exon_id'}, "\t",
        $slice_name, "\t",
        $exon->{'exon_start'}, "\t", $exon->{'exon_end'}, "\t", &PrintStrand($exon->{'exon_strand'}), "\t",
        $exon->{'transcript_id'}, "\t", $exon->{'gene_id'}, "\n";

      ## Intron between the previous and the current exon of the same transcript
      if ($previous_exon && ($previous_exon->{'transcript_id'} eq $exon->{'transcript_id'})) {
        my $intron_start = $previous_exon->{'exon_end'} + 1;
        my $intron_end = $exon->{'exon_start'} - 1;
        print {$outstream_intron}
          "INTRON".$exon->{'transcript_id'}, "\t", "intron", "\t", "INTRON".$exon->{'transcript_external_name'}, "\t",
          $slice_name, "\t",
          $intron_start, "\t", $intron_end, "\t", &PrintStrand($exon->{'exon_strand'}), "\t",
          $exon->{'transcript_id'}, "\t", $exon->{'gene_id'}, "\n";
      }

      $previous_exon = $exon;
    }
    $exon_list = undef;   ## drop the reference to the query result



    ## Coding exons
    ##
    ## Exons of the transcripts that have a translation (the query joins
    ## the translation table), restricted to the selected slices and to the
    ## requested transcript biotypes.
    if ($verbose >= 2) {
      &RSAT::message::TimeWarn("Getting coding exons");
    }
    my $coding_exon_list = $sql_helper->execute(
      -SQL      => "select g.stable_id, tr.stable_id, tr.biotype, ex.stable_id, ex.seq_region_start, ex.seq_region_end, ex.seq_region_strand, s.name from gene g join transcript tr using (gene_id) join translation tl using (transcript_id) join exon_transcript ex_tran on (tr.transcript_id=ex_tran.transcript_id)  join exon ex on (ex.exon_id=ex_tran.exon_id) join seq_region s on (g.seq_region_id=s.seq_region_id) join coord_system cs using (coord_system_id) join meta using (species_id) where meta.meta_key='species.production_name' and meta_value=?",
      -CALLBACK => sub {
        my ($columns) = @_;
        my %exon = ();
        @exon{qw(gene_id transcript_id transcript_biotype exon_id exon_start exon_end exon_strand seq_name)} = @{$columns};
        return \%exon;
      },
      -PARAMS => [$species]
    );

    foreach my $exon (@{$coding_exon_list}) {
      my $slice_name = $slice_names{ $exon->{'seq_name'} };
      next unless ($slice_name);
      next unless ($feat_to_download{ $exon->{'transcript_biotype'} });

      print {$outstream_coding_exon}
        $exon->{'exon_id'}, "\t", "coding_exon", "\t", $exon->{'exon_id'}, "\t",
        $slice_name, "\t",
        $exon->{'exon_start'}, "\t", $exon->{'exon_end'}, "\t", &PrintStrand($exon->{'exon_strand'}), "\t",
        $exon->{'transcript_id'}, "\t", $exon->{'gene_id'}, "\n";
    }
    $coding_exon_list = undef;   ## drop the reference to the query result



    ## CDS and UTR
    ##
    ## One row per translation. The CDS limits are computed from the
    ## transcript start and from the seq_start / seq_end fields of the
    ## translation; for a transcript on the reverse strand (-1) they are
    ## mirrored relative to the transcript limits. The regions of the
    ## transcript on each side of the CDS are exported as UTRs.
    ##
    ## The debugging code that printed intermediate coordinates to STDOUT
    ## for one hard-coded translation ID has been removed.
    ##
    ## NOTE(review): the columns exon_start and exon_end (limits of the end
    ## exon of the translation) are fetched but not used; check whether
    ## seq_start / seq_end are really offsets relative to the transcript
    ## start, as assumed by the computation below.
    &RSAT::message::TimeWarn("Getting CDS and UTR") if ($verbose >= 2);

    my $cds_utr_list = $sql_helper->execute(
      -SQL      => "select g.stable_id, g.description, tr.stable_id, tr.status, xref.display_label, tr.seq_region_start, tr.seq_region_end, tr.seq_region_strand, tl.stable_id, tl.seq_start, tl.seq_end, ex.seq_region_start, ex.seq_region_end, s.name from gene g join transcript tr using (gene_id) join translation tl using (transcript_id) join exon ex on (ex.exon_id=tl.end_exon_id) join xref on (g.display_xref_id = xref.xref_id) join seq_region s on (g.seq_region_id=s.seq_region_id) join coord_system cs using (coord_system_id) join meta using (species_id) where meta.meta_key='species.production_name' and meta_value=?",
      -CALLBACK => sub {
        my @row = @{ shift @_ };
        return {
          gene_id => $row[0], gene_description => $row[1],
          transcript_id => $row[2], transcript_status => $row[3], transcript_external_name => $row[4],
          transcript_start => $row[5], transcript_end => $row[6], transcript_strand => $row[7],
          translate_id => $row[8], translate_start => $row[9], translate_end => $row[10],
          exon_start => $row[11], exon_end => $row[12],
          seq_name => $row[13]
        };
      },
      -PARAMS => [$species]
    );

    while ( my $row = shift @{$cds_utr_list} ) {
      my %row = %{$row};
      next unless ( $slice_names{ $row{'seq_name'} });

      ## CDS limits, counted from the transcript start
      my $translate_start = $row{'transcript_start'}+$row{'translate_start'}-1;
      my $translate_end = $row{'transcript_start'}+$row{'translate_end'}-1;

      ## Reverse strand: mirror the CDS limits within the transcript
      if ( $row{'transcript_strand'} == -1 ) {
        my $translate_start_tmp = $row{'transcript_start'}+($row{'transcript_end'}-$translate_end);
        $translate_end = $row{'transcript_end'}-($translate_start-$row{'transcript_start'});
        $translate_start = $translate_start_tmp;
      }

      ### CDS
      print {$outstream_cds}
        $row{'translate_id'}, "\t", $row{'transcript_status'}, "\t", 'CDS', "\t", $row{'translate_id'}, "\t", # translate id, status, type, name
        $slice_names{ $row{'seq_name'} }, "\t", # slice name
        $translate_start, "\t", $translate_end, "\t", &PrintStrand($row{'transcript_strand'}), "\t", # translate coordinates
        &PrintDescription($row{'gene_description'}), "\t", $row{'transcript_id'}, "\t", $row{'gene_id'}, "\n"; # description, transcript_id, gene_id

      ### UTR
      ## "left" = between the transcript start and the CDS,
      ## "right" = between the CDS and the transcript end.
      ## The 5' / 3' labels are swapped on the reverse strand.
      my $left_utr = "5'UTR";
      my $left_start = $row{'transcript_start'};
      my $left_end = $translate_start;
      my $right_utr = "3'UTR";
      my $right_start = $translate_end;
      my $right_end = $row{'transcript_end'};

      if ( $row{'transcript_strand'} == -1 ) {
        $left_utr = "3'UTR";
        $right_utr = "5'UTR";
      }

      ## No left UTR when the CDS starts at the transcript start
      if ( $left_start != $left_end ) {
        print {$outstream_utr}
          $left_utr."-".$row{'transcript_id'}, "\t", $left_utr, "\t", $left_utr."-".$row{'transcript_external_name'}, "\t", # utr id, type, external name
          $slice_names{ $row{'seq_name'} }, "\t", # slice name
          $left_start, "\t", $left_end-1, "\t", &PrintStrand($row{'transcript_strand'}), "\t", # utr coordinates
          $row{'transcript_id'}, "\t", $row{'gene_id'}, "\n"; # transcript_id, gene_id
      }

      ## No right UTR when the CDS ends at the transcript end
      if ( $right_start != $right_end ) {
        print {$outstream_utr}
          $right_utr."-".$row{'transcript_id'}, "\t", $right_utr, "\t", $right_utr."-".$row{'transcript_external_name'}, "\t", # utr id, type, external name
          $slice_names{ $row{'seq_name'} }, "\t", # slice name
          $right_start+1, "\t", $right_end, "\t", &PrintStrand($row{'transcript_strand'}), "\t", # utr coordinates
          $row{'transcript_id'}, "\t", $row{'gene_id'}, "\n"; # transcript_id, gene_id
      }
    }
    $cds_utr_list = ();



    ## CDS names
    ##
    ## Each row of the query associates a translation with one external
    ## reference (xref). For each row located on a selected slice, the
    ## translation ID is the primary name; the gene ID, the transcript ID,
    ## the xref accession and the xref display label are alternate names,
    ## printed unless they repeat a name already printed for the same row.
    ##
    ## The first column is always the translation ID. The original code
    ## printed the transcript ID in the first column of the two synonym
    ## lines, unlike every other line of this table.
    &RSAT::message::TimeWarn("Getting CDS names") if ($verbose >= 2);

    my $cds_names_list = $sql_helper->execute(
      -SQL      => "select tl.stable_id,tr.stable_id,g.stable_id, e1.db_name, x1.dbprimary_acc, x1.display_label, s.name from gene g join transcript tr using (gene_id) join translation tl using (transcript_id) join object_xref ox1 on (tl.translation_id=ox1.ensembl_id and ox1.ensembl_object_type='Translation') join xref x1 on (x1.xref_id=ox1.xref_id) join external_db e1 on (e1.external_db_id=x1.external_db_id) join seq_region s on (g.seq_region_id=s.seq_region_id) join coord_system using (coord_system_id) join meta using (species_id) where meta.meta_key='species.production_name' and meta_value=?",
      -CALLBACK => sub {
        my @row = @{ shift @_ };
        return { translate_id => $row[0], transcript_id => $row[1], gene_id => $row[2], source => $row[3], synonym1 => $row[4], synonym2 => $row[5], seq_name => $row[6] };
      },
      -PARAMS => [$species] );

    while ( my $row = shift @{$cds_names_list} ) {
      my %row = %{$row};
      next unless ( $slice_names{ $row{'seq_name'} });

      my $translate_id = $row{'translate_id'};

      ## Primary name
      print {$outstream_cds_names} $translate_id, "\t", $translate_id, "\t", 'primary', "\t", 'translate_id', "\n";

      ## Alternate names, each with the label printed in the last column
      my %printed_name = ($translate_id => 1);
      my @alternates = (
        [ $row{'gene_id'},       'gene_id' ],
        [ $row{'transcript_id'}, 'transcript_id' ],
        [ $row{'synonym1'},      $row{'source'} ],
        [ $row{'synonym2'},      $row{'source'} ],
      );
      foreach my $alternate (@alternates) {
        my ($name, $origin) = @{$alternate};
        next if ($printed_name{$name});
        print {$outstream_cds_names} $translate_id, "\t", $name, "\t", 'alternate', "\t", $origin, "\n";
        $printed_name{$name} = 1;
      }
    }
    $cds_names_list = ();


    ##Get AA_seq
    &RSAT::message::TimeWarn("Getting protein sequences") if ($verbose >= 2);

    my $prot_seq_ftp = &Get_pep_fasta_ftp($db,$species,$ftp_ensembl_version);
    system("wget -NL -nv $prot_seq_ftp -P $genome_dir");

    @token = split(/\//, $prot_seq_ftp);
    my $aa_file = $genome_dir."/".&Get_species_dir_name($species, $assembly_version,$ensembl_version)."_aa.fasta".".gz";
    system("mv $genome_dir/$token[-1] $aa_file");
    system("gzip -df $aa_file");
  }

  ##################
  ## Via API ONLY
  else {

    ## Get all features
    my $s=0;
    my $nb_slices = scalar(@slices);

    foreach my $slice (@slices) {
      $s++;

      ## Gene
      my @genes = @{$slice->get_all_Genes()};
      my $nb_genes = scalar(@genes);

      &RSAT::message::TimeWarn("Getting features for slice", $s."/".$nb_slices, $slice->name(), $nb_genes." genes") if ($verbose >= 2);

      my $g=0;
      while ( my $gene = shift @genes ) {
        $g++;

        if (($max_genes > 0) && ($g > $max_genes)) {
	        &RSAT::message::Warning("Stopping gene names after", $g, "genes");
	        last;
        }

        my $gene_known = "Novel";
        $gene_known = "Known" if ($gene->is_known());

        my $gene_external_name = $gene->external_name();
        $gene_external_name = $gene->display_id() unless ($gene->external_name());

        print $outstream_gene $gene->display_id(),"\t",$gene_known,"\t",$gene->biotype(),"\t",$gene_external_name,"\t";
        print $outstream_gene $slice->name(),"\t",$gene->start(),"\t",$gene->end(),"\t",&PrintStrand($gene->strand()),"\t",&PrintDescription($gene->description),"\n";

        if ( $g%200 == 1 && $verbose >= 3) {
          &RSAT::message::TimeWarn("Getting gene", $gene_external_name, $g."/".$nb_genes);
        }

        ## Gene names
        my %print_name = ();

        my @names_list = @{ $gene->get_all_DBEntries() };
        foreach my $name (@names_list) {
          next if ($print_name{$name->display_id()});

          my $status = "alternate";
          $status = "primary" if ($name->display_id() eq $gene_external_name);

          print $outstream_gene_names $gene->display_id(),"\t",$name->display_id(),"\t";
          print $outstream_gene_names $status,"\t",$name->dbname(),"\n";

          $print_name{$name->display_id()} = 1;
        }

	      unless ($print_name{$gene_external_name}) {
          print $outstream_gene_names $gene->display_id(),"\t",$gene_external_name,"\t";
          print $outstream_gene_names 'primary',"\t",'Gene_name',"\n";
	      }

        ## Transcript
        my $transcripts = $gene->get_all_Transcripts();

        while ( my $transcript = shift @{$transcripts} ) {
          next unless ($feat_to_download{$transcript->biotype()});

        	## Choose the appropriate output stream depending on the biotype of the current transcript
          my $outstream_transcript = $out_stream{$transcript->biotype()};

          ## Define transcript (evidence) type
          my $trans_known = "putative";
          $trans_known = "known" if ($transcript->is_known());

          ## Define external_name for current transcript
          my $trans_external_name = $transcript->external_name() || $transcript->display_id();

          my $coding_s = ".";
          my $coding_e = ".";
          $coding_s = $transcript->coding_region_start() if ($transcript->coding_region_start());
          $coding_e = $transcript->coding_region_end() if ($transcript->coding_region_end());

          print $outstream_transcript $transcript->display_id(),"\t",$trans_known,"\t", &PrintTrBiotype($transcript->biotype()),"\t",$gene_external_name,"\t";
          print $outstream_transcript $transcript->slice()->name(),"\t",$transcript->start(),"\t", $transcript->end(),"\t",&PrintStrand($gene->strand()),"\t";
          print $outstream_transcript $coding_s,"\t",$coding_e,"\t";
          print $outstream_transcript &PrintDescription($gene->description),"\t",$gene->display_id(),"\n";

          ## Transcript names
          my $outstream_transcript_names = $out_stream{$transcript->biotype()."_names"};

          my %print_name = ();
          my @names_list = @{ $transcript->get_all_DBEntries() };
          foreach $name (@names_list) {
            next if ($print_name{$name->display_id()});

            my $status = "alternate";
            $status = "primary" if (($trans_external_name) && ($name->display_id() eq $trans_external_name));

            print $outstream_transcript_names $transcript->display_id(),"\t",$name->display_id(),"\t";
            print $outstream_transcript_names $status,"\t",$name->dbname(),"\n";

            $print_name{$name->display_id()} = 1;
          }

          unless ($print_name{$trans_external_name}) {
            print $outstream_transcript_names $transcript->display_id(),"\t",$trans_external_name,"\t";
            print $outstream_transcript_names 'primary',"\t",'',"\n";
          }

      	  ### Add GeneId as synonym for transcript
      	  print $outstream_transcript_names join("\t",
      						 $transcript->display_id(),
      						 $gene->display_id(),
      						 "alternate",
      						 "Gene_Id"), "\n";

          ### Add gene name as synonym for transcript
      	  print $outstream_transcript_names join("\t",
      						 $transcript->display_id(),
      						 $gene_external_name,
      						 "alternate",
      						 "Gene_name"), "\n";

          ## Exon
          my @exons = @{ $transcript->get_all_Exons() };

          foreach $exon (@exons) {
            my $exon_line = $exon->display_id()."\t"."exon"."\t".$gene_external_name."\t";
            $exon_line .= $exon->start()."\t".$exon->end()."\t".&PrintStrand($gene->strand())."\t";
            $exon_line .= $transcript->display_id()."\t".$gene->display_id()."\n";

            print $outstream_exon $exon_line;

            if ($transcript->coding_region_start() && $transcript->coding_region_end() ) {
              if ($exon->end >= $transcript->coding_region_start() && $exon->start <= $transcript->coding_region_end() ) {
                print $outstream_coding_exon $exon_line;
              }
            }

          }

          ## Intron
          my @introns = @{$transcript->get_all_Introns()};
          my $i = 1;
          foreach $intron (@introns) {
            print $outstream_intron "INTRON".$i."-".$transcript->display_id(),"\t","intron","\t","INTRON".$i."-".$gene_external_name,"\t";
            print $outstream_intron $slice->name(),"\t",$intron->start(),"\t",$intron->end(),"\t",&PrintStrand($gene->strand()),"\t";
            print $outstream_intron $transcript->display_id(),"\t",$gene->display_id(),"\n";
            $i++;
          }

          ## UTR
          my $outstream_utr = $out_stream{'utr'};

          if ($transcript->cdna_coding_start()) {
            if ( $transcript->five_prime_utr_Feature() ) {
              print $outstream_utr "5'UTR-".$transcript->display_id(),"\t","5'UTR","\t","5'UTR-".$gene_external_name,"\t";
              print $outstream_utr $slice->name(),"\t";
              print $outstream_utr $transcript->five_prime_utr_Feature()->start(),"\t",$transcript->five_prime_utr_Feature()->end(),"\t",&PrintStrand($gene->strand()),"\t";
              print $outstream_utr $transcript->display_id(),"\t",$gene->display_id(),"\n";
            }
          }

          if ($transcript->cdna_coding_end()) {
            if ( $transcript->three_prime_utr_Feature() ) {
              print $outstream_utr "3'UTR-".$transcript->display_id(),"\t","3'UTR","\t","3'UTR-".$gene_external_name,"\t";
              print $outstream_utr $slice->name(),"\t",;
              print $outstream_utr $transcript->three_prime_utr_Feature()->start(),"\t",$transcript->three_prime_utr_Feature()->end(),"\t",&PrintStrand($gene->strand()),"\t";
              print $outstream_utr $transcript->display_id(),"\t",$gene->display_id(),"\n";
            }
          }

          ## CDS
          if ($transcript->cdna_coding_start() && $transcript->cdna_coding_end()) {
            $prot = $transcript->translation();
            print $outstream_cds $prot->display_id(),"\t","CDS","\t",$gene_external_name,"\t";
            print $outstream_cds $slice->name(),"\t",$prot->genomic_start(),"\t",$prot->genomic_end(),"\t",&PrintStrand($gene->strand()),"\t";
            print $outstream_cds &PrintDescription($gene->description),"\t",$transcript->display_id(),"\t",$gene->display_id(),"\n";

            ## CDS Name
            %print_name = ();

            my @names_list = @{$prot->get_all_DBEntries()};
            foreach my $name (@names_list) {
              next if ($print_name{$name->display_id()});

              my $status = "alternate";
              $status = "primary" if (($trans_external_name) && ($name->display_id() eq $trans_external_name));

              print $outstream_cds_names $prot->display_id(),"\t",$name->display_id(),"\t";
              print $outstream_cds_names $status,"\t",$name->dbname(),"\n";

              $print_name{$name->display_id()} = 1;
            }

            unless ($print_name{$trans_external_name}) {
              print $outstream_cds_names $prot->display_id(),"\t",$trans_external_name,"\t";
              print $outstream_cds_names 'primary',"\t",'',"\n";
            }

            ## Add GeneId as synonym for CDS
	          print $outstream_cds_names join("\t",
					    $prot->display_id(),
					    $gene->display_id(),
					    "alternate",
					    "Gene_Id"), "\n";

            ## Add gene name as synonym for cds
	          print $outstream_cds_names join("\t",
					    $prot->display_id(),
					    $gene_external_name,
					    "alternate",
					    "Gene_Name"), "\n";

            ## Aminoacid sequences
            if ($prot->seq()) {
              my $outstream_aa_seq = $out_stream{'aa_seq'};
              &PrintNextSequence($outstream_aa_seq, "fasta", 60, $prot->seq(), $prot->display_id());
            }
          }
        }
      }
    }
  }


  ################################################################
  ## Report execution time and close output stream
  my $exec_time = &RSAT::util::ReportExecutionTime($start_time); ## This has to be exectuted by all scripts
  print $exec_time if ($main::verbose >= 1); ## only report exec time if verbosity is specified
  close $out;

  exit(0);
}


################################################################
################### SUBROUTINE DEFINITION ######################
################################################################

################################################################
## Display full help message
sub PrintHelp {
  ## Render the POD embedded in this script with pod2text (-c = colour
  ## output), then terminate the program with exit status 0.
  ##
  ## The list form of system() is used so that the script path ($0) is
  ## passed as a single argument and never interpreted by a shell, which
  ## matters when the path contains spaces or shell metacharacters.
  system("pod2text", "-c", $0);
  exit(0);
}

################################################################
## Display short help message
sub PrintOptions {
  ## There is no separate short help for this script: delegate to
  ## &PrintHelp(), which displays the full help and exits.
  return &PrintHelp();
}

################################################################
## Format output
sub PrintStrand {
  ## Convert a numeric strand into the one-letter code written in the
  ## output tables: "D" when the strand equals 1, "R" for any other value.
  my ($strand_value) = @_;
  return ($strand_value == 1) ? "D" : "R";
}

sub PrintTrBiotype {
  ## Return the transcript type to print: 'protein_coding' is reported
  ## as 'mrna', every other biotype is returned unchanged.
  my ($biotype) = @_;
  return 'mrna' if ($biotype eq 'protein_coding');
  return $biotype;
}


sub PrintDescription {
  ## Return the description as is when it is a true value; otherwise
  ## (undef, empty string, or "0") return a placeholder so that the
  ## description column is never left empty.
  ## NOTE(review): the placeholder is spelled "descrition" (sic). It is
  ## written to the output tables, so it is kept unchanged here.
  my ($text) = @_;
  return $text || "<no descrition>";
}


################################################################
## Verbose message
sub Verbose {
  ## Write the run parameters to the $out stream, as ";"-prefixed
  ## comment lines: the command line, the registered output files and
  ## the Ensembl versions.

  ## Command line
  print $out "; download-ensembl-features ";
  &PrintArguments($out);

  ## Output files (only reported when at least one has been registered)
  if (%main::outfile) {
    print $out "; Output files\n";
    while (my ($file_type, $file_path) = each %main::outfile) {
      printf $out ";\t%-13s\t%s\n", $file_type, $file_path;
    }
  }

  ## Ensembl versions
  my @version_report = (
    ["Ensembl safe version", $safe_ensembl_version],
    ["Ensembl version",      $ensembl_version],
  );
  foreach my $entry (@version_report) {
    printf $out "; %-22s\t%s\n", @{$entry};
  }
}

################################################################
## Read arguments
sub ReadArguments {
  ## Parse the command-line options and store their values in main::
  ## package variables (and in %outfile for -o).
  ##
  ## The options are read from a copy of @ARGV, so that @ARGV itself is
  ## left untouched. An unknown option is reported through &FatalError();
  ## an unsupported -version value through &RSAT::error::FatalError().
  ##
  ## The POD blocks interleaved with the code are the user documentation
  ## of each option: they are displayed by &PrintHelp().
  my $arg;
  my @arguments = @ARGV; ## create a copy to shift, because we need ARGV to report command line in &Verbose()
  while (scalar(@arguments) >= 1) {
    $arg = shift (@arguments);

=pod

=head1 OPTIONS

=over 4

=item B<-v #>

Level of verbosity (detail in the warning messages during execution)

=cut
    if ($arg eq "-v") {
      ## The verbosity level is optional: -v alone means level 1
      if (&IsNatural($arguments[0])) {
        $main::verbose = shift(@arguments);
      } else {
        $main::verbose = 1;
      }

=pod

=item B<-h>

Display full help message

=cut
    } elsif ($arg eq "-h") {
      &PrintHelp();

=pod

=item B<-help>

Same as -h

=cut
    } elsif ($arg eq "-help") {
      &PrintOptions();

=pod

=item B<-species species_name>

Species for which the features must be downloaded (e.g. Homo_sapiens,
Mus_musculus). The species name is converted to lowercase.

The option I<-org> is accepted as a synonym of I<-species>.

=cut
    } elsif (($arg eq "-species") || ($arg eq "-org")) {
      $main::species = lc(shift(@arguments));

=pod

=item B<-only_api>

Only use API to download.

This option is maintained for the sake of safety, in case the SQL
interface would be modified.  The download of features (genes, CDS,
mRNA, ...) initially relied on Ensembl Perl Application Programmatic
Interface (API). However, this required a lot of time, because the API
required to treat objects sequentially in order to get the dependent
objects (e.g. several thousands of queries to collect the gene
synonyms).

In the current version, we replaced this by a direct extraction of all
the required information by SQL queries. This however assumes that the
schema of the relational database is unchanged. In case of trouble
with changes of the SQL structure, the API should in principle still
allow to download the data.

=cut
    } elsif ($arg eq "-only_api") {
      $main::only_api = 1;

=pod


=item B<-type #>

Transcript biotype you want to download

Default :

protein_coding
- processed_transcript
- lincRNA
- sRNA
- snRNA
- miRNA
- misc_RNA
- snoRNA

Other biotype :

unprocessed_pseudogene
- processed_pseudogene
- transcribed_processed_pseudogene
- transcribed_unprocessed_pseudogene
- sense_intronic
- nonsense_mediated_decay
- antisense
- IG_V_pseudogene
- retained_intron
- pseudogene
- unitary_pseudogene
- IG_V_gene
- IG_J_gene
- IG_C_gene
- IG_C_pseudogene
- sense_overlapping
- polymorphic_pseudogene


This option can be used iteratively to download multiple types. Example:
   -type IG_V_gene -type IG_J_gene

Multiple types can also be specified separated by commas.
  -type IG_V_gene,IG_J_gene

=cut
    } elsif ($arg eq "-type") {
      ## Types accumulate over successive -type options
      my $type = shift(@arguments);
      push @main::features_to_download, split ",", $type;

=pod

=item B<-dir #>

The directory in which RSAT genomes must be installed. The selected
species will be installed in a sub-directory composed of Species name
and Ensembl genome version.

Default : $RSAT/data/genomes/

=cut
    } elsif ($arg eq "-dir") {
      $main::data_dir = shift(@arguments);

=pod

=item   B<-o outputfile>

The output file is used to hold a trace of the transfers (verbosity),
and to store the list of species when the option -available_species is
activated.

If no output file is specified, the standard output is used.

=cut
    } elsif ($arg eq "-o") {
      $outfile{output} = shift(@arguments);

=pod

=item B<-ensembl_genomes>

Use the Ensembl Genomes database (protists, fungi, ...) instead of
Ensembl.

=cut
    } elsif ($arg eq "-ensembl_genomes") {
      $main::db = "ensembl_genomes";

=pod

=item B<-available_species>

Get all available species on Ensembl

=cut
    } elsif ($arg eq "-available_species") {
      $main::get_available_species = 1;

=pod

=item B<-version #>

The release version of Ensembl.

Supported versions: 70 to 72, safe, latest

Default : I<safe>

=over

=item I<safe>

Some Ensembl API functions may change between two Ensembl releases.

For this reason, we defined the "safe" version, which corresponds to
the latest version of Ensembl which has been checked to work with this
script.

=item I<latest>

This corresponds to the latest version of Ensembl. Beware: this
version is not guaranteed to be compatible with RSAT, in case Ensembl
would change their file formats or locations.

=back

=cut
    } elsif ($arg eq "-version") {
      ## Accepted values: a natural number, "safe" or "latest"
      my $version = shift(@arguments);
      if ( &IsNatural($version) || $version eq "safe" || $version eq "latest" ) {
        $main::ensembl_version = $version;
      } else {
        &RSAT::error::FatalError($version,"is not supported");
      }

    } else {
      &FatalError(join("\t", "Invalid option", $arg));
    }
  }

=pod

=back

=cut

}

################################################################
## Open all outputs
sub PrintHeader {
  ## Write the "-- "-prefixed header of a tab-delimited table.
  ##
  ## Arguments:
  ##   1. output file handle
  ##   2. dump date
  ##   3. class name (printed with the "EnsEMBL::" prefix)
  ##   4. table name
  ##   5. table type (printed under a second "-- table" label)
  ##   6... column names, reported one per "-- field N" line (N starts
  ##        at 1) and then all together on the last header line.
  my ($handle, $dump_date, $class_name, $table_name, $table_type, @fields) = @_;

  print $handle join("\t", "-- dump date", $dump_date), "\n";
  print $handle join("\t", "-- class", "EnsEMBL::".$class_name), "\n";
  print $handle join("\t", "-- table", $table_name), "\n";
  print $handle join("\t", "-- table", $table_type), "\n";

  for my $idx (0 .. $#fields) {
    print $handle join("\t", "-- field ".($idx + 1), $fields[$idx]), "\n";
  }

  print $handle "-- header\n";
  print $handle "-- ".join("\t", @fields)."\n";
}

sub OpenOutputStreams {
  ## Open one output stream per feature table and write the table
  ## headers.
  ##
  ## Argument: the list of transcript biotypes to download; one main
  ## table and one "<biotype>_names" table are opened for each of them.
  ##
  ## Returns a hash (flattened to a list) associating each table name
  ## with its output file handle. The path of each file is obtained from
  ## &Get_feature_file() and registered in the package hash %outfile.
  my (@features_to_download) = @_;
  my %out_stream = ();

  my $time = &AlphaDate();

  ## Resolve the path of a table, register it in %outfile, open it for
  ## writing, register the handle in %out_stream and return the handle.
  my $open_stream = sub {
    my ($table) = @_;
    $outfile{$table} = &Get_feature_file($data_dir, $species, $assembly_version, $ensembl_version, $table);
    $out_stream{$table} = &OpenOutputFile($outfile{$table});
    return $out_stream{$table};
  };

  ## Columns shared by several tables
  my @names_columns = qw(id names status source);
  my @sub_feature_columns = qw(id type name contig start_pos end_pos strand transcriptID GeneID);

  ## Organism File
  &PrintHeader($open_stream->('organism'), $time, 'organism', 'organism', 'main', qw(id taxonomy name));

  ## Organism Name File
  &PrintHeader($open_stream->('organism_names'), $time, 'organism', 'organism_names', 'lateral', qw(id names status));

  ## Gene file
  &PrintHeader($open_stream->('gene'), $time, 'gene', 'gene', 'main', qw(id status type name contig start_pos end_pos strand description));

  ## Gene Names file
  &PrintHeader($open_stream->('gene_names'), $time, 'gene', 'gene_names', 'lateral', @names_columns);

  ## Gene_to_reaction file
  &PrintHeader($open_stream->('gene_to_reaction'), $time, 'gene_to_reaction', 'gene_to_reaction', 'main', qw(id name description source reaction));

  ## Transcripts files & Transcripts Names files.
  ## A named loop variable is used rather than $_, which could be
  ## modified by the subroutines called within the loop.
  foreach my $feature (@features_to_download) {
    &PrintHeader($open_stream->($feature), $time, $feature, $feature, 'main', qw(id status type name contig start_pos end_pos coding_start coding_end strand description GeneID));

    my $names_table = $feature."_names";
    &PrintHeader($open_stream->($names_table), $time, $feature, $names_table, 'lateral', @names_columns);
  }

  ## mRNA files and mRNA Names files: symbolic links to the
  ## protein_coding tables. The links are only created when the
  ## corresponding protein_coding table has been opened above: with an
  ## undefined source, "ln -s" would receive a single argument and create
  ## a stray link in the current directory.
  foreach my $link (['protein_coding', 'mrna'], ['protein_coding_names', 'mrna_names']) {
    my ($source, $alias) = @{$link};
    $outfile{$alias} = &Get_feature_file($data_dir, $species, $assembly_version, $ensembl_version, $alias);
    if (defined($outfile{$source})) {
      &doit("ln -s ".$outfile{$source}." ".$outfile{$alias}, 0, 0, 0);
    } elsif ($main::verbose >= 2) {
      &RSAT::message::Warning("No", $source, "table: skipping the link", $outfile{$alias});
    }
  }

  ## Exon File
  &PrintHeader($open_stream->('exon'), $time, 'exon', 'exon', 'main', @sub_feature_columns);

  ## Coding Exon File
  &PrintHeader($open_stream->('coding_exon'), $time, 'coding_exon', 'coding_exon', 'main', @sub_feature_columns);

  ## Intron file
  &PrintHeader($open_stream->('intron'), $time, 'intron', 'intron', 'main', @sub_feature_columns);

  ## UTR files
  &PrintHeader($open_stream->('utr'), $time, 'utr', 'utr', 'main', @sub_feature_columns);

  ## CDS files
  &PrintHeader($open_stream->('cds'), $time, 'cds', 'cds', 'main', qw(id status type name contig start_pos end_pos strand description transcriptID GeneID));

  ## CDS Names files
  &PrintHeader($open_stream->('cds_names'), $time, 'cds', 'cds_names', 'lateral', @names_columns);

  ## Amino acid sequences: this stream is only opened in API-only mode,
  ## and receives no table header.
  if ($only_api) {
    $open_stream->('aa_seq');
  }

  return %out_stream;
}

