#!/usr/bin/perl -w

############################################################
#
# $Id: download-ensembl-features,v 1.40 2013/10/13 08:16:28 jvanheld Exp $
#
############################################################

use warnings;

=pod

=head1 NAME

download-ensembl-features

=head1 VERSION

$program_version

=head1 DESCRIPTION

Download the genomic features of a user-specified organism from the
Ensembl server, and convert the original format (dat) into the tab files
required for RSAT use.

=head1 AUTHORS

=over

=item I<Jeremy Delerce> (Master 2 thesis 2013)

=item I<Alejandra Medina-Rivera> (amedina@lcg.unam.mx)

=item I<Jacques van Helden> (Jacques.van-Helden@univ-amu.fr)

=back

=head1 CATEGORY

=over

=item util

=back

=head1 USAGE

C<download-ensembl-features -species # [-version #] [-no_name] [-o #] [-dir genomesDirectory] [-available_species] [-v #]>

=head2 Examples

Get the list of species available at Ensembl (L<http://ensembl.org/>).

C<download-ensembl-features -available_species>

Get the list of species available at EnsemblGenomes
(L<http://ensemblgenomes.org/>).

C<download-ensembl-features -available_species -db ensemblgenomes>

Get genomic feature coordinates for the human genome (Homo sapiens).

C<download-ensembl-features -species Homo_sapiens>

=head1 OUTPUT FORMAT

The output consists of a series of tab-delimited text files providing
the genomic location and description of the different feature types
(gene, transcript, CDS, mRNA, lincRNA, ...).

=head1 SEE ALSO

=head2 download-ensembl-genome

Downloads genome for a species.

=head1 WISH LIST

=cut

BEGIN {
  ## Make the lib/ directory located beside this script visible to
  ## "require": whatever precedes the script name in $0 is taken as
  ## the installation directory.
  if ($0 =~ /([^(\/)]+)$/) {
    my $install_dir = $`;
    push(@INC, $install_dir."lib/");
  }
}

require "RSA.lib";
require "RSAT_to_ensembl.lib.pl";
use Bio::EnsEMBL::Registry;

################################################################
## Main package
package main;
{

  ###############################################################
  ## Initialise parameters
  ##
  ## All parameters are package variables ("our"), so they are also
  ## reachable as $main::... from outside this block.
  our $start_time = &RSAT::util::StartScript();
  our $program_version = do { my @r = (q$Revision: 1.40 $ =~ /\d+/g); sprintf"%d."."%02d" x $#r, @r };

  ## Default output handle (was misspelled "SDTOUT", which is not a
  ## valid handle name). It is replaced once the output file is opened.
  our $out = STDOUT;
  our %outfile = ();

  our $verbose = 0;
  our $db = "ensembl";
  our $species = "";
  our $assembly_version = "";
  our $species_suffix = "";
  our $full_species_ID = "";
  our $registry = 'Bio::EnsEMBL::Registry'; ## Class name, used as invocant for the registry class methods
  our $safe_ensembl_version = &get_ensembl_version_safe($db);
  our $latest_ensembl_version = &get_ensembl_version_latest($db);
  our $ensembl_version = &get_ensembl_version($db);
  our $dry_run = 0; ## Do not run the SQL queries, but print them out for checking
  our $null = "<NULL>";

#  our $only_api = 0;
  our $query_mode = "sql";
  our $get_available_species = 0;

  our $max_genes = 0; ## For testing only

  ## Transcript biotypes to export
  our @features_to_download = qw(
    protein_coding
    processed_transcript
    lincRNA
    sRNA
    snRNA
    miRNA
    misc_RNA
    snoRNA
  );

  our $sql_query; ## To spare electrons in the context of Kyoto agreements, we recycle the same variable for all the sql queries

  ################################################################
  ## Read argument values
  ## (ReadArguments is defined outside this section; presumably it
  ## fills the "our" parameters declared above from the command line
  ## -- confirm against its definition.)
  &ReadArguments();

  ## Check that the user-selected Ensembl version is supported.
  &check_ensembl_version($db,$ensembl_version);

#  my ($host,$port) = &Get_host_port($db);

  ################################################################
  ## Print verbose
  ##
  ## $out is replaced by the handle returned for the output file
  ## registered under $outfile{output}.
  $out = &OpenOutputFile($outfile{output});
  ## Parameters are only reported here when listing the available
  ## species; the regular download path calls Verbose() later, after
  ## the feature output streams have been opened.
  &Verbose() if ($main::verbose >= 1 && $get_available_species);

  ################################################################
  ## Print available species
  ##
  ## When the list of available species was requested, print one line
  ## per "core" database adaptor (species name, plus the taxon when
  ## one is known for it), then exit without downloading any feature.
  if ($get_available_species) {
    &RSAT::message::TimeWarn("download-ensembl-features", "Getting the list of available species", "db=".$db) if ($main::verbose >= 1);
    &LoadRegistry($registry, $db, $ensembl_version);

    my @db_adaptors = @{ $registry->get_all_DBAdaptors() };

    ## The species-to-taxon table is only loaded for EnsemblGenomes
    my %species_taxon = ();
    if (lc($db) eq "ensemblgenomes") {
      my $ens_version = &get_ensembl_version_safe($db);
      %species_taxon = &Get_species_taxon($db,$ens_version);
    }

    ## Sort adaptors alphabetically by species name
    my @sorted_adaptors = sort {$a->species() cmp $b->species()} @db_adaptors;

    foreach my $db_adaptor (@sorted_adaptors) {
      ## Each species has several adaptor groups; report it only once
      next unless ($db_adaptor->group() eq "core");
      my $species_name = $db_adaptor->species();
      print $out ucfirst($species_name);
      print $out "\t",$species_taxon{$species_name} if ($species_taxon{$species_name});
      print $out "\n";
    }
    $registry->disconnect_all();

    my $exec_time = &RSAT::util::ReportExecutionTime($start_time); ## This has to be executed by all scripts
    print $out $exec_time if ($main::verbose >= 1); ## only report exec time if verbosity is specified
    close $out;
    exit(0);
  }

  ## A species is mandatory for everything that follows
  &RSAT::error::FatalError("No species indicated. Use the -species option") unless ($species);

  ################################################################
  ## Connecting to ensembl
  &LoadRegistry($registry, $db, $ensembl_version);

  ################################################################
  ## Get adaptors
  &RSAT::message::TimeWarn("Getting species slices from Ensembl") if ($main::verbose >= 2);
  my $slice_adaptor = $registry->get_adaptor($species, 'core', 'slice');
  my $mca = $registry->get_adaptor( $species, 'Core', 'MetaContainer' );
  my $tla = $registry->get_adaptor( $species, 'Core', 'Translation' );

  my $dba = $registry->get_DBAdaptor( $species, 'Core' );

  ## The registry gives back an undefined value when the species is
  ## not known in the loaded databases. Stop with an explicit message
  ## rather than with "Can't call method ... on an undefined value".
  unless ($slice_adaptor && $mca && $dba) {
    &RSAT::error::FatalError("Species ".$species." was not found in database ".$db." (version ".$ensembl_version."). Use -available_species to list the supported species.");
  }

  ## Low-level handles used for the direct SQL queries
  my $dbc = $dba->dbc();
  my $sql_helper = $dbc->sql_helper();

  ################################################################
  ## Collect all the top-level slices of the selected species
  &RSAT::message::TimeWarn("Fetching all slices for species",$species) if ($verbose >= 2);
  my @slices = @{$slice_adaptor->fetch_all('toplevel')};

  ## Work around the way Ensembl describes the human Y chromosome.
  ## Through the API it is split in two slices, because a large
  ## fragment (2.6Mb) at its beginning is homologous to the X
  ## chromosome, and Ensembl keeps the two parts separated to avoid
  ## redundant annotations. The RSAT data model cannot represent this,
  ## so every Y slice is discarded and replaced by one single region
  ## covering the whole chromosome.
  ##
  ## Besides, some feature annotations seem inconsistent with the
  ## limits of the Y slices (feature coordinates are relative to the
  ## chromosome start, whereas the slice only starts 2.6Mb further).
  if ($species eq "homo_sapiens") {
      @slices = (
        (grep { $_->name() !~ /:Y:/ } @slices),
        $slice_adaptor->fetch_by_region( 'chromosome', 'Y' )
      );
  }

  ## Index the full slice names by sequence region name
  my %slice_names = ();
  $slice_names{$_->seq_region_name()} = $_->name() foreach (@slices);


  ################################################################
  ## Get genome_dir
  ##
  ## The assembly version is taken from the second colon-separated
  ## field of the name of the first slice.
  my @fields = split(":",$slices[0]->name());
  $assembly_version = $fields[1];
  ## Species ID and genome directory are both derived from the species
  ## name, assembly version, Ensembl version and species suffix.
  $full_species_ID = &Get_full_species_ID($species, $assembly_version,$ensembl_version, $species_suffix);
  &RSAT::message::Info("Full species ID", $full_species_ID) if ($main::verbose >= 2);
  $genome_dir = &Get_genome_dir($species, $assembly_version,$ensembl_version, $species_suffix);
  &RSAT::message::Info("Genome directory", $genome_dir) if ($main::verbose >= 2);
  ## Report both values as comment lines (leading ";") in the main output
  printf $out ("; %-22s\t%s\n", "Full species ID", $full_species_ID);
  printf $out ("; %-22s\t%s\n", "Genome directory", $genome_dir);
  ## NOTE(review): presumably creates the directory when it is missing -- confirm in RSAT::util
  &RSAT::util::CheckOutDir($genome_dir);

  ################################################################
  ## Open the output streams (one per feature table) and keep a
  ## directly named handle for each table written below.
  my %out_stream = &OpenOutputStreams(@features_to_download);

  my ($outstream_organism,
      $outstream_organism_names,
      $outstream_gene,
      $outstream_gene_names,
      $outstream_gene_to_reaction,
      $outstream_exon,
      $outstream_intron,
      $outstream_coding_exon,
      $outstream_cds,
      $outstream_utr,
      $outstream_cds_names,
      $outstream_aa_seq) = @out_stream{qw(
    organism
    organism_names
    gene
    gene_names
    gene_to_reaction
    exon
    intron
    coding_exon
    cds
    utr
    cds_names
    aa_seq
  )};

  &Verbose() if ($verbose >= 1);

  ################################################################
  ## Index the transcript biotypes to download, for fast lookups
  my %feat_to_download = map { $_ => 1 } @features_to_download;

  ################################################################
  # Convert the Ensembl version into the Ensembl Genomes version.
  # For EnsemblGenomes, the version is read from the name of the
  # "multi" database: last-but-one "_"-separated field.
  my $ftp_ensembl_version = $ensembl_version;
  if (lc($db) eq "ensemblgenomes") {
    &LoadRegistry($registry, $db, $ensembl_version);

    foreach my $adaptor (@{ $registry->get_all_DBAdaptors() }) {
      next unless ($adaptor->species() eq "multi");
      @fields = split("_",$adaptor->dbc()->dbname());
      $ftp_ensembl_version = $fields[-2];
    }
  }

  ################################################################
  # Downloading features


  ## Organism: taxonomy ID, full classification (ending with the two
  ## first words of the scientific name) and scientific name.
  &RSAT::message::TimeWarn("Getting Organism") if ($verbose >= 2);

  my ($c1,$c2) = split(" ",$mca->get_scientific_name());

  print $outstream_organism
    $mca->get_taxonomy_id(),"\t",
    join(";",reverse(@{$mca->get_classification ( )})),";",$c1,";",ucfirst($c2),"\t",
    $mca->get_scientific_name(),"\n";


  ## Organism names: the species given by the user is the primary
  ## name, all its registry aliases are alternate names.
  &RSAT::message::TimeWarn("Getting Organism Names") if ($verbose >= 2);

  print $outstream_organism_names $mca->get_taxonomy_id(),"\t",$species,"\t","primary","\n";
  my @alias = @{$registry->get_all_aliases($species)};
  foreach my $alias_name (@alias) {
    print $outstream_organism_names $mca->get_taxonomy_id(),"\t",$alias_name,"\t","alternate","\n";
  }

  ################################################################
  ## By default, we collect features by SQL, much faster than via the
  ## object-oriented API. However we maintain the API access, which is
  ## supposed to be stable.
  ##
  ## SQL queries were kindly provided by Dan STAINES
  ## <dstaines@ebi.ac.uk>.
  ##
  ## The documentation of the SQL schema can be found at
  ## http://www.ensembl.org/info/docs/api/core/core_schema.html

  if ($query_mode eq "sql") {



    ################################################################
    ## Get genes
    ##
    ## One row per gene is written to the gene table: ID, status,
    ## biotype, name, slice name, start, end, strand, description.
    &RSAT::message::TimeWarn("Getting genes  (SQL query)") if ($verbose >= 2);

    ## Query provided by Dan Staines (LEFT join added 2014-06-13,
    ## corrected again 2014-10-28). The earlier version of the query,
    ## which was built and then immediately overwritten, has been
    ## removed.
    $sql_query = "select";
    $sql_query .= "    g.stable_id,"; # Field 0
    $sql_query .= "    g.status,"; # Field 1
    $sql_query .= "    g.biotype,"; # Field 2
    $sql_query .= "    xref.display_label,"; # Field 3
    $sql_query .= "    g.seq_region_start,"; # Field 4
    $sql_query .= "    g.seq_region_end,"; # Field 5
    $sql_query .= "    g.seq_region_strand,"; # Field 6
    $sql_query .= "    g.description,"; # Field 7
    $sql_query .= "    s.name"; # Field 8
    $sql_query .= " from gene g";
    $sql_query .= "    LEFT join xref on (g.display_xref_id = xref.xref_id)";
    $sql_query .= "    join seq_region s on (g.seq_region_id=s.seq_region_id)";
    $sql_query .= "    join coord_system cs using (coord_system_id)";
    $sql_query .= "    join meta using (species_id)";
    $sql_query .= " where";
    $sql_query .= "    meta.meta_key='species.production_name'";
    $sql_query .= "    and meta.meta_value=?";

    &RSAT::message::Info("\tSQL query", $sql_query) if (($dry_run) || ($verbose >= 3)); ## Report SQL query for info

    unless ($dry_run) {
      ## The callback converts each result row into a hash reference
      my $gene_list = $sql_helper->execute(
        -SQL => $sql_query,
        -CALLBACK => sub {
          my @row = @{ shift @_ };
          return {
            gene_id => $row[0],
            gene_status => $row[1],
            gene_biotype => $row[2],
            gene_external_name => $row[3] || $row[0], ## Fall back on the gene ID when there is no display label
            gene_start => $row[4],
            gene_end => $row[5],
            gene_strand => $row[6],
            gene_description => $row[7],
            seq_name => $row[8]
          };
        },
        -PARAMS => [$species]
          );

      ## Report number of genes
      my $gene_nb = scalar(@{$gene_list});
      &RSAT::message::TimeWarn("\tgot", $gene_nb, "genes") if ($main::verbose >= 2);

      ## Print gene information
      while ( my $row = shift @{$gene_list} ) {
        my %row = %{$row};
        ## Skip the genes that are not located on a top-level slice
        next unless ( $slice_names{ $row{'seq_name'} });
        print $outstream_gene (join("\t",
                                    $row{'gene_id'},
                                    $row{'gene_status'},
                                    $row{'gene_biotype'},
                                    $row{'gene_external_name'},
                                    $slice_names{ $row{'seq_name'} },
                                    $row{'gene_start'},
                                    $row{'gene_end'},
                                    &PrintStrand($row{'gene_strand'}),
                                    &PrintDescription($row{'gene_description'})
                               ),"\n");
      }
      $gene_list = ""; ## Drop the reference to the result list
    }

    ################################################################
    ## Get gene names
    ##
    ## For each cross-reference attached to a gene, the gene ID is
    ## written as primary name, followed by the accession and the
    ## display label of the cross-reference as alternate names.
    &RSAT::message::TimeWarn("Getting gene names (SQL query)") if ($verbose >= 2);

    $sql_query = join("",
                      "select",
                      "    g.stable_id,",
                      "    e1.db_name,",
                      "    x1.dbprimary_acc,",
                      "    x1.display_label,",
                      "    s.name",
                      " from gene g",
                      "    LEFT join object_xref ox1 on (g.gene_id=ox1.ensembl_id and ox1.ensembl_object_type='Gene')",
                      "    join xref x1 on (x1.xref_id=ox1.xref_id)",
                      "    join external_db e1 on (e1.external_db_id=x1.external_db_id)",
                      "    join seq_region s on (g.seq_region_id=s.seq_region_id)",
                      "    join coord_system using (coord_system_id)",
                      "    join meta using (species_id)",
                      " where meta.meta_key='species.production_name'",
                      "    and meta_value=?");

    &RSAT::message::Info("\tSQL query", $sql_query) if (($dry_run) || ($verbose >= 3)); ## Report SQL query for info

    unless ($dry_run) {
      my $gene_names_list = $sql_helper->execute(
        -SQL      => $sql_query,
        -CALLBACK => sub {
          my ($gene_id, $source, $synonym1, $synonym2, $seq_name) = @{ shift @_ };
          return {
            gene_id => $gene_id,
            source => $source,
            synonym1 => $synonym1,
            synonym2 => $synonym2,
            seq_name => $seq_name
          };
        },
        -PARAMS => [$species]
          );

      ## Report number of gene names
      my $gene_names_nb = scalar(@{$gene_names_list});
      &RSAT::message::TimeWarn("\tgot", $gene_names_nb, "gene names") if ($main::verbose >= 2);

      ## Print gene name information
      foreach my $name_row (@{$gene_names_list}) {
        my $gene_id = $name_row->{'gene_id'};
        my $slice_name = $slice_names{ $name_row->{'seq_name'} };
        next unless ($slice_name);

        ## Names already written for the current row.
        ## NOTE(review): this table is reset for every row, so the
        ## primary line is repeated for each cross-reference of a gene.
        my %seen_name = ();

        print $outstream_gene_names (join("\t", $gene_id, $gene_id, 'primary'),"\n");
        $seen_name{$gene_id} = 1;

        foreach my $synonym_key ('synonym1', 'synonym2') {
          next if ($seen_name{ $name_row->{$synonym_key} });
          print $outstream_gene_names (join("\t",
                                            $gene_id,
                                            $name_row->{$synonym_key},
                                            'alternate',
                                            $name_row->{'source'}
                                       ),"\n");
          $seen_name{ $name_row->{$synonym_key} } = 1;
        }
      }
      $gene_names_list = "";
    }

    ################################################################
    ## Get gene-reaction relationships
    ##
    ## Cross-references attached to the translations of each gene,
    ## restricted to the external databases listed in the last clause
    ## of the query.
    &RSAT::message::TimeWarn("Getting gene-reaction relationships  (SQL query)") if ($verbose >= 2);

    $sql_query = "select";
    $sql_query .= "     g.stable_id,";
    $sql_query .= "     gx.display_label,";
    $sql_query .= "     g.description,";
    $sql_query .= "     e1.db_display_name,";
    $sql_query .= "     x1.display_label";
    $sql_query .= " from gene g";
    $sql_query .= "    LEFT join xref gx on (g.display_xref_id=gx.xref_id)";
    $sql_query .= "    join transcript tr using (gene_id)  join translation tl using (transcript_id)";
    $sql_query .= "    join object_xref ox1 on (tl.translation_id=ox1.ensembl_id and ox1.ensembl_object_type='Translation')";
    $sql_query .= "    join xref x1 on (x1.xref_id=ox1.xref_id)  join external_db e1 on (e1.external_db_id=x1.external_db_id)";
    $sql_query .= "    join seq_region s on (g.seq_region_id=s.seq_region_id)";
    $sql_query .= "    join coord_system using (coord_system_id)";
    $sql_query .= "    join meta using (species_id)";
    $sql_query .= " where meta.meta_key='species.production_name'";
    $sql_query .= "    and meta_value=?";
    ## This statement must end with a semicolon: with a comma, it was
    ## merged with the conditional Info() statement below, so the
    ## database filter was only added in dry-run or high-verbosity mode.
    $sql_query .= "    and e1.db_name in ('Uniprot/SWISSPROT','Uniprot/SPTREMBL','IntEnz','Rhea')";

    &RSAT::message::Info("\tSQL query", $sql_query) if (($dry_run) || ($verbose >= 3)); ## Report SQL query for info

    unless ($dry_run) {
      my $gene_reaction_list = $sql_helper->execute(
        -SQL      => $sql_query,
        -CALLBACK => sub {
          my @row = @{ shift @_ };
          return {
            gene_id => $row[0],
            gene_external_name => $row[1] || $row[0], ## Fall back on the gene ID when there is no display label
            gene_description => $row[2],
            db => $row[3],
            db_label => $row[4]
          };
        },
        -PARAMS => [$species]
          );

      ## Report number of gene-reaction relationships
      my $gene_reaction_nb =  scalar(@{$gene_reaction_list});
      &RSAT::message::TimeWarn("\tgot", $gene_reaction_nb, "gene-reaction relationships") if ($main::verbose >= 2);

      ## Print gene-reaction relationships
      while (my $row = shift @{$gene_reaction_list}) {
        my %row = %{$row};
        #      next unless ( $row{'db_label'} =~ /\./);
        print $outstream_gene_to_reaction join ("\t",  $row{'gene_id'},
                                                $row{'gene_external_name'},
                                                &PrintDescription($row{'gene_description'}),
                                                $row{'db'},
                                                $row{'db_label'}),"\n";
      }
    }

    ################################################################
    ## Get transcripts
    ##
    ## Each transcript is written to the stream of its own biotype;
    ## transcripts whose biotype was not requested are ignored.
    &RSAT::message::TimeWarn("Getting transcripts (SQL query)") if ($verbose >= 2);

    $sql_query = join("",
                      "select",
                      "    g.stable_id,",
                      "    g.description,",
                      "    tr.stable_id,",
                      "    tr.status,",
                      "    tr.biotype,",
                      "    xref.display_label,",
                      "    tr.seq_region_start,",
                      "    tr.seq_region_end,",
                      "    tr.seq_region_strand,",
                      "    s.name",
                      " from gene g",
                      "    join transcript tr using (gene_id)",
                      "    LEFT join xref on (g.display_xref_id = xref.xref_id)",
                      "    join seq_region s on (g.seq_region_id=s.seq_region_id)",
                      "    join coord_system cs using (coord_system_id)",
                      "    join meta using (species_id)",
                      " where meta.meta_key='species.production_name'",
                      "       and meta_value=?");


    &RSAT::message::Info("\tSQL query", $sql_query) if (($dry_run) || ($verbose >= 3)); ## Report SQL query for info

    unless ($dry_run) {
      my $transcript_list = $sql_helper->execute(
        -SQL      => $sql_query,
        -CALLBACK => sub {
          my @col = @{ shift @_ };
          my %transcript = (
            gene_id => $col[0],
            gene_description => $col[1],
            transcript_id => $col[2],
            transcript_status => $col[3],
            transcript_biotype => $col[4],
            transcript_external_name => $col[5] || $col[2],
            transcript_start => $col[6],
            transcript_end => $col[7],
            transcript_strand => $col[8],
            seq_name => $col[9]
          );
          return \%transcript;
        },
        -PARAMS => [$species]
          );

      ## Report number of transcripts
      my $transcript_nb = scalar(@{$transcript_list});
      &RSAT::message::TimeWarn("\tgot", $transcript_nb, "transcripts") if ($main::verbose >= 2);

      ## Print transcript information
      foreach my $tr (@{$transcript_list}) {
        ## Only keep transcripts located on a top-level slice and
        ## belonging to one of the requested biotypes
        my $slice_name = $slice_names{ $tr->{'seq_name'} };
        next unless ($slice_name);
        next unless ($feat_to_download{ $tr->{'transcript_biotype'} });

        my $outstream_transcript = $out_stream{ $tr->{'transcript_biotype'} };

        print $outstream_transcript join("\t",
                                         $tr->{'transcript_id'},
                                         $tr->{'transcript_status'},
                                         &PrintTrBiotype($tr->{'transcript_biotype'}),
                                         $tr->{'transcript_external_name'},
                                         $slice_name,
                                         $tr->{'transcript_start'},
                                         $tr->{'transcript_end'},
                                         &PrintStrand($tr->{'transcript_strand'}),
                                         &PrintDescription($tr->{'gene_description'}),
                                         $tr->{'gene_id'}),"\n";
      }
      undef $transcript_list;
    }

    ################################################################
    ## Transcript names
    ##
    ## For each cross-reference attached to a transcript, up to four
    ## names are written: the transcript ID (primary), then the gene
    ## ID, the accession and the display label of the cross-reference
    ## (alternate), skipping the names already written for that row.
    &RSAT::message::TimeWarn("Getting transcript names (SQL query)") if ($verbose >= 2);

    $sql_query = join("",
                      "select",
                      "    tr.stable_id,",
                      "    tr.biotype,",
                      "    g.stable_id,",
                      "    e1.db_name,",
                      "    x1.dbprimary_acc,",
                      "    x1.display_label,",
                      "    s.name",
                      " from gene g",
                      "    LEFT join transcript tr using (gene_id)",
                      "    join object_xref ox1 on (tr.transcript_id=ox1.ensembl_id and ox1.ensembl_object_type='Transcript')",
                      "    join xref x1 on (x1.xref_id=ox1.xref_id)",
                      "    join external_db e1 on (e1.external_db_id=x1.external_db_id)",
                      "    join seq_region s on (g.seq_region_id=s.seq_region_id)",
                      "    join coord_system using (coord_system_id)",
                      "    join meta using (species_id)",
                      " where  meta.meta_key='species.production_name'",
                      "    and meta_value=?");


    &RSAT::message::Info("\tSQL query", $sql_query) if (($dry_run) || ($verbose >= 3)); ## Report SQL query for info

    unless ($dry_run) {
      my $transcript_names_list = $sql_helper->execute(
        -SQL      => $sql_query,
        -CALLBACK => sub {
          my @col = @{ shift @_ };
          return {
            transcript_id => $col[0],
            transcript_biotype => $col[1],
            gene_id => $col[2],
            source => $col[3],
            synonym1 => $col[4],
            synonym2 => $col[5],
            seq_name => $col[6]
          };
        },
        -PARAMS => [$species] );

      ## Report number of transcript names
      my $transcript_names_nb = scalar(@{$transcript_names_list});
      &RSAT::message::TimeWarn("\tgot", $transcript_names_nb, "transcript names") if ($main::verbose >= 2);

      ## Print transcript name information
      foreach my $name_row (@{$transcript_names_list}) {
        next unless ( $slice_names{ $name_row->{'seq_name'} });
        next unless ($feat_to_download{ $name_row->{'transcript_biotype'} });

        my $transcript_id = $name_row->{'transcript_id'};
        my $outstream_transcript_names = $out_stream{ $name_row->{'transcript_biotype'}."_names" };

        ## Candidate names, in writing order: name, name type, source
        my @candidates = (
          [ $transcript_id,           'primary',   'transcript_id' ],
          [ $name_row->{'gene_id'},   'alternate', 'gene_id' ],
          [ $name_row->{'synonym1'},  'alternate', $name_row->{'source'} ],
          [ $name_row->{'synonym2'},  'alternate', $name_row->{'source'} ],
        );

        ## Names already written for this row (reset for every row)
        my %seen_name = ();
        foreach my $candidate (@candidates) {
          my ($name, $name_type, $name_source) = @{$candidate};
          next if ($seen_name{$name});
          print $outstream_transcript_names $transcript_id,"\t",$name,"\t",$name_type,"\t",$name_source,"\n";
          $seen_name{$name} = 1;
        }
      }
      undef $transcript_names_list;
    }

    ################################################################
    ## Getting exons and introns
    ##
    ## Every exon of a requested transcript biotype, located on a
    ## top-level slice, is written to the exon table. An intron is
    ## written between two consecutively retained exons that belong to
    ## the same transcript.
    ##
    ## NOTE(review): the query has no "order by" clause, so the intron
    ## computation assumes that the server returns the exons grouped by
    ## transcript and sorted by increasing position -- to be confirmed.
    &RSAT::message::TimeWarn("Getting exons and introns (SQL query)") if ($verbose >= 2);

    $sql_query = "select";
    $sql_query .= "    g.stable_id,";
    $sql_query .= "    tr.stable_id,";
    $sql_query .= "    tr.biotype,";
    $sql_query .= "    ex.stable_id,";
    $sql_query .= "    ex.seq_region_start,";
    $sql_query .= "    ex.seq_region_end,";
    $sql_query .= "    ex.seq_region_strand,";
    $sql_query .= "    s.name,";
    $sql_query .= "    xref.display_label";
    $sql_query .= " from gene g";
    $sql_query .= "    join transcript tr using (gene_id)";
    $sql_query .= "    join exon_transcript cod_ex on (tr.transcript_id=cod_ex.transcript_id)";
    $sql_query .= "    join exon ex on (ex.exon_id=cod_ex.exon_id)";
    $sql_query .= "    LEFT join xref on (g.display_xref_id = xref.xref_id)";
    $sql_query .= "    join seq_region s on (g.seq_region_id=s.seq_region_id)";
    $sql_query .= "    join coord_system cs using (coord_system_id)";
    $sql_query .= "    join meta using (species_id)";
    $sql_query .= " where  meta.meta_key='species.production_name'";
    $sql_query .= "    and meta_value=?";

    &RSAT::message::Info("\tSQL query", $sql_query) if (($dry_run) || ($verbose >= 3)); ## Report SQL query for info

    unless ($dry_run) {
      my $exon_list = $sql_helper->execute(
        -SQL      => $sql_query,
        -CALLBACK => sub {
          my @row = @{ shift @_ };
          return {
            gene_id => $row[0],
            transcript_id => $row[1],
            transcript_biotype => $row[2],
            transcript_external_name => $row[8] || $row[1], ## Fall back on the transcript ID when there is no display label
            exon_id => $row[3],
            exon_start => $row[4],
            exon_end => $row[5],
            exon_strand => $row[6],
            seq_name => $row[7]
          };
        },
        -PARAMS => [$species]
          );

      ## Report number of exons
      my $exon_nb =  scalar(@{$exon_list});
      &RSAT::message::TimeWarn("\tgot", $exon_nb, "exons") if ($main::verbose >= 2);

      ## Print exons and introns.
      ##
      ## A single loop handles all the rows, including the first
      ## retained one: this ensures that every exon row has the same
      ## columns (the first one used to lack the slice name), and that
      ## the loop ends properly when no row passes the filters.
      my %last_row = (); ## Previously retained exon (empty before the first one)
      while ( my $row = shift @{$exon_list} ) {
        my %row = %{$row};
        next unless ( $slice_names{ $row{'seq_name'} });
        next unless ( $feat_to_download{$row{'transcript_biotype'}} );

        print $outstream_exon join("\t",  $row{'exon_id'},
                                   "exon",
                                   $row{'exon_id'},
                                   $slice_names{$row{'seq_name'}}, #slice name
                                   $row{'exon_start'},
                                   $row{'exon_end'},
                                   &PrintStrand($row{'exon_strand'}),
                                   $row{'transcript_id'},
                                   $row{'gene_id'}),"\n";

        ## Intron between the previous and the current exon of the
        ## same transcript
        if ( %last_row && ($last_row{'transcript_id'} eq $row{'transcript_id'}) ) {
          print $outstream_intron join("\t",
                                       "INTRON".$row{'transcript_id'},
                                       "intron",
                                       "INTRON".$row{'transcript_external_name'},
                                       $slice_names{ $row{'seq_name'} }, #slice name
                                       $last_row{'exon_end'}+1,
                                       $row{'exon_start'}-1,
                                       &PrintStrand($row{'exon_strand'}),
                                       $row{'transcript_id'},
                                       $row{'gene_id'}),"\n";
        }

        %last_row = %row;
      }
      $exon_list = ();
    }

    ################################################################
    ## Getting coding exons
    ##
    ## Exons of the transcripts that have a translation, restricted to
    ## the requested biotypes and to the top-level slices.
    &RSAT::message::TimeWarn("Getting coding exons (SQL query)") if ($verbose >= 2);

    $sql_query = "select";
    $sql_query .= "    g.stable_id,";
    $sql_query .= "    tr.stable_id,";
    $sql_query .= "    tr.biotype,";
    $sql_query .= "    ex.stable_id,";
    $sql_query .= "    ex.seq_region_start,";
    $sql_query .= "    ex.seq_region_end,";
    $sql_query .= "    ex.seq_region_strand,";
    $sql_query .= "    s.name";
    $sql_query .= " from gene g";
    $sql_query .= "    LEFT join transcript tr using (gene_id)";
    $sql_query .= "    join translation tl using (transcript_id)";
    $sql_query .= "    join exon_transcript ex_tran on (tr.transcript_id=ex_tran.transcript_id) ";
    $sql_query .= "    join exon ex on (ex.exon_id=ex_tran.exon_id)";
    $sql_query .= "    join seq_region s on (g.seq_region_id=s.seq_region_id)";
    $sql_query .= "    join coord_system cs using (coord_system_id)";
    $sql_query .= "    join meta using (species_id)";
    $sql_query .= " where  meta.meta_key='species.production_name'";
    $sql_query .= "    and meta_value=?";

    &RSAT::message::Info("\tSQL query", $sql_query) if (($dry_run) || ($verbose >= 3)); ## Report SQL query for info

    unless ($dry_run) {
      my $coding_exon_list = $sql_helper->execute(
        -SQL      => $sql_query,
        -CALLBACK => sub {
          my @row = @{ shift @_ };
          return {
            gene_id => $row[0],
            transcript_id => $row[1],
            transcript_biotype => $row[2],
            exon_id => $row[3],
            exon_start => $row[4],
            exon_end => $row[5],
            exon_strand => $row[6],
            seq_name => $row[7]
          };
        },
        -PARAMS => [$species]
          );

      ## Report number of coding exons
      my $coding_exon_nb =  scalar(@{$coding_exon_list});
      &RSAT::message::TimeWarn("\tgot", $coding_exon_nb, "coding exons") if ($main::verbose >= 2);

      ## Print coding exons, with the same columns as the exon table.
      ## A "print" nested in the field list used to write the last
      ## five fields unseparated in front of the row, and to leave its
      ## return value (1) in place of the exon start.
      while ( my $row = shift @{$coding_exon_list} ) {
        my %row = %{$row};
        next unless ( $slice_names{ $row{'seq_name'} });
        next unless ( $feat_to_download{$row{'transcript_biotype'}} );

        print $outstream_coding_exon join ("\t", $row{'exon_id'},
                                           "coding_exon",
                                           $row{'exon_id'},
                                           $slice_names{ $row{'seq_name'} }, #slice name
                                           $row{'exon_start'},
                                           $row{'exon_end'},
                                           &PrintStrand($row{'exon_strand'}),
                                           $row{'transcript_id'},
                                           $row{'gene_id'}),"\n";
      }
      $coding_exon_list = ();
    }


    ################################################################
    ## Get CDS and UTR
    ##
    ## One row per translation, with the coordinates of its transcript
    ## and of the exons where the translation starts and ends.
    &RSAT::message::TimeWarn("Getting CDSs and UTRs (SQL query)") if ($verbose >= 2);

    $sql_query = join("",
                      "select",
                      "    g.stable_id,", ## field 0
                      "    g.description,", ## field 1
                      "    g.display_xref_id,", ## field 2
                      "    tr.stable_id,", ## field 3
                      "    tr.status,", ## field 4
                      "    xref.display_label,", ## field 5
                      "    tr.seq_region_start,", ## field 6
                      "    tr.seq_region_end,", ## field 7
                      "    tr.seq_region_strand,", ## field 8
                      "    tl.stable_id,", ## field 9
                      "    tl.seq_start,", ## field 10
                      "    tl.seq_end,", ## field 11
                      "    tl.start_exon_id,", ## field 12
                      "    startex.seq_region_start,", ## field 13
                      "    startex.seq_region_end,", ## field 14
                      "    tl.end_exon_id,", ## field 15
                      "    endex.seq_region_start,", ## field 16
                      "    endex.seq_region_end,", ## field 17
                      "    s.name", ## field 18
                      " from gene g",
                      "    join transcript tr using (gene_id)",
                      "    join translation tl using (transcript_id)",
                      ## The exon table is joined twice, to get the
                      ## start exon and the end exon separately.
                      "    join exon startex on (startex.exon_id=tl.start_exon_id)",
                      "    join exon endex on (endex.exon_id=tl.end_exon_id)",
                      "    LEFT join xref on (g.display_xref_id = xref.xref_id)",
                      "    join seq_region s on (g.seq_region_id=s.seq_region_id)",
                      "    join coord_system cs using (coord_system_id)",
                      "    join meta using (species_id)",
                      " where  meta.meta_key='species.production_name'",
                      "    and meta_value=?");

    &RSAT::message::Info("\tSQL query", $sql_query) if (($dry_run) || ($verbose >= 3)); ## Report SQL query for info

    unless ($dry_run) {
      my $cds_utr_list = $sql_helper->execute(
	-SQL      => $sql_query,
	-CALLBACK => sub {
	  my @row = @{ shift @_ };
	  return { 
	    gene_id => $row[0],
	    gene_description => $row[1],
	    gene_name => $row[2] || $row[0],
	    transcript_id => $row[3],
	    transcript_status => $row[4],
	    transcript_external_name => $row[5] || $row[3],
	    transcript_start => $row[6],
	    transcript_end => $row[7],
	    transcript_strand => $row[8],
	    translate_id => $row[9],
	    translate_start => $row[10],
	    translate_end => $row[11],
	    start_exon_id => $row[12],
	    start_exon_start => $row[13],
	    start_exon_end => $row[14],
	    end_exon_id => $row[15],
	    end_exon_start => $row[16],
	    end_exon_end => $row[17],
	    seq_name => $row[18]
	  };
	},
	-PARAMS => [$species]
	  );

      ## Report number of CDS UTRs
      my $cds_utr_nb =  scalar(@{$cds_utr_list});
      &RSAT::message::TimeWarn("\tgot", $cds_utr_nb, "CDS UTRs") if ($main::verbose >= 2);
      
      ## Print CDSs and UTRs
      my $r = 0; ## Row counter
      my $c = 0; ## CDS counter
      while ( my $row = shift @{$cds_utr_list} ) {
	$r++;
	my %row = %{$row};
	next unless ( $slice_names{ $row{'seq_name'} });
	$c++;

	## JvH (2014-06-14): What is this ??? It seems to be a specific patch for a single translation ID. Which genome ? (human ?)
	# if ( $row{'translate_id'} eq 'EFP73577' ) {
	#  print $row{'transcript_start'},"\t",$row{'transcript_end'},"\n";
	#  print $row{'translate_start'},"\t",$row{'translate_end'},"\n";
	#  print "\n";
	#  print $row{'transcript_start'}+$row{'translate_start'}-1,"\t",$row{'transcript_start'}+$row{'translate_end'}-1,"\n";
	#  print $row{'transcript_start'}+($row{'transcript_end'}-$row{'transcript_start'}+$row{'translate_end'}-1),"\t";
	#  print $row{'transcript_end'}-($row{'transcript_start'}-$row{'transcript_start'}+$row{'translate_start'}-1),"\n";
	#  print $row{'transcript_start'}+($row{'transcript_end'}-($row{'transcript_start'}+$row{'translate_end'}-1)),"\t";
	#  print $row{'transcript_end'}-($row{'transcript_start'}-($row{'transcript_start'}+$row{'translate_start'}-1)),"\n";
	# }

	# Excel: =SI(strand=1,tr_start+tl_start-1,tr_end-tl_start+1)

	my ($cds_start, $cds_end, $cds_left, $cds_right);

	# if ($row{'transcript_strand'} == -1 ) {
	#   $cds_start = $cds_right = $row{'transcript_end'} - $row{'translate_start'} + 1;
	#   $cds_end = $cds_left = $row{'transcript_start'} + $row{'translate_end'} - 1;
	# } else {
	#   $cds_start = $cds_left = $row{'transcript_start'} + $row{'translate_start'} - 1;
	#   $cds_end = $cds_right = $row{'transcript_end'} - $row{'translate_end'} + 1;
	# }


	if ($row{'transcript_strand'} == -1 ) {
	  $cds_start = $cds_right = $row{'end_exon_end'} - $row{'translate_start'} + 1;
	  $cds_end = $cds_left = $row{'start_exon_end'} - $row{'translate_end'} + 1;
	} else {
	  $cds_start = $cds_left = $row{'start_exon_start'} + $row{'translate_start'} - 1;
	  $cds_end = $cds_right = $row{'end_exon_start'} + $row{'translate_end'} - 1;
	}

	## JvH DEBUG FOR THE PROBLEM OF CDS START AND STOP
	if ($main::verbose >= 10) {
	  if ($c==1) {
	    &RSAT::message::Debug("row", 
				  "cds",
				  "gene_id",
				  "transcript_id",
				  "translate_id",
				  "tr_start",
				  "tr_end",
				  "tr_strand",
				  "tl_start",
				  "tl_end",
				  "start_exon_id",
				  "start_exon_start",
				  "start_exon_end",
				  "end_exon_id",
				  "end_exon_start",
				  "end_exon_end",
				  "cds_start",
				  "cds_end",
		) if ($main::verbose >= 0);
	  }
	  &RSAT::message::Debug($r,
				$c, 
				$row{'gene_id'},
				$row{'transcript_id'},
				$row{'translate_id'},
				$row{'transcript_start'},
				$row{'transcript_end'},
				$row{'transcript_strand'},
				$row{'translate_start'},
				$row{'translate_end'},
				$row{'start_exon_id'},
				$row{'start_exon_start'},
				$row{'start_exon_end'},
				$row{'end_exon_id'},
				$row{'end_exon_start'},
				$row{'end_exon_end'},
				$cds_start,
				$cds_end,
	      );
	}

	## Print CDS
	print $outstream_cds (join("\t",  
				   $row{'translate_id'}, 
				   $row{'transcript_status'}, 
				   'CDS',
				   $row{'translate_id'}, 
				   $slice_names{$row{'seq_name'}},
				   $cds_left,
				   $cds_right,
				   &PrintStrand($row{'transcript_strand'}),
				   &PrintDescription($row{'gene_description'}),
				   $row{'transcript_id'},
				   $row{'transcript_start'},
				   $row{'transcript_end'},
				   $row{'gene_id'},
				   $row{'gene_name'},
				   $row{'start_exon_id'},
				   $row{'start_exon_start'},
				   $row{'start_exon_end'},
				   $row{'end_exon_id'},
				   $row{'end_exon_start'},
				   $row{'end_exon_end'}),
			      "\n");

	## JvH: THE UTR COORDINATES ARE PROBABLY FALSE I HAVE TO DEBUG THEM

	### UTR
	my $left_utr = "5'UTR";
	my $left_start = $row{'transcript_start'};  #$row{'transcript_start'};
	my $left_end = $cds_start;          #$row{'transcript_start'} + $row{'translate_start'}-2;
	my $right_utr = "3'UTR";
	my $right_start = $cds_end;         #$row{'start_exon_start'} + $row{'translate_end'};
	my $right_end = $row{'transcript_end'};     #$row{'transcript_end'};

	if ( $row{'transcript_strand'} == -1 ) {
	  $left_utr = "3'UTR";
#        $right_start = $row{'transcript_start'};  #$row{'transcript_start'} ;
#        $right_end = $cds_start-1;          #$row{'end_exon_end'} - $row{'translate_end'};
	  $right_utr = "5'UTR";
#        $left_start = $cds_end+1;           #$row{'transcript_end'} - $row{'translate_start'}+2;
#       $left_end = $row{'transcript_end'};       #$row{'transcript_end'};
	}

	if ( $left_start ne $left_end ) {
	  print $outstream_utr join ("\t", 
				     $left_utr."-".$row{'transcript_id'},
				     $left_utr,
				     $left_utr."-".$row{'transcript_external_name'},
				     $slice_names{ $row{'seq_name'} },
				     $left_start,
				     $left_end-1,
				     &PrintStrand($row{'transcript_strand'}),
				     $row{'transcript_id'},
				     $row{'gene_id'}),"\n"; # transcript_id, gene_id
	}

	if ( $right_start ne $right_end ) {
	  print $outstream_utr join ("\t", 
				     $right_utr."-".$row{'transcript_id'},
				     $right_utr,
				     $right_utr."-".$row{'transcript_external_name'},
				     $slice_names{ $row{'seq_name'} },
				     $right_start+1,
				     $right_end,
				     &PrintStrand($row{'transcript_strand'}),
				     $row{'transcript_id'}, 
				     $row{'gene_id'}),"\n";
	}
      }
      $cds_utr_list = ();
    }
#    die "HELLO";

    ################################################################
    ## CDS names
    &RSAT::message::TimeWarn("Getting CDS names (SQL query)") if ($verbose >= 2);

    $sql_query = "select";
    $sql_query .= "    tl.stable_id,";
    $sql_query .= "    tr.stable_id,";
    $sql_query .= "    g.stable_id,";
    $sql_query .= "    e1.db_name,";
    $sql_query .= "    x1.dbprimary_acc,";
    $sql_query .= "    x1.display_label,";
    $sql_query .= "    s.name";
    $sql_query .= " from gene g";
    $sql_query .= "    LEFT join transcript tr using (gene_id)";
    $sql_query .= "    join translation tl using (transcript_id)";
    $sql_query .= "    join object_xref ox1 on (tl.translation_id=ox1.ensembl_id and ox1.ensembl_object_type='Translation')";
    $sql_query .= "    join xref x1 on (x1.xref_id=ox1.xref_id)";
    $sql_query .= "    join external_db e1 on (e1.external_db_id=x1.external_db_id)";
    $sql_query .= "    join seq_region s on (g.seq_region_id=s.seq_region_id)";
    $sql_query .= "    join coord_system using (coord_system_id)";
    $sql_query .= "    join meta using (species_id)";
    $sql_query .= " where  meta.meta_key='species.production_name'";
    $sql_query .= "    and meta_value=?";

    &RSAT::message::Info("\tSQL query", $sql_query) if (($dry_run) || ($verbose >= 3)); ## Report SQL query for info

    unless ($dry_run) {
      my $cds_names_list = $sql_helper->execute(
	-SQL      => $sql_query,
	-CALLBACK => sub {
	  my @row = @{ shift @_ };
	  return { translate_id => $row[0], 
		   transcript_id => $row[1], 
		   gene_id => $row[2], 
		   source => $row[3], 
		   synonym1 => $row[4], 
		   synonym2 => $row[5], 
		   seq_name => $row[6] };
	},
	-PARAMS => [$species] );

      ## Report number of CDS names
      my $cds_names_nb =  scalar(@{$cds_names_list});
      &RSAT::message::TimeWarn("\tgot", $cds_names_nb, "CDS names") if ($main::verbose >= 2);
      
      ## Print CDS names. Each line holds four tab-separated columns: the
      ## translation ID (key of the CDS), one name, its status and its source.
      while ( my $row = shift @{$cds_names_list} ) {
	my %row = %{$row};
	my %printed_name = (); ## Names already printed for the current row
	next unless ( $slice_names{ $row{'seq_name'} });

	## The translation ID itself is the primary name
	print $outstream_cds_names join ("\t", 
					 $row{'translate_id'},
					 $row{'translate_id'},
					 'primary',
					 'translate_id'),"\n";
	$printed_name{ $row{'translate_id'} } = 1;

	## Gene ID as alternate name. The separator is supplied by join()
	## only: passing "\t" as a list element would add two empty columns.
	unless ( $printed_name{ $row{'gene_id'} }) {
	  print $outstream_cds_names join ("\t",
					   $row{'translate_id'},
					   $row{'gene_id'},
					   'alternate',
					   'gene_id'),"\n";
	  $printed_name{ $row{'gene_id'} } = 1;
	}

	## Transcript ID as alternate name
	unless ( $printed_name{ $row{'transcript_id'} }) {
	  print $outstream_cds_names join ("\t",
					   $row{'translate_id'},
					   $row{'transcript_id'},
					   'alternate',
					   'transcript_id'),"\n";
	  $printed_name{ $row{'transcript_id'} } = 1;
	}
	
	## Cross-reference accession (x1.dbprimary_acc) as alternate name.
	## Keyed by the translation ID like all the other rows of this table,
	## the source column being the name of the external database.
	unless ( $printed_name{ $row{'synonym1'} }) {
	  print $outstream_cds_names join ("\t", 
					   $row{'translate_id'},
					   $row{'synonym1'},
					   'alternate',
					   $row{'source'}),"\n";
	  $printed_name{ $row{'synonym1'} } = 1;
	}

	## Cross-reference display label (x1.display_label) as alternate name
	unless ( $printed_name{ $row{'synonym2'} }) {
	  print $outstream_cds_names join ("\t", 
					   $row{'translate_id'},
					   $row{'synonym2'},
					   'alternate',
					   $row{'source'}),"\n";
	}
      }
      $cds_names_list = ();
    }

    ## Get protein sequences (file [Organism_name]_aa.fasta)
    &RSAT::message::TimeWarn("Getting protein sequences (FTP)") if ($verbose >= 2);

    my $prot_seq_ftp = &Get_pep_fasta_ftp($db,$species,$ftp_ensembl_version);
    &RSAT::util::doit("wget -NL -nv $prot_seq_ftp -P $genome_dir");

    @fields = split(/\//, $prot_seq_ftp);
    my $aa_file = $genome_dir."/".$full_species_ID."_aa.fasta".".gz";
    &RSAT::util::doit("mv $genome_dir/$fields[-1] $aa_file");
    &RSAT::util::doit("gzip -df $aa_file");
  }

  ################################################################
  ## Via API ONLY
  else {

    ## Get all features
    my $s=0;
    my $nb_slices = scalar(@slices);

    foreach my $slice (@slices) {
      $s++;

      ## Gene
      my @genes = @{$slice->get_all_Genes()};
      my $nb_genes = scalar(@genes);

      &RSAT::message::TimeWarn("Getting features for slice", $s."/".$nb_slices, $slice->name(), $nb_genes." genes (API)") if ($verbose >= 2);

      my $g=0;
      while ( my $gene = shift @genes ) {
        $g++;

        if (($max_genes > 0) && ($g > $max_genes)) {
	        &RSAT::message::Warning("Stopping gene names after", $g, "genes");
	        last;
        }

        my $gene_known = "Novel";
        $gene_known = "Known" if ($gene->is_known());

        my $gene_external_name = $gene->external_name();
        $gene_external_name = $gene->display_id() unless ($gene->external_name());

        print $outstream_gene $gene->display_id(),"\t",$gene_known,"\t",$gene->biotype(),"\t",$gene_external_name,"\t";
        print $outstream_gene $slice->name(),"\t",$gene->start(),"\t",$gene->end(),"\t",&PrintStrand($gene->strand()),"\t",&PrintDescription($gene->description),"\n";

        if ( $g%200 == 1 && $verbose >= 3) {
          &RSAT::message::TimeWarn("Getting gene", $gene_external_name, $g."/".$nb_genes);
        }

        ## Gene names
        my %print_name = ();

        my @names_list = @{ $gene->get_all_DBEntries() };
        foreach my $name (@names_list) {
          next if ($print_name{$name->display_id()});

          my $status = "alternate";
          $status = "primary" if ($name->display_id() eq $gene_external_name);

          print $outstream_gene_names $gene->display_id(),"\t",$name->display_id(),"\t";
          print $outstream_gene_names $status,"\t",$name->dbname(),"\n";

          $print_name{$name->display_id()} = 1;
        }

	      unless ($print_name{$gene_external_name}) {
          print $outstream_gene_names $gene->display_id(),"\t",$gene_external_name,"\t";
          print $outstream_gene_names 'primary',"\t",'Gene_name',"\n";
	      }

        ## Transcript
        my $transcripts = $gene->get_all_Transcripts();

        while ( my $transcript = shift @{$transcripts} ) {
          next unless ($feat_to_download{$transcript->biotype()});

        	## Choose the appropriate output stream depending on the biotype of the current transcript
          my $outstream_transcript = $out_stream{$transcript->biotype()};

          ## Define transcript (evidence) type
          my $trans_known = "putative";
          $trans_known = "known" if ($transcript->is_known());

          ## Define external_name for current transcript
          my $trans_external_name = $transcript->external_name() || $transcript->display_id();

          my $coding_s = ".";
          my $coding_e = ".";
          $coding_s = $transcript->coding_region_start() if ($transcript->coding_region_start());
          $coding_e = $transcript->coding_region_end() if ($transcript->coding_region_end());

          print $outstream_transcript $transcript->display_id(),"\t",$trans_known,"\t", &PrintTrBiotype($transcript->biotype()),"\t",$gene_external_name,"\t";
          print $outstream_transcript $transcript->slice()->name(),"\t",$transcript->start(),"\t", $transcript->end(),"\t",&PrintStrand($gene->strand()),"\t";
          print $outstream_transcript $coding_s,"\t",$coding_e,"\t";
          print $outstream_transcript &PrintDescription($gene->description),"\t",$gene->display_id(),"\n";

          ## Transcript names
          my $outstream_transcript_names = $out_stream{$transcript->biotype()."_names"};

          my %print_name = ();
          my @names_list = @{ $transcript->get_all_DBEntries() };
          foreach $name (@names_list) {
            next if ($print_name{$name->display_id()});

            my $status = "alternate";
            $status = "primary" if (($trans_external_name) && ($name->display_id() eq $trans_external_name));

            print $outstream_transcript_names $transcript->display_id(),"\t",$name->display_id(),"\t";
            print $outstream_transcript_names $status,"\t",$name->dbname(),"\n";

            $print_name{$name->display_id()} = 1;
          }

          unless ($print_name{$trans_external_name}) {
            print $outstream_transcript_names $transcript->display_id(),"\t",$trans_external_name,"\t";
            print $outstream_transcript_names 'primary',"\t",'',"\n";
          }

      	  ### Add GeneId as synonym for transcript
      	  print $outstream_transcript_names join("\t",
      						 $transcript->display_id(),
      						 $gene->display_id(),
      						 "alternate",
      						 "Gene_Id"), "\n";

          ### Add gene name as synonym for transcript
      	  print $outstream_transcript_names join("\t",
      						 $transcript->display_id(),
      						 $gene_external_name,
      						 "alternate",
      						 "Gene_name"), "\n";

          ## Exon
          my @exons = @{ $transcript->get_all_Exons() };

          foreach my $exon (@exons) {
            ## Columns: id, type, name, contig, start_pos, end_pos, strand,
            ## transcript_id, gene_id. The contig column is filled with the
            ## slice name, as done for the introns, UTRs and CDSs of this
            ## API branch.
            my $exon_line = $exon->display_id()."\t"."exon"."\t".$gene_external_name."\t";
            $exon_line .= $slice->name()."\t";
            $exon_line .= $exon->start()."\t".$exon->end()."\t".&PrintStrand($gene->strand())."\t";
            $exon_line .= $transcript->display_id()."\t".$gene->display_id()."\n";

            print $outstream_exon $exon_line;

            ## An exon is also reported as coding exon when it overlaps the
            ## coding region of its transcript (the same line is written).
            if ($transcript->coding_region_start() && $transcript->coding_region_end() ) {
              if ($exon->end >= $transcript->coding_region_start() && $exon->start <= $transcript->coding_region_end() ) {
                print $outstream_coding_exon $exon_line;
              }
            }

          }

          ## Intron
          my @introns = @{$transcript->get_all_Introns()};
          my $i = 1;
          foreach $intron (@introns) {
            print $outstream_intron "INTRON".$i."-".$transcript->display_id(),"\t","intron","\t","INTRON".$i."-".$gene_external_name,"\t";
            print $outstream_intron $slice->name(),"\t",$intron->start(),"\t",$intron->end(),"\t",&PrintStrand($gene->strand()),"\t";
            print $outstream_intron $transcript->display_id(),"\t",$gene->display_id(),"\n";
            $i++;
          }

          ## UTR
          my $outstream_utr = $out_stream{'utr'};

          if ($transcript->cdna_coding_start()) {
            if ( $transcript->five_prime_utr_Feature() ) {
              print $outstream_utr "5'UTR-".$transcript->display_id(),"\t","5'UTR","\t","5'UTR-".$gene_external_name,"\t";
              print $outstream_utr $slice->name(),"\t";
              print $outstream_utr $transcript->five_prime_utr_Feature()->start(),"\t",$transcript->five_prime_utr_Feature()->end(),"\t",&PrintStrand($gene->strand()),"\t";
              print $outstream_utr $transcript->display_id(),"\t",$gene->display_id(),"\n";
            }
          }

          if ($transcript->cdna_coding_end()) {
            if ( $transcript->three_prime_utr_Feature() ) {
              print $outstream_utr "3'UTR-".$transcript->display_id(),"\t","3'UTR","\t","3'UTR-".$gene_external_name,"\t";
              print $outstream_utr $slice->name(),"\t",;
              print $outstream_utr $transcript->three_prime_utr_Feature()->start(),"\t",$transcript->three_prime_utr_Feature()->end(),"\t",&PrintStrand($gene->strand()),"\t";
              print $outstream_utr $transcript->display_id(),"\t",$gene->display_id(),"\n";
            }
          }

          ## CDS
          if ($transcript->cdna_coding_start() && $transcript->cdna_coding_end()) {
            $prot = $transcript->translation();
            print $outstream_cds $prot->display_id(),"\t","CDS","\t",$gene_external_name,"\t";
            print $outstream_cds $slice->name(),"\t",$prot->genomic_start(),"\t",$prot->genomic_end(),"\t",&PrintStrand($gene->strand()),"\t";
            print $outstream_cds &PrintDescription($gene->description),"\t",$transcript->display_id(),"\t",$gene->display_id(),"\n";

            ## CDS Name
            %print_name = ();

            my @names_list = @{$prot->get_all_DBEntries()};
            foreach my $name (@names_list) {
              next if ($print_name{$name->display_id()});

              my $status = "alternate";
              $status = "primary" if (($trans_external_name) && ($name->display_id() eq $trans_external_name));

              print $outstream_cds_names $prot->display_id(),"\t",$name->display_id(),"\t";
              print $outstream_cds_names $status,"\t",$name->dbname(),"\n";

              $print_name{$name->display_id()} = 1;
            }

            unless ($print_name{$trans_external_name}) {
              print $outstream_cds_names $prot->display_id(),"\t",$trans_external_name,"\t";
              print $outstream_cds_names 'primary',"\t",'',"\n";
            }

            ## Add GeneId as synonym for CDS
	          print $outstream_cds_names join("\t",
					    $prot->display_id(),
					    $gene->display_id(),
					    "alternate",
					    "Gene_Id"), "\n";

            ## Add gene name as synonym for cds
	          print $outstream_cds_names join("\t",
					    $prot->display_id(),
					    $gene_external_name,
					    "alternate",
					    "Gene_Name"), "\n";

            ## Aminoacid sequences
            if ($prot->seq()) {
              my $outstream_aa_seq = $out_stream{'aa_seq'};
              &PrintNextSequence($outstream_aa_seq, "fasta", 60, $prot->seq(), $prot->display_id());
            }
          }
        }
      }
    }
  }


  ################################################################
  ## Report execution time and close output stream
  my $exec_time = &RSAT::util::ReportExecutionTime($start_time); ## This has to be exectuted by all scripts
  print $exec_time if ($main::verbose >= 1); ## only report exec time if verbosity is specified
  close $out;

  exit(0);
}


################################################################
################### SUBROUTINE DEFINITION ######################
################################################################

################################################################
## Display full help message
##
## Formats the POD embedded in this script with pod2text (option -c:
## colored output) and terminates the program with exit status 0.
## This routine never returns to its caller.
sub PrintHelp {
  ## List form of system(): the command is run without a shell, so a
  ## script path containing spaces or shell metacharacters reaches
  ## pod2text unmodified.
  system("pod2text", "-c", $0);
  exit(0);
}

################################################################
## Display short help message
##
## There is no separate short help: the call is delegated to PrintHelp(),
## which prints the full help message and exits.
sub PrintOptions {
  PrintHelp();
}

################################################################
## Format output
##
## Convert a strand value into the one-letter code written in the output
## tables.
##   Argument: strand value (compared numerically with 1)
##   Returns:  "D" when the strand equals 1, "R" for any other value
sub PrintStrand {
  my ($strand_value) = @_;
  return ($strand_value == 1) ? "D" : "R";
}

## Convert a transcript biotype into the feature type written in the
## output tables.
##   Argument: biotype string
##   Returns:  'mrna' for the biotype 'protein_coding'; any other biotype
##             is returned unchanged
sub PrintTrBiotype {
  my ($biotype) = @_;
  return 'mrna' if ($biotype eq 'protein_coding');
  return $biotype;
}


## Return a printable description.
##   Argument: description (may be undefined or empty)
##   Returns:  the description itself when it is a true value, a fixed
##             placeholder otherwise (this includes undef, '' and '0').
## The placeholder is written to the output files, so its spelling is
## kept exactly as it is.
sub PrintDescription {
  my ($text) = @_;
  return $text ? $text : "<no descrition>";
}


################################################################
## Verbose message
##
## Write a report on the $out stream, each line starting with ";":
## the program name followed by the arguments (via &PrintArguments),
## the output files registered in %main::outfile (if any), and the
## Ensembl version parameters. The species suffix is reported only
## when it is set.
sub Verbose {
  print {$out} "; download-ensembl-features ";
  &PrintArguments($out);

  ## List of output files (type and path)
  if (%main::outfile) {
    print {$out} "; Output files\n";
    while (my ($file_type, $file_path) = each %main::outfile) {
      printf {$out} ";\t%-13s\t%s\n", $file_type, $file_path;
    }
  }

  ## Version parameters
  printf {$out} ("; %-22s\t%s\n", "Ensembl safe version", $safe_ensembl_version);
  printf {$out} ("; %-22s\t%s\n", "Ensembl version", $ensembl_version);
  if ($species_suffix) {
    printf {$out} ("; %-22s\t%s\n", "Species suffix", $species_suffix);
  }
}

################################################################
## Read arguments
##
## Parse a copy of @ARGV, one option at a time, and store the values in
## package variables (most of them explicitly in the main:: namespace).
## @ARGV itself is left untouched. An unrecognised option is reported
## through &FatalError(). The POD blocks interleaved with the code
## document each option next to the branch that handles it.
sub ReadArguments {
  my $arg;
  my @arguments = @ARGV; ## create a copy to shift, because we need ARGV to report command line in &Verbose()
  while (scalar(@arguments) >= 1) {
    $arg = shift (@arguments);

=pod

=head1 OPTIONS

=over 4

=item B<-v #>

Level of verbosity (detail in the warning messages during execution).

When the option is not followed by a natural number, the verbosity
level is set to 1.

=cut
    if ($arg eq "-v") {
      ## The level is optional: it is consumed only if it is a natural number
      if (&IsNatural($arguments[0])) {
        $main::verbose = shift(@arguments);
      } else {
        $main::verbose = 1;
      }

=pod

=item B<-h>

Display full help message

=cut
    } elsif ($arg eq "-h") {
      &PrintHelp();

=pod

=item B<-help>

Same as -h

=cut
    } elsif ($arg eq "-help") {
      &PrintOptions();

=pod

=item B<-species species_name>

Species for which the genomic features have to be downloaded
(e.g. Homo_sapiens, Mus_musculus). The name is converted to lower
case.

The option I<-org> is accepted as a synonym of I<-species>.

=cut
    } elsif (($arg eq "-species") || ($arg eq "-org")) {
      $main::species = lc(shift(@arguments));

=pod

=item B<-only_api>

Only use the API to download the features.

This option is maintained for the sake of safety, in case the SQL
interface would be modified.  The download of features (genes, CDS,
mRNA, ...) initially relied on the Ensembl Perl Application
Programmatic Interface (API). However, this required a lot of time,
because the API required to treat objects sequentially in order to
get the dependent objects (e.g. several thousands of queries to
collect the gene synonyms).

In the current version, we replaced this by a direct extraction of all
the required information by SQL queries. This however assumes that the
schema of the relational database is unchanged. In case of trouble
with changes of the SQL structure, the API should in principle still
allow to download the data.

=cut
    } elsif ($arg eq "-only_api") {
      $main::query_mode = "api";


=pod

=item B<-query_mode mode>

Select the way the features are queried. The value is converted to
lower case. The option I<-only_api> is equivalent to
I<-query_mode api>.

=cut

    } elsif ($arg eq "-query_mode") {
      $main::query_mode = lc(shift(@arguments));

=pod


=item B<-type #>

Transcript biotype you want to download

Default :

protein_coding
- processed_transcript
- lincRNA
- sRNA
- snRNA
- miRNA
- misc_RNA
- snoRNA

Other biotype :

unprocessed_pseudogene
- processed_pseudogene
- transcribed_processed_pseudogene
- transcribed_unprocessed_pseudogene
- sense_intronic
- nonsense_mediated_decay
- antisense
- IG_V_pseudogene
- retained_intron
- pseudogene
- unitary_pseudogene
- IG_V_gene
- IG_J_gene
- IG_C_gene
- IG_C_pseudogene
- sense_overlapping
- polymorphic_pseudogene


This option can be used iteratively to download multiple types. Example:
   -type IG_V_gene -type IG_J_gene

Multiple types can also be specified separated by commas.
  -type IG_V_gene,IG_J_gene

=cut
    } elsif ($arg eq "-type") {
      ## Comma-separated values are split and appended to the list of types
      my $type = shift(@arguments);
      push @main::features_to_download, split ",", $type;


# =pod
#
# =item B<-dir #>
#
# The directory in which RSAT genomes must be installed. The selected
# species will be installed in a sub-directory composed of Species name
# and Ensembl genome version.
#
# Default : $RSAT/data/genomes/
#
# =cut
#     } elsif ($arg eq "-dir") {
#       $main::data_dir = shift(@arguments);

=pod

=item   B<-o outputfile>

The output file is used to hold a trace of the transfers (verbosity),
and to store the list of species when the option -available_species is
activated.

If no output file is specified, the standard output is used.

=cut
    } elsif ($arg eq "-o") {
      $outfile{output} = shift(@arguments);


=pod

=item B<-db ensembl|ensemblgenomes|ensemblall>

Default: ensembl

Select the source database. The value is converted to lower case; any
value other than the ones listed below is rejected with a fatal error.

=over

=item I<ensembl>

The "historical" Ensembl database (L<http://ensembl.org/>), restricted
to a series of genomes from model organisms (69 supported species on
Oct 30, 2014).

=item I<ensemblgenomes>

The extended EnsemblGenomes database (L<http://ensemblgenomes.org/>),
which comprises repositories for the following taxa: Bacteria
(actually includes Archaea), Fungi, Metazoa, Plants, Protists.

In Oct 2014, EnsemblGenomes supports >15,000 species.

=item I<ensemblall>

Load both Ensembl and Ensembl Genomes.

=back

=cut
   } elsif ($arg eq "-db") {
    $main::db = lc(shift(@arguments));
    unless (($main::db eq "ensembl")
	    || ($main::db eq "ensemblgenomes")
	    || ($main::db eq "ensemblall")
            ) {
	&RSAT::error::FatalError($main::db, "Invalid value for the option -db. Supported: ensembl,ensemblgenomes,ensemblall");
    }

=pod

=item B<-available_species>

Get all available species on Ensembl

=cut
    } elsif ($arg eq "-available_species") {
      $main::get_available_species = 1;

=pod

=item B<-version #>

The release version of Ensembl: a release number, I<safe> or
I<latest>. Any other value is rejected with a fatal error.

Supported versions: 70 to 72, safe, latest

Default : I<safe>

=over

=item I<safe>

Some Ensembl API functions may change between two Ensembl releases.

For this reason, we defined the "safe" version, which corresponds to
the latest version of Ensembl which has been checked to work with this
script.

=item I<latest>

This corresponds to the latest version of Ensembl. Beware: this
version is not guaranteed to be compatible with RSAT, in case Ensembl
would change their file formats or locations.

=back

=cut
    } elsif ($arg eq "-version") {
      my $version = shift(@arguments);
      if ( &IsNatural($version) || $version eq "safe" || $version eq "latest" ) {
        $main::ensembl_version = $version;
      } else {
        &RSAT::error::FatalError($version,"is not supported");
      }

=pod

=item B<-species_suffix>

Suffix to append to the full species ID.

By default, the full species ID is composed by concatenating the
Ensembl species and assembly version. The option I<-species_suffix>
allows to specify a string (e.g. _ensembl76, _for_testing, ...) that
will be appended to the full species ID.

=cut
    } elsif ($arg eq "-species_suffix") {
    $species_suffix = shift(@arguments);


=pod

=item B<-dry>

Dry run: do not execute the SQL queries, but print them out for checking and debugging.

=cut

    } elsif ($arg eq "-dry") {
      $main::dry_run = 1;

    } else {
      ## Any other argument is an error
      &FatalError(join("\t", "Invalid option", $arg));
    }
  }

=pod

=back

=cut

}

################################################################
## Write the header of a tab-delimited table. Every header line starts
## with "-- ".
##
## Arguments:
##   $file   output filehandle
##   $time   dump date
##   $class  class name, written with the prefix "EnsEMBL::"
##   $table  table name
##   $type   table type (the callers pass 'main' or 'lateral'); it is
##           written under a second "-- table" key
##   @col    column names: one numbered "-- field" line per column,
##           then all the names on a single tab-separated line
sub PrintHeader {
  my ($file, $time, $class, $table, $type, @col) = @_;

  ## General description of the table
  my @header_lines = (
    "-- dump date"."\t".$time,
    "-- class"."\t"."EnsEMBL::".$class,
    "-- table"."\t".$table,
    "-- table"."\t".$type,
  );

  ## One line per column, numbered from 1
  foreach my $rank (1 .. scalar(@col)) {
    push @header_lines, "-- field ".$rank."\t".$col[$rank - 1];
  }

  ## Column titles
  push @header_lines, "-- header";
  push @header_lines, "-- ".join("\t", @col);

  print $file map { $_."\n" } @header_lines;
}

sub OpenOutputStreams {
  my (@features_to_download) = @_;
  my %out_stream = ();

  my $time = &AlphaDate();

  ## Organism File
  my $file_name = 'organism';
  $outfile{$file_name} = &Get_feature_file($species, $assembly_version,$ensembl_version, $species_suffix,$file_name);
  $out_stream{$file_name} = &OpenOutputFile($outfile{$file_name});
  &PrintHeader($out_stream{$file_name}, $time, $file_name, $file_name, 'main', qw(id taxonomy name));

  ## Organism Name File
  $file_name = 'organism_names';
  $outfile{$file_name} = &Get_feature_file($species, $assembly_version,$ensembl_version, $species_suffix,$file_name);
  $out_stream{$file_name} = &OpenOutputFile($outfile{$file_name});
  &PrintHeader($out_stream{$file_name}, $time, 'organism', $file_name, 'lateral', qw(id names status) );

  ## Gene file
  $file_name = 'gene';
  $outfile{$file_name} = &Get_feature_file($species, $assembly_version,$ensembl_version, $species_suffix,$file_name);
  $out_stream{$file_name} = &OpenOutputFile($outfile{$file_name});
  &PrintHeader($out_stream{$file_name}, $time, $file_name, $file_name, 'main', qw(id status type name contig start_pos end_pos strand description));

  ## Gene Names file
  $file_name = 'gene_names';
  $outfile{$file_name} = &Get_feature_file($species, $assembly_version,$ensembl_version, $species_suffix,$file_name);
  $out_stream{$file_name} = &OpenOutputFile($outfile{$file_name});
  &PrintHeader($out_stream{$file_name}, $time, 'gene', $file_name, 'lateral', qw(id names status source) );

  ## Gene_to_reaction file
  $file_name = 'gene_to_reaction';
  $outfile{$file_name} = &Get_feature_file($species, $assembly_version,$ensembl_version, $species_suffix,$file_name);
  $out_stream{$file_name} = &OpenOutputFile($outfile{$file_name});
  &PrintHeader($out_stream{$file_name}, $time, $file_name, $file_name, 'main', qw(id name description source reaction));

  ## Transcripts files & Transcripts Names files
  foreach (@features_to_download) {

    my $file_name = $_;
    $outfile{$file_name} = &Get_feature_file($species, $assembly_version,$ensembl_version, $species_suffix,$file_name);
    $out_stream{$file_name} = &OpenOutputFile($outfile{$file_name});
    &PrintHeader($out_stream{$file_name}, $time, $file_name, $file_name, 'main', qw(id status type name contig start_pos end_pos coding_start coding_end strand description gene_id) );

    $file_name .= "_names";
    $outfile{$file_name} = &Get_feature_file($species, $assembly_version,$ensembl_version, $species_suffix,$file_name);
    $out_stream{$file_name} = &OpenOutputFile($outfile{$file_name});
    &PrintHeader($out_stream{$file_name}, $time, $_, $file_name, 'lateral', qw(id names status source) );
  }

  ## mRNA file: symbolic link to the protein_coding file
  $outfile{'mrna'} = &Get_feature_file( $species, $assembly_version,$ensembl_version, $species_suffix,'mrna');
  &doit("ln -fs ".$outfile{'protein_coding'}." ".$outfile{'mrna'}, 0, 0, 0);

  ## mRNA names file: symbolic link to the protein_coding_names file
  $outfile{'mrna_names'} = &Get_feature_file( $species, $assembly_version,$ensembl_version, $species_suffix,'mrna_names');
  &doit("ln -fs ".$outfile{'protein_coding_names'}." ".$outfile{'mrna_names'}, 0, 0, 0);

  ## Exon File
  $file_name = 'exon';
  $outfile{$file_name} = &Get_feature_file($species, $assembly_version,$ensembl_version, $species_suffix,$file_name);
  $out_stream{$file_name} = &OpenOutputFile($outfile{$file_name});
  &PrintHeader($out_stream{$file_name}, $time, $file_name, $file_name, 'main', qw(id type name contig start_pos end_pos strand transcript_id gene_id) );

  ## Coding Exon File
  $file_name = 'coding_exon';
  $outfile{$file_name} = &Get_feature_file($species, $assembly_version,$ensembl_version, $species_suffix,$file_name);
  $out_stream{$file_name} = &OpenOutputFile($outfile{$file_name});
  &PrintHeader($out_stream{$file_name}, $time, $file_name, $file_name, 'main', qw(id type name contig start_pos end_pos strand transcript_id gene_id) );

  ## Intron file
  $file_name = 'intron';
  $outfile{$file_name} = &Get_feature_file($species, $assembly_version,$ensembl_version, $species_suffix,$file_name);
  $out_stream{$file_name} = &OpenOutputFile($outfile{$file_name});
  &PrintHeader($out_stream{$file_name}, $time, $file_name, $file_name, 'main', qw(id type name contig start_pos end_pos strand transcript_id gene_id) );

  ## UTR files
  $file_name = 'utr';
  $outfile{$file_name} = &Get_feature_file($species, $assembly_version,$ensembl_version, $species_suffix,$file_name);
  $out_stream{$file_name} = &OpenOutputFile($outfile{$file_name});
  &PrintHeader($out_stream{$file_name}, $time, $file_name, $file_name, 'main', qw(id type name contig start_pos end_pos strand transcript_id gene_id) );

  ## CDS file
  $file_name = 'cds';
  $outfile{$file_name} = &Get_feature_file($species, $assembly_version,$ensembl_version, $species_suffix,$file_name);
  $out_stream{$file_name} = &OpenOutputFile($outfile{$file_name});

  ## TEMPORARY: for debugging, the CDS file gets additional columns in SQL query mode
  if ($query_mode eq "sql") {
    &PrintHeader($out_stream{$file_name}, $time, $file_name, $file_name, 'main', qw(id status type name contig start_pos end_pos strand description transcript_id transcript_start transcript_end gene_id gene_name start_exon_id start_exon_start start_exon_end end_exon_id end_exon_start end_exon_end) );
  } else {
    &PrintHeader($out_stream{$file_name}, $time, $file_name, $file_name, 'main', qw(id status type name contig start_pos end_pos strand description transcriptID GeneID) );
  }

  ## CDS Names files
  $file_name = 'cds_names';
  $outfile{$file_name} = &Get_feature_file($species, $assembly_version,$ensembl_version, $species_suffix,$file_name);
  $out_stream{$file_name} = &OpenOutputFile($outfile{$file_name});
  &PrintHeader($out_stream{$file_name}, $time, 'cds', $file_name, 'lateral', qw(id names status source) );

  ## AA seq file: only opened in API query mode (no header is printed here)
  if ($query_mode eq "api") {
    $file_name = 'aa_seq';
    $outfile{$file_name} = &Get_feature_file($species, $assembly_version,$ensembl_version, $species_suffix,$file_name);
    $out_stream{$file_name} = &OpenOutputFile($outfile{$file_name});
  }

  return %out_stream;
}

