#!/usr/bin/perl -w
# get-all-ensembl-human-seqs
#
# Batch driver around retrieve-ensembl-seq.pl.
#
# For each organism, each feature type (mRNA, CDS, intron, firstintron, utr)
# and each repeat-masking mode (unmasked, -rm), the script:
#   1. runs retrieve-ensembl-seq.pl once per chromosome, writing one FASTA
#      file per chromosome into a directory named after the organism;
#   2. concatenates these per-chromosome files into a single FASTA file.
#
# A failing external command is reported on STDERR and the script carries on
# with the next one, so one bad chromosome does not abort the whole batch.

use strict;
use warnings;

my $dbversion = '61';
my @organisms = ('homo_sapiens');
my $upstream_length = 2000;


foreach my $org (@organisms) {
  my $organism = ucfirst($org);

  ## Human autosomes 1 to 22 plus the sex chromosomes (the previous
  ## hard-coded list skipped chromosome 19).
  my @chromosomes = (1 .. 22, 'X', 'Y');

  print ";INFO: retrieving sequences for organism $organism\n";

  ## Create the output directory only when needed, so that re-running the
  ## script does not fail on an already existing directory.
  unless (-d $organism) {
    mkdir($organism)
      or die ";ERROR: cannot create directory $organism: $!\n";
  }

  ###
  ### Retrieve-ensembl-seq parameters
  ###

  ### Feature types
  foreach my $_feattype ('mRNA', 'CDS', 'intron', 'firstintron', 'utr') {

    ## Type and positions: only set for mRNA and CDS. For the other feature
    ## types they stay empty and the corresponding options are omitted.
    my ($type, $from, $to) = ('', '', '');
    if (($_feattype eq 'mRNA') || ($_feattype eq 'CDS')) {
      $type = 'upstream';        ## The -type option value; other example: '-type downstream'
      $from = - $upstream_length; ## Start position of the sequence
      $to = -1;                  ## End position of the sequence
    }

    ## First intron: requested as feature type 'intron' plus the
    ## -firstintron flag; $_feattype is kept as the label for file names.
    my $firstintron = '';
    my $feattype = $_feattype;
    if ($_feattype eq 'firstintron') {
      $firstintron = '-firstintron';
      $feattype = 'intron';
    }

    #    my $noorf = '-noorf';
    my $noorf = '';

    my $maskcoding = '-maskcoding';
    #    my $maskcoding = '';

    ### Repeats masked or not
    foreach my $rm ('', '-rm') {

      ## Part of the file name shared by the per-chromosome files and the
      ## concatenated file. The type prefix is only added when a type is
      ## set, which yields the same names as before without needing to
      ## collapse a double underscore afterwards.
      my $type_prefix = ($type ne '') ? $type."_" : '';
      my $name_suffix = $type_prefix.$_feattype.$from.$to.$maskcoding.$rm.$noorf.".fasta";

      ## retrieve sequences for each chromosome
      foreach my $chrom (@chromosomes) {
        print ";INFO: retrieving sequences from chromosome $chrom\n";

        my $file_name = $organism."_chrom".$chrom."_".$name_suffix;

        print ";INFO: Saving result to file $file_name\n";

        ## Build the argument list, skipping every option whose value is
        ## empty. Previously '-from', '-to' and '-type' were emitted without
        ## a value for intron/firstintron/utr, leaving each of them directly
        ## followed by the next option on the command line.
        ## NOTE(review): if retrieve-ensembl-seq.pl reads the next word as
        ## the option value, this swallowed -firstintron or -maskcoding;
        ## confirm against that script's option parser.
        my @command = ('retrieve-ensembl-seq.pl',
                       '-dbversion', $dbversion,
                       '-org', $organism,
                       '-all',
                       '-chrom', $chrom);
        push @command, '-from', $from if $from ne '';
        push @command, '-to', $to if $to ne '';
        push @command, '-feattype', $feattype;
        push @command, '-type', $type if $type ne '';
        push @command, grep { $_ ne '' } ($firstintron, $maskcoding, $rm, $noorf);
        push @command, '-alltranscripts', '-uniqseqs', '-o', "$organism/$file_name";

        run_command(@command);
      }

      ## Concatenate all chromosome files. This one is passed as a single
      ## string because it needs the shell for the wildcard and the
      ## redirection.
      my $generic_file_name = $organism."_chrom*_".$name_suffix;
      my $concatenation_file_name = $organism."_".$name_suffix;
      run_command("cat $organism/$generic_file_name > $organism/$concatenation_file_name");

    }        ## rm
  }          ## feattypes
}            ## organisms


## Run an external command and report any failure on STDERR.
##
## Arguments: the command, either as a list (program followed by its
## arguments, executed without a shell) or as a single string (handed to the
## shell when it contains shell metacharacters).
## Returns: 1 when the command exited with status 0, 0 otherwise.
## The command's output is not captured.
sub run_command {
  my @command = @_;

  return 1 if system(@command) == 0;

  my $reason;
  if ($? == -1) {
    $reason = "could not be executed: $!";
  } elsif ($? & 127) {
    $reason = "died with signal ".($? & 127);
  } else {
    $reason = "exited with status ".($? >> 8);
  }
  warn ";WARNING: command '@command' $reason\n";
  return 0;
}
