#!/usr/bin/perl
if ($0 =~ /([^(\/)]+)$/) {
    push (@INC, "$`lib/");
}
require "RSA.lib";

### Start-up: record the start time and set the option defaults
$start_time = RSAT::util::StartScript();
$rev = 0;

#### Columns (0-based) of the EC table holding the EC number and the gene
@col{'ec', 'gene'} = (1, 0);

ReadArguments();

################################################################
#### Validate what was given on the command line

#### An organism is mandatory
if (!$organism_name) {
    RSAT::error::FatalError("You should specify an organism\n");
}

#### Location of the EC table inside the organism data directory
$ec_file = $supported_organism{$organism_name}{'data'} . "/genome/cds_ec_number.tab";
#### Former location, kept for reference:
#$ec_file = "$ENV{RSAT}/public_html/data/${organism_name}/genome/cds_EC_number_${organism_name}.tab";
if (!-e $ec_file) {
    RSAT::error::FatalError("EC file does not exist for organism $organism_name\t$ec_file\n");
}

#### Load the organism data: check, then features, then synonyms
if ($verbose >= 1) {
    warn "; Checking organism $organism_name\n";
}
CheckOrganism($organism_name, $feature_table_file);

#### Default feature table: the one registered for the organism
$feature_table_file ||= $supported_organism{$organism_name}{'features'};

if ($verbose >= 1) {
    warn "; Reading ORF table for $organism_name\n";
}
ReadFeatures($organism_name, $feature_table_file, "cds,mrna");

if ($verbose >= 1) {
    warn "; Reading synonyms for $organism_name\n";
}
ReadSynonyms($organism_name);



### Output stream (opened from $outputfile by OpenOutputFile)
$out = OpenOutputFile($outputfile);


################################################################
#### In verbose mode, echo the command and its parameters as
#### comment lines at the top of the output.
if ($verbose) {
    print {$out} "; gene2ec ";
    PrintArguments($out);
    print {$out} ";Input file	$inputfile\n" unless ($inputfile eq "");
    print {$out} ";Output file	$outputfile\n" unless ($outputfile eq "");
    print {$out} join("\t", "; Organism", $organism_name, $selected_organism), "\n";
    print {$out} "; EC file\t$ec_file\n";
}

################################################################
###### read EC numbers
## Fill %ecs from the tab-delimited EC table: each key is an ORF identifier,
## each value a reference to the list of EC numbers found for that ORF.
## The gene name (column $col{'gene'}) is upper-cased and translated to an
## ORF identifier through %orf_id.
## NOTE(review): %orf_id is presumably filled by ReadFeatures/ReadSynonyms
## from RSA.lib -- confirm.
($ecs) = &OpenInputFile($ec_file);
## Announce the step once (it used to be repeated for every line of the file)
warn "Reading ECs\n" if ($verbose >= 2);
while (<$ecs>) {
    ## skip comment lines (;  --  #) and empty lines
    next if (/^;/);
    next if (/^--/);
    next if (/^\#/);
    next unless (/\S/);
    chomp;

    my @fields = split "\t";
    my $ec = $fields[$col{'ec'}];
    my $gene = $fields[$col{'gene'}];

    ## An incomplete row carries no usable gene/EC association
    next unless (defined($gene) && defined($ec) && ($ec ne ""));

    ## Genes unknown to the feature table cannot be queried later on
    ## (queries are resolved through the same %orf_id index), so they are
    ## skipped instead of being accumulated under an empty key.
    my $orf_id = $orf_id{uc($gene)};
    unless ($orf_id) {
        warn join ("\t", "; Skipping EC for unknown gene", $gene, $ec), "\n" if ($verbose >= 3);
        next;
    }

    push @{$ecs{$orf_id}}, $ec;

    warn join ("\t", $ec, $orf_id), "\n" if ($verbose >= 3);

}
close $ecs if $ec_file;



################################################################
##### read query file
## Read the query genes (first word of each line) from the input stream and,
## for each gene that can be identified, print one line per associated EC
## number: the EC number followed by the fields of the query line
## (tab-separated). With $rev set, each EC is printed a second time with a
## trailing "*". Lines starting with ";" and empty lines are skipped.
($in, $input_dir) = &OpenInputFile($inputfile);
my @queries = ();
while (<$in>) {
    next if (/^;/);
    next unless (/\S/);
    chomp;

    ## split ' ' ignores leading whitespace, so the first field is always the
    ## first word of the line (split /\s+/ returned an empty first field for
    ## indented lines, and the gene was then reported as unidentified).
    my @fields = split ' ';
    my $query = $fields[0];
    push @queries, $query;

    ## Gene names are indexed in upper case
    my $orf_id = $orf_id{uc($query)};
    unless ($orf_id) {
        print $out "; WARNING: Cannot identify gene with name $query\n" if ($verbose >=1);
        next;
    }

    print $out "; Query $query\t$orf_id\n" if ($verbose >=2);
    $query{uc($orf_id)} = 1;

    ## Look the list up without dereferencing a missing entry, which would
    ## autovivify an empty list in %ecs for every gene without EC.
    my $ec_list = $ecs{$orf_id};
    unless ($ec_list && @{$ec_list}) {
        print $out "; No EC associated with gene $query\n" if ($verbose >=2);
        next;
    }

    foreach my $ec (@{$ec_list}) {
        print $out join ("\t", $ec, @fields), "\n";
        if ($rev) {
            #### reverse EC for aMAZE path builder (the EC followed by a *)
            print $out join ("\t", "${ec}*", @fields), "\n";
        }
    }
}
close $in unless ($inputfile eq "");

################################################################
## Wrap-up. ReportExecutionTime has to be called by every script; its
## report is only written to the output when verbosity was requested.
my $exec_time = RSAT::util::ReportExecutionTime($start_time);
if ($main::verbose >= 1) {
    print {$main::out} $exec_time;
}

## The output stream is only closed when an output file was specified
if ($outputfile) {
    close($main::out);
}


exit(0);


########################## subroutine definition ############################


sub PrintHelp {
#### display full help message #####
#### The text is piped through the "more" pager; when the pager cannot be
#### started it is written to STDOUT instead, so the help is never lost.
#### Interpolates the global $supported_organisms. Never returns (exits).
  my $help;
  my $paged = open($help, "|-", "more");
  $help = \*STDOUT unless ($paged);
  print {$help} <<End_of_help;
NAME
	gene2ec

        1999 by Jacques van Helden (jvanheld\@bigre.ulb.ac.be)
	
USAGE
        gene2ec -org organism [-i inputfile] [-o outputfile] [-v] [-rev]

DESCRIPTION
	Takes as input a list of genes, and returns the list 
	of reactions (EC numbers) catalyzed by their products in 
	the selected organism.

CATEGORY
	genomics

OPTIONS
	-h	(must be first argument) display full help message
	-help	(must be first argument) display options
	-v	verbose
		An optional natural number can follow to set the
		verbosity level (default level: 1).
	-i inputfile
		if not specified, the standard input is used.
		This allows to place the command within a pipe.
	-o outputfile
		if not specified, the standard output is used.
		This allows to place the command within a pipe.
	-rev	in addition to each EC number, return the reverse EC
		(the EC number followed by a *).
	-org organism
		supported organisms :
$supported_organisms
	
INPUT FORMAT
	Each query gene must come as the first 
	word of a new line. 

	Lines starting with a semicolon (;) are considered
	as comments and skipped. 
	
End_of_help
  ## Only the pager pipe is closed here (this also waits for the pager)
  close $help if ($paged);
  exit;
}

sub PrintOptions {
#### display short help message #####
#### The text is piped through the "more" pager; when the pager cannot be
#### started it is written to STDOUT instead, so the list is never lost.
#### Interpolates the global $supported_organisms. Never returns (exits).
  my $help;
  my $paged = open($help, "|-", "more");
  $help = \*STDOUT unless ($paged);
  print {$help} <<End_short_help;
gene2ec options
----------------
-h	(must be first argument) display full help message
-help	(must be first argument) display options
-i	input file
-o	output file
-v	verbose
-rev	also return reverse ECs (EC followed by a *)
-org	organism

Supported:
$supported_organisms
End_short_help
  ## Only the pager pipe is closed here (this also waits for the pager)
  close $help if ($paged);
  exit;
}


sub ReadArguments {
#### read arguments ####
#### Scans every position of @ARGV and sets the option globals of the script:
####   -v [level]  $verbose (1 when no natural number follows)
####   -i file     $inputfile
####   -o file     $outputfile
####   -rev        $rev (any argument starting with -rev)
####   -org name   $organism_name and $selected_organism
####   -h / -help  hand over to PrintHelp / PrintOptions
#### Dies when the name given with -org is not a key of %supported_organism;
#### the message lists $supported_organisms.
    ## A lexical index is used: the package global $a is reserved for sort
    ## comparison blocks and should not serve as a loop variable.
    foreach my $i (0..$#ARGV) {
        my $arg = $ARGV[$i];
        my $value = $ARGV[$i+1]; ## undefined when $arg is the last argument

        ### verbose ###
        if ($arg eq "-v") {
            ## IsNatural is defined outside this file; presumably it is true
            ## for a natural number -- TODO confirm
            if (&IsNatural($value)) {
                $verbose = $value;
            } else {
                $verbose = 1;
            }

            ### detailed help
        } elsif ($arg eq "-h") {
            &PrintHelp();

            ### list of options
        } elsif ($arg eq "-help") {
            &PrintOptions();

            ### input file ###
        } elsif ($arg eq "-i") {
            $inputfile = $value;

            ### output file ###
        } elsif ($arg eq "-o") {
            $outputfile = $value;

            ### return reverse ECs (a * after the EC)
        } elsif ($arg =~ /^-rev/) {
            $rev = 1;

            ### organism ###
        } elsif ($arg eq "-org") {
            $organism_name = $value;
            if ($supported_organism{$organism_name}) {
                $selected_organism = $supported_organism{$organism_name}->{'name'};
            } else {
                ## Report the name that was actually requested (the message
                ## used to interpolate an undefined variable).
                die "Error: organism $organism_name is not supported\n$supported_organisms\n";
            }
        }
    }
}
