#!/usr/bin/env perl

############################################################
#
# $Id: retrieve-matrix,v 1.0 2017/10/19 14:27:53 thnguyen $
#
# Time-stamp: <2003-08-05 10:24:33 jvanheld>
#
############################################################

=pod

=head1 NAME

retrieve-matrix

=head1 DESCRIPTION

The program extracts specific matrices from a collection of matrices. It takes as input a file containing a collection of matrices in TRANSFAC format, and a list of identifiers to retrieve.
The program outputs each matrix in TRANSFAC format.

=head1 AUTHORS

=over

=item Thi-Thuy-Nga NGUYEN <thnguyen@biologie.ens.fr>

=item Morgane Thomas-Chollier <mthomas@biologie.ens.fr>

=back

=head1 CATEGORY

util
matrix

=head1 USAGE

retrieve-matrix -i collection_file -id identifier(s) [-o outputfile] [-v #] [...]

=head1 INPUT FORMAT

The collection of matrices must be in TRANSFAC format. The program convert-matrix can be used to obtain this format.


=head1 OUTPUT FORMAT

Matrices are exported in TRANSFAC format. The format can then be changed with the program convert-matrix.

=head1 EXAMPLES

 retrieve-matrix -i $RSAT/public_html/motif_databases/RSAT_nonredundant_vertebrates_2017.tf -id cluster_1,cluster_100 -o result.txt

will return the matrices with the accessions: cluster_1 and cluster_100 from the collection RSAT non-redundant vertebrates (2017).

=cut

## Locate the RSAT libraries relative to this script: when the trailing
## component of $0 matches, everything in front of it (the pre-match
## string) is used as the script directory and its "lib/" sub-directory
## is appended to the module search path.
push(@INC, "$`lib/") if ($0 =~ /([^(\/)]+)$/);

## Libraries loaded at run time through the search path set up above.
require "RSA.lib";
require "RSA.seq.lib";

#use experimental 'smartmatch';

## Initialize parameters
local $start_time = &RSAT::util::StartScript();

## Command-line options. They are file-scoped so that ReadArguments() can
## fill them in and Verbose() can report them.
my $outputfile = "";        # -o
my $collection_file = "";   # -i
my $id = "";                # -id
my $id_file = "";           # -id_file

&ReadArguments();

## Mandatory options: a collection file, and one source of identifiers.
## The identifier options are tested as strings (not for truth) so that a
## literal identifier "0" is not mistaken for a missing option.
my $has_id = (defined($id) && $id ne "");
my $has_id_file = (defined($id_file) && $id_file ne "");

if (!$collection_file) {
    &RSAT::error::FatalError("You must specify the path to a collection file (-i)");
}
unless ($has_id || $has_id_file) {
    &RSAT::error::FatalError("You must specify either a list of identifiers (-id), or the path to a file of identifiers list (-id_file)");
}

## Build the list of requested accessions. The comma-separated list (-id)
## takes precedence over the identifier file (-id_file). Surrounding
## whitespace (including the "\r" of CRLF files) is stripped and empty
## entries are dropped: such entries could never match an accession read
## from the collection, which is split on whitespace.
my @ids;
if ($has_id) {
    foreach my $entry (split(",", $id)) {
        $entry =~ s/^\s+|\s+$//g;
        push @ids, $entry if ($entry ne "");
    }
} elsif ($has_id_file) {
    open(my $id_fh, "<", $id_file) or die "Cannot open file $id_file: $!";
    while (my $row = <$id_fh>) {
        $row =~ s/^\s+|\s+$//g;    # also removes the line terminator
        next if ($row eq "");
        push @ids, $row;
    }
    close($id_fh);
}

################################################################################
############################### check parameters ###############################
################################################################################

&RSAT::message::TimeWarn("Checking parameters") if ($main::verbose >= 2);

## Index the requested accessions in a hash. Membership is then one exact
## string lookup per matrix, instead of the smartmatch operator (~~), which
## is flagged experimental/deprecated by many perl releases and scans the
## whole identifier list for every AC line.
my %is_requested = map { ($_ => 1) } @ids;

open(my $fh, "<", $collection_file)
    or die "Cannot open file $collection_file: $!";

## Scan the collection. Each "AC <accession>" line starts a new matrix and
## decides whether the lines up to the next AC line are kept. Lines found
## before the first AC line are not exported.
my $result_matrix = "";
my $isprint = 0;
while (my $row = <$fh>) {
    chomp $row;
    if ($row =~ /^AC\s+/) {
        my @f = split(/\s+/, $row);
        ## $f[1] is undefined when the AC line carries no accession.
        $isprint = (defined($f[1]) && $is_requested{$f[1]}) ? 1 : 0;
    }
    $result_matrix .= $row . "\n" if ($isprint);
}
close($fh);

## $out is deliberately a package variable: Verbose() writes to it.
$out = &OpenOutputFile($outputfile);
print $out $result_matrix;

&Verbose() if ($verbose);
exit(0);

################################################################
#### subroutine definitions
################################################################

################################################################
## Display full help message
sub PrintHelp {
    ## Render the POD embedded in this script with pod2text, then stop.
    ## The command is passed to system() as a list so that no shell is
    ## involved: a script path containing spaces or shell metacharacters
    ## is handed to pod2text unchanged.
    system("pod2text", "-c", $0);
    exit()
}

################################################################
## Display short help message
sub PrintShortHelp {
    ## Print the one-screen option summary through the "more" pager and
    ## stop. If the pager cannot be started, the summary is written to
    ## STDOUT instead of being lost on an unopened handle.
    my $piped = open(my $help, "|-", "more");
    $help = \*STDOUT unless ($piped);
    print $help <<End_short_help;
retrieve-matrix options
--------------------
-i		collection file path
-id		matrix identifier(s), can be a list of accessions separated with commas
-id_file    matrix identifier(s) file which contains a list of accessions (one per line)
-o		outputfile.
End_short_help
    ## Closing the pipe waits for the pager to finish before exiting.
    close($help) if ($piped);
    exit;
}


################################################################
## Read arguments
sub ReadArguments {
    ## Scan @ARGV by position and store the recognized options in the
    ## file-scoped option variables ($outputfile, $collection_file, $id,
    ## $id_file) and in the global $verbose. The value of an option is the
    ## element that follows it; unrecognized elements are ignored.
    ## The index is a lexical variable rather than the package variable $a,
    ## which perl reserves for sort() comparisons.
    foreach my $arg_index (0..$#ARGV) {
        my $arg = $ARGV[$arg_index];
        my $value = $ARGV[$arg_index + 1];   # undef after the last element

=pod

=head1 OPTIONS

=over 4

=item B<-o>

the path to the output file

=cut

        ### output file name
        if ($arg eq "-o") {
            $outputfile = $value;

=pod

=item B<-i collection file>

the path to the collection file which contains the matrices.

=cut

            ### input collection file
        } elsif ($arg eq "-i") {
            $collection_file = $value;

=pod

=item B<-h>

Display full help message

=cut

            ### help request
        } elsif ($arg eq "-h") {
            &PrintHelp();

=pod

=item B<-help>

Display concise help message

=cut

            ### short help request
        } elsif ($arg eq "-help") {
            &PrintShortHelp();

=pod

=item B<-id identifiers>

list of the accessions of the requested matrices, separated by commas.

=cut

            ### input identifiers
        } elsif ($arg eq "-id") {
            $id = $value;

=pod

=item B<-id_file identifiers file>

the path to the identifiers file which contains the list of the accessions
of the requested matrices, one per line.

=cut

            ### input identifiers file
        } elsif ($arg eq "-id_file") {
            $id_file = $value;

=pod

=item B<-v #>

Level of verbosity (detail in the warning messages during execution)

=back

=cut

            ### verbose
        } elsif ($arg eq "-v") {
            ## A -v that is not followed by a natural number means level 1.
            if (&IsNatural($value)) {
                $verbose = $value;
            } else {
                $verbose = 1;
            }
        }
    }
}


################################################################
#### verbose ####
sub Verbose {
    ## Write a ";"-prefixed summary of the run parameters to the global
    ## output handle $out: the program name and arguments, the collection
    ## file, the source of identifiers (the -id list, or else the -id_file
    ## path) and, when one was given, the output file.
    print $out "; retrieve-matrix";
    &PrintArguments($out);
    printf $out "; %-14s\t%s\n", "collection", $collection_file;
    if ($id) {
        printf $out "; %-14s\t%s\n", "identifiers list", $id;
    } elsif ($id_file) {
        printf $out "; %-14s\t%s\n", "identifiers list file", $id_file;
    }
    ## Label corrected from the duplicated "output file file".
    printf $out "; %-14s\t%s\n", "output file", $outputfile if ($outputfile);
    print $out ";\n";
}

__END__

=pod

=head1 SEE ALSO

=over

=item convert-matrix

=back

=cut
