#!/usr/bin/perl -w
############################################################
#
# $Id: parse-footprinter,v 1.9 2011/02/17 04:54:49 rsat Exp $
#
# Time-stamp: <2003-07-04 12:48:55 jvanheld>
#
############################################################
#use strict;;
if ($0 =~ /([^(\/)]+)$/) {
    push (@INC, "$`lib/");
}
require "RSA.lib";

################################################################
#### initialise parameters
##
## The variables declared with "local" are package globals of main:
## they are read by the subroutines defined at the bottom of this file
## (ReadArguments, Verbose) and are addressed as $main::... below.
## NOTE(review): helpers loaded from RSA.lib presumably read some of
## them as well (e.g. $main::verbose) -- confirm before changing their
## declaration.
local $start_time = &RSAT::util::StartScript(); ## passed to ReportExecutionTime at the end of the script
my $comment_char = "; ";  ## prefix of the lines printed before the first motif (tab format)
my $out_format = "tab";   ## overwritten by -outformat; only "txt" is tested explicitly, any other value gives the tab output

local %infile = ();       ## key "input" is set by the option -i
local %outfile = ();      ## key "output" is set by the option -o

local $verbose = 0;       ## verbosity level, set by the option -v
local $in = STDIN;        ## default input handle, replaced below by the result of OpenInputFile
local $out = STDOUT;      ## default output handle, replaced below by the result of OpenOutputFile

## Parse the command line. The options -h and -help print a help
## message and exit from within ReadArguments.
&ReadArguments();

################################################################
#### check argument values
## Nothing is checked at this stage: in particular, the value given
## with -outformat is not validated.


################################################################
### open output stream
## $outfile{output} is undefined when the option -o was not given.
## NOTE(review): OpenOutputFile presumably returns STDOUT in that case
## -- confirm in RSA.lib.
$out = &OpenOutputFile($outfile{output});

################################################################
#### print verbose
## The verbose header is written to the output stream itself (see the
## subroutine Verbose), for both output formats.
&Verbose() if ($verbose);

################################################################
##### read input
##
## Each input line is first converted from HTML to plain text. The
## result is then either printed as such (output format "txt"), or
## parsed to export one tab-delimited row per site (default format).
($in) = &OpenInputFile($infile{input});
my $id = "";     ## ID of the motif being parsed ("" before the first motif)
my $score = "";  ## parsimony score of the motif being parsed ("" if not reported)
while (my $line = <$in>) {

    ## Remove some tags to convert HTML to text
    next unless ($line =~ /\S/);
    $line =~ s/\r/\n/g; ## Windows-specific carriage return
    $line =~ s/\n//g;   ## drop the original line breaks: only <br> and <p> produce newlines
    $line =~ s/<br>/\n/g;
    $line =~ s/<p>/\n\n/g;
    $line =~ s/&nbsp;*/ /g;
    $line =~ s/<[^<>]*>/ /g; ## any remaining tag is replaced by a space
    $line =~ s/\n +/\n/g;
    $line =~ s/^ +//g;

    ## print the result in text format
    if ($out_format eq "txt") {
        print $out $line;
        next;
    }


    ################################################################
    ## print the result in tab-delimited format (default)

    if ($line =~ /Motif\s+#(\d+)/) {
        ## Start of a motif description
        $id = $1;

        ## Forget the score of the previous motif: if the header of
        ## this motif contains no score, its sites must not be reported
        ## with the score of the preceding motif.
        $score = "";

        warn "; New motif\t$id\n" if ($main::verbose >= 2);
        if ($line =~ /score\:\s+(\S+)/) {
            $score = $1;
            warn "Invalid parsimony score\t$score\n" unless (&IsReal($score));
        }

    } elsif ($id eq "") {
        ## Print as comment all the lines preceding the first motif.
        ## The comment prefix is inserted after each internal newline,
        ## but not after a newline terminating the text.
        $line =~ s/\n/\n${comment_char}/g;
        $line =~ s/\n${comment_char}$/\n/g;
        print $out $comment_char.$line;
        next;

    } elsif ($line =~ /\S/) {
        ## Site description: organism, position, sequence.
        ## The pattern ' ' (awk mode) is used rather than /\s+/ because
        ## it skips leading whitespace: with /\s+/, a line starting
        ## with a tab or with a newline (resulting from a leading <br>)
        ## gave an empty first field, which shifted all the columns.
        my ($organism, $position, $sequence) = split ' ', $line;

        ## A line with less than three fields gives empty columns,
        ## without "uninitialized value" warnings.
        foreach my $field ($organism, $position, $sequence) {
            $field = "" unless (defined($field));
        }

        warn join ("\t", "; new site", $organism, $position, $sequence), "\n" if ($main::verbose >= 3);

        ## Print the new site in the output file
        print $out join ("\t",
                         $id,
                         $score,
                         $organism,
                         $position,
                         $sequence,
                        ), "\n";

        ## echo the remaining text
        warn $line if ($main::verbose >= 10);
    }
}

## The input stream is only closed if it was opened from a file.
close $in if ($infile{input});


################################################################
###### report the execution time and close the output stream
##
## The execution time report is only printed in verbose mode. The
## output stream is only closed if it was opened on a file (option
## -o).
my $exec_time = &RSAT::util::ReportExecutionTime($start_time);
if ($main::verbose >= 1) {
    print $main::out $exec_time;
}
if ($outfile{output}) {
    close $out;
}


exit(0);


################################################################
################### subroutine definition ######################
################################################################


################################################################
#### display full help message
##
## The message is sent to the pager "more". If the pipe to the pager
## cannot be opened, the message is printed on the standard output
## rather than on an unopened handle.
## This subroutine does not return: it terminates the script.
sub PrintHelp {
  my $help;
  unless (open($help, '|-', 'more')) {
    $help = \*STDOUT;
  }
  print {$help} <<End_of_help;
NAME
	parse-footprinter

        2004 by Jacques van Helden (jvanheld\@bigre.ulb.ac.be)
	
DESCRIPTION
	Parse the motif html file resulting of FootPrinter, and
	exports it either as atext file (just remove the tags) or as a
	tab-delimited file.

CATEGORY
	util

USAGE
        parse-footprinter [-i inputfile] [-o outputfile] [-v]

OPTIONS
	-h	display full help message
	-help	display options
	-v	verbose
	-i inputfile
		if not specified, the standard input is used.
		This allows to place the command within a pipe.
	-o outputfile
		if not specified, the standard output is used.
		This allows to place the command within a pipe.
	-outformat	
		output format. Supported: txt, tab (default)

OUTPUT
   TABLE FORMAT
       The tab-delimited file contains one row per motif instance
       (site). Column contents:
       	   1. motif ID
       	   2. parsimony score
	   3. organism
	   4. site position 
	   5. site sequence
End_of_help
  ## Closing the pipe waits for the pager to terminate.
  close $help;
  exit;
}

################################################################
#### display short help message
##
## The list of options is sent to the pager "more". If the pipe to the
## pager cannot be opened, the list is printed on the standard output
## rather than on an unopened handle.
## This subroutine does not return: it terminates the script.
sub PrintOptions {
  my $help;
  unless (open($help, '|-', 'more')) {
    $help = \*STDOUT;
  }
  print {$help} <<End_short_help;
parse-footprinter options
----------------
-h		(must be first argument) display full help message
-help		(must be first argument) display options
-i		input file
-o		output file
-v		verbose
-outformat	output format. Supported: txt, tab (default)
End_short_help
  ## Closing the pipe waits for the pager to terminate.
  close $help;
  exit;
}


################################################################
#### read arguments
##
## Scan @ARGV and store the option values in the script variables.
##   -v [#]         $verbose: # if the next argument is a natural
##                  number, 1 otherwise
##   -h             print the full help message and exit
##   -help          print the list of options and exit
##   -i file        $infile{input}
##   -o file        $outfile{output}
##   -outformat fmt $out_format
## Every element of @ARGV is examined, including those used as option
## values. Unrecognized arguments are silently ignored.
sub ReadArguments {
    ## The index is not named $a, which is reserved for sort blocks.
    foreach my $i (0..$#ARGV) {
        my $arg = $ARGV[$i];
        my $next_arg = $ARGV[$i+1]; ## undefined after the last argument

        ### verbose
        if ($arg eq "-v") {
            ## The next argument is only tested if there is one, in
            ## order to avoid passing an undefined value to IsNatural
            ## when -v is the last argument.
            if ((defined($next_arg)) && (&IsNatural($next_arg))) {
                $verbose = $next_arg;
            } else {
                $verbose = 1;
            }

            ### detailed help
        } elsif ($arg eq "-h") {
            &PrintHelp();

            ### list of options
        } elsif ($arg eq "-help") {
            &PrintOptions();

            ### input file
        } elsif ($arg eq "-i") {
            $infile{input} = $next_arg;

            ### output file
        } elsif ($arg eq "-o") {
            $outfile{output} = $next_arg;

            ### output format
        } elsif ($arg eq "-outformat") {
            $out_format = $next_arg;

        }
    }
}

################################################################
#### verbose message
##
## Print on the output stream ($out) the name of the program, its
## arguments (via PrintArguments), and the list of input and output
## files. A list is only printed if the corresponding hash is not
## empty.
sub Verbose {
    print $out "; parse-footprinter ";
    &PrintArguments($out);

    ## Title and content of each list of files
    my @sections = (
        ["Input files", \%main::infile],
        ["Output files", \%main::outfile],
    );

    foreach my $section (@sections) {
        my ($title, $files) = @{$section};
        next unless (%{$files});
        print $out "; $title\n";

        ## Lexical variables are used for the keys and values, in
        ## order to leave the package variables $key and $value
        ## untouched. The keys are sorted to obtain a reproducible
        ## order.
        foreach my $key (sort keys %{$files}) {
            my $value = $files->{$key};
            ## A file name can be undefined if an option was given
            ## without value.
            $value = "" unless (defined($value));
            print $out ";\t$key\t$value\n";
        }
    }
}
