#!/usr/bin/perl -w
# workflow_nature_protocols.pl

use File::Path;
use MIME::Base64;
use SOAP::WSDL;

use lib 'RSATWS';
use MyInterfaces::RSATWebServices::RSATWSPortType;

## Service call
my $soap=MyInterfaces::RSATWebServices::RSATWSPortType->new();

# $soap->get_transport()->timeout(1000); ## Uncomment and adapt this line if you encounter timeout errors (default value of 180 sec is usually enough)

################################################################
## Specify the fixed parameters for this analysis

## File containing the gene clusters
my $cluster_file = "Harbison_2004_sig1_selection.fam";

## Directory where the results will be stored
my $result_dir = "results/sig1_selection";
&File::Path::mkpath($result_dir);
die ("Cannot create result directory ", $result_dir) unless (-d $result_dir);

## Options for retrieve-seq
my $organism = 'Saccharomyces_cerevisiae'; ## Query organism
my $noorf = 1; ## Prevent overlap with upstream sequences

## Options for oligo-analysis (pattern discovery)
my $length = 6; ## oligonucleotide length
my $background = 'upstream-noorf'; ## background model
my $stats = 'occ,proba,rank'; ## fields to return
my $noov = 1; ## Prevent counting mutually overlapping occurrences
my $str = 2; ## Count oligonucleotides on both strands
my $sort = 1; ## Sort oligonucleotides by significance rather than alphabetically
my @lth = ('occ_sig 0'); ## Lower threshold on significance

## Options for dna-pattern (pattern matching)
my $origin = '-0'; ## Use the end of the sequence as reference for positions
my $score_column = 8; ## Use the 8th column as matching score 

## Options for convert-features
my $feature_in = 'dnapat'; ## Input format (dna-pattern result file)
my $feature_out = 'ft'; ## Output format (feature-map input file)

## Options for feature-map
my $legend = 1; ## Draw a legend box
my $scalebar = 1; ## Draw a scale bar
my $scalestep = 50; ## Spacing between steps on the scale bar
my $map_from = -800; ## Left limit for the feature map + scale bar
my $map_to = 0; ## Right limit for the feature map + scale bar
my $scorethick = 1; ## Set box thickness proportional pattern score
my $map_format = 'png'; ## Output format for images

################################################################
## Read clusters from the file


################################################################
## Read the gene clusters
my %clusters = ();
open INPUT, $cluster_file or die "Can't open cluster file: $cluster_file\n";
while (my $line = <INPUT>) {
  chomp $line;
  next if ($line =~ /^#/); ## Skip header lines
  next if ($line =~ /^;/); ## Skip comment lines
  next unless ($line =~  /\S/); ## Skip empty lines
  my ($gene, $cluster) = split /\t/, $line;
  next unless (($gene) && ($cluster)); ## skip lines where either the gene or the cluster is not defined
  push @{$clusters{$cluster}}, $gene;
}
close INPUT;

## Report the number of gene clusters
&PrintTime(join("", "Finished reading ", scalar(keys %clusters), " gene clusters"));

################################################################
## Run the analysis for each cluster
foreach my $cluster (sort keys %clusters) {
  &Analysis($cluster, @{$clusters{$cluster}});  ## analyse the current cluster
}

&PrintTime("ALL CLUSTERS HAVE BEEN ANALYZED");

exit(0);
################################################################
## A small subroutine to print the current time
sub PrintTime {
  my ($message) = @_;
  my ($sec, $min, $hour,$day,$month,$year) = localtime(time()); 
  warn (sprintf "%02d/%02d/%02d %02d:%02d:%02d\t%s\n", 1900+$year,$month+1,$day,$hour, $min, $sec, $message);
}


################################################################
## Subroutine definition
## Run the analysis for a single gene cluster
sub Analysis {
  my ($cluster, @genes) = @_;
  &PrintTime(join "", "Starting to analyze cluster ", $cluster, " (", scalar(@genes), " genes)");

  ################
  ## Retrieve upstream sequences
  my $output_choice = 'server'; ## the result will stay in a file on the server

  ## Arguments/parameters to send to the server
  my %args = (
	      'output' => $output_choice,
	      'organism' => $organism,
	      'query' => \@genes,
	      'noorf' => $noorf
	     );

  ## Send the retrieve-seq request to the server
  my $som = $soap->retrieve_seq({'request' => \%args});

  ## Get the name of the sequence file on the server to use it in next step
  my $sequence_file;
  if ($som){
      my $results = $som->get_response;
      $sequence_file = $results->get_server();
      chomp ($sequence_file);
  } else {
      printf "A fault (%s) occured: %s\n", $som->get_faultcode, $som->get_faultstring;
  }
  warn "\tRemote upstream sequence file\t", $sequence_file, "\n";

  ####################
  ## Purge sequences to discard redundant fragments

  ## Arguments/parameters to send to the server
  %args = (
	   'output' => $output_choice,
	   'tmp_infile' => $sequence_file
	  );

  ## Send the request to the server
  $som = $soap -> purge_seq({'request' => \%args});

  ## Get the name of the purged sequence file on the server to use it in next step
  my $purged_sequence_file;
  if ($som){
    my $results = $som->get_response();
    $purged_sequence_file = $results->get_server();
    chomp($purged_sequence_file);
  } else {
      printf "A fault (%s) occured: %s\n", $som->get_faultcode, $som->get_faultstring;
  }
  warn "\tRemote purged sequence file\t", $purged_sequence_file, "\n";

  ####################
  ## oligo-analysis part
  $output_choice = 'both'; ## the result will stay in a file on the server and will be sent back to the client

  ## Arguments/parameters to send to the server
  %args = (
	   'output' => $output_choice,
	   'tmp_infile' => $purged_sequence_file,
	   'length' => $length,
	   'organism' => $organism,
	   'background' => $background,
	   'stats' => $stats,
	   'noov' => $noov,
	   'str' => $str,
	   'sort' => $sort,
	   'lth' => \@lth,
	   'verbosity' => 1
	  );

  ##  name of the local result file for the oligo-analysis results
  my $client_oligo_file = $result_dir."/".$cluster."_oligos.tab";
  my $server_oligo_file; ## The name of the remote oligo-analysis file will be returned by the server
  my $result; ## The content of the oligo-analysis result

  ## Send the request to the server
  $som = $soap->oligo_analysis({'request' => \%args});
  if ($som){
      my $results = $som->get_response();
      $server_oligo_file = $results->get_server();
      chomp($server_oligo_file);
      $result = $results->get_client();
      open OUTPUT_OLIGO, ">$client_oligo_file" or die "Can't open output file: $!\n";
      print OUTPUT_OLIGO $result;
      close OUTPUT_OLIGO;
      warn "\tRemote oligo-analysis result\t", $server_oligo_file, "\n";
      warn "\tLocal oligo-analysis result\t", $client_oligo_file, "\n";
  } else {
      printf "A fault (%s) occured: %s\n", $som->get_faultcode, $som->get_faultstring;
  }

  ################################################################
  ## Do the rest only if there are discovered patterns
  unless ($result =~ /\n[acgt]/) {
    warn "\tNot a single significant motif for cluster", $cluster, "\n";
    return;
  }

  ####################
  ## pattern assembly
  $output_choice = 'client'; ## the pattern-assembly result is only for storin on the server

  ## Arguments/parameters to send to the server
  %args = (
	   'output' => $output_choice,
	   'tmp_infile' => $server_oligo_file,
	   'verbosity' => 1,
	   'str' => $str,
	   'subst' => 1,
	  );

  ##  name of the local file for the pattern-assembly results
  my $client_assembly_file = $result_dir."/".$cluster."_oligos.asmb";

  ## Send the request to the server
  $som = $soap->pattern_assembly({'request' => \%args});
  if ($som){
      my $results = $som->get_response();
      my $result = $results->get_client();
      open OUTPUT_ASSEMBLY, ">$client_assembly_file" or die "Can't open output file: $!\n";
      print OUTPUT_ASSEMBLY $result;
      close OUTPUT_ASSEMBLY;
      warn "\tLocal pattern-assembly result\t", $client_assembly_file, "\n";
  } else {
      printf "A fault (%s) occured: %s\n", $som->get_faultcode, $som->get_faultstring;
  }
  ################################################################
  ## Run dna-pattern to detect matching positions
  $output_choice = 'server'; ## the result will stay in a file on the server

  ## Arguments/parameters to send to the server
  %args = ('output' => $output_choice,
	   'tmp_infile' => $sequence_file,
	   'tmp_pattern_file' => $server_oligo_file,
	   'origin' => $origin,
	   'str' => $str,
	   'sort' => $sort,
	   'return' => 'sites,limits',
	   'score' => $score_column
	  );

  ## Send the request to the server
  $som = $soap->dna_pattern({'request' => \%args});

  ## name of the result file on the server to use it in next step
  my $server_dnapat_file;
  if ($som) {
      my $results = $som->get_response();
      $server_dnapat_file = $results->get_server();
      chomp($server_dnapat_file);
      warn "\tRemote dna-pattern result\t", $server_dnapat_file, "\n";
  } else {
      printf "A fault (%s) occured: %s\n", $som->get_faultcode, $som->get_faultstring;
  }

  ####################
  ## convert features from dna-pattern to feature-map format

  ## Arguments/parameters to send to the server
  %args = ('output' => $output_choice,
	   'tmp_infile' => $server_dnapat_file,
	   'from' => $feature_in,
	   'to' => $feature_out
	  );

  ## Send the request to the server
  $som = $soap->convert_features({'request' => \%args});

  ## Get the name of the result file on the server to use it in next step
  my $feature_file;
  if ($som) {
      my $results = $som->get_response();
      $feature_file = $results->get_server();
      chomp($feature_file);
      warn "\tRemote feature file\t", $feature_file, "\n";
  } else {
      printf "A fault (%s) occured: %s\n", $som->get_faultcode, $som->get_faultstring;
  }

  ################################################################
  ## Draw the feature-map
  $output_choice = 'client'; ## the result will be sent back to the client

  ## Arguments/parameters to send to the server for the feature-map
  %args = (
	   'output' => $output_choice,
	   'title' => join("", $cluster, " (", scalar(@genes), " genes)"),
	   'tmp_infile' => $feature_file,
	   'tmp_sequence_file' => $sequence_file,
	   'legend' => $legend,
	   'scalebar' => $scalebar,
	   'scalestep' => $scalestep,
	   'scorethick' => $scorethick,
	   'from' => $map_from,
	   'to' => $map_to,
	   'format' => $map_format
	  );

  ## Send the request to the server
  $som = $soap->feature_map({'request' => \%args});

  ## Get the result and print it to a file
  if ($som) {
      my $results = $som->get_response();
      my $result = $results->get_client();
      my $client_map_file = $result_dir."/".$cluster."_map.".$map_format;
      open OUTPUT_MAP, ">$client_map_file" or die "Can't open output file: $!\n";
      binmode OUTPUT_MAP;
      print OUTPUT_MAP decode_base64($result);
      close OUTPUT_MAP;
      chomp($client_map_file);
      warn "\tLocal feature-map file\t", $client_map_file, "\n";
  } else {
      printf "A fault (%s) occured: %s\n", $som->get_faultcode, $som->get_faultstring;
  }
}
