#!/usr/bin/perl -w
############################################################
#
# $Id: random-graph,v 1.39 2010/12/10 09:53:23 rsat Exp $
#
############################################################

## use strict;

=pod

=head1 NAME

random-graph

=head1 DESCRIPTION

Generate random graphs, either from an existing graph or from scratch,
according to different randomization procedures.

=head1 AUTHORS

Sylvain Brohee <sbrohee@ulb.ac.be>
Jacques van Helden <Jacques.van.Helden@ulb.ac.be>

=head1 CATEGORY

Graph utilities

=head1 USAGE
    
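random-graph -random_type scratch|node_degree|node_degree_distrib|ER [-i input_graph_file] [-in_format tab|gml] [-scol #] [-tcol #] [-wcol #] [-nodes #] [-edges #] [-nodefile node_file] [-n #] [-o outputfile] [-out_format tab|gml|dot] [-self] [-duplicate] [-directed] [-bidirectional] [-degree #] [-no_single] [-normal] [-mean #] [-sd #] [-col_conservation] [-iter #] [-v #]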


=head1 INPUT FORMAT

A graph (in TAB or GML format) to be shuffled or whose nodes have to
be used to create a random graph from scratch.

A list of nodes (-nodefile) may also be given as input file.

=head1 OUTPUT FORMAT

Graph in GML, TAB or DOT format (to be specified by the -out_format
option)

=cut


BEGIN {
    ## Append the "lib/" directory located beside this script to the module
    ## search path. After a successful match, $` holds everything that
    ## precedes the last path component of $0 (i.e. the script directory).
    push @INC, "$`lib/" if ($0 =~ m{[^()/]+$});
}
require "RSA.lib";
require "RSA.stat.lib";
require RSAT::Graph2;


################################################################
## Main package
package main;
{

  ################################################################
  ## Initialise parameters
  ##
  ## The parameters are package variables (some of them localised) rather
  ## than lexical variables, because ReadArguments() assigns them by name.
  local $start_time = &RSAT::util::StartScript();


  ## Names of the input and output files, indexed by role
  ## (keys used in this script: input, nodefile, output)
  %main::infile = ();
  %main::outfile = ();

  $main::verbose = 0;
  $main::out = STDOUT;

  ## Input formats
  local $input_format = "tab";
  %supported_input_format = (
    tab=>1,
    gml=>1
  );
  ## Keys are sorted so that the list displayed in the error messages
  ## always comes in the same order.
  $supported_input_formats = join (",", sort keys %supported_input_format);

  ## Columns of the input file (options -scol, -tcol and -wcol)
  local $source_col = 1;
  local $target_col = 2;
  local $weight_col = 0;

  ## Output formats
  local $output_format = "tab";
  %supported_output_format = (
    dot=>1,
    gml=>1,
    tab=>1
  );
  $supported_output_formats = join (",", sort keys %supported_output_format);

  ## Number of requested nodes (option -nodes)
  $req_nodes = 0;

  ## Number of requested edges (option -edges)
  $req_edges = 0;

  ## Self loops are not allowed unless option -self is used
  $self = 0;

  ## Maximal degree of the nodes (option -degree).
  ## The value stays -1 as long as the option is not used.
  $max_degree = -1;

  ## Duplicated edges are not allowed unless option -duplicate is used
  $duplicated = 0;

  ## Directed edges (option -directed)
  $directed = 0;
  ## Set by option -bidirectional.
  ## NOTE(review): this variable is not read anywhere else in this file.
  $bidirectional = 0;

  ## Source and target nodes stay source and target nodes in the random
  ## graph (option -col_conservation)
  $col_conservation = 0;

  ## Nodes and weights
  @nodes = ();
  @source_nodes = ();
  @target_nodes = ();
  @weights = ();

  ## Number of random graphs to create (by default = 1)
  $n = 1;

  ## Mean value of the weight of the edges ("null" = not specified)
  $mean = "null";

  ## Standard deviation of the weight of the edges ("null" = not specified)
  $sd = "null";

  ## Number of iterations (option -iter)
  $iter = 300;

  ## Randomization type (mandatory, option -random_type)
  $random_type = "";

  ## Set to 1 by option -normal
  $normal = 0;

  ## Set to 1 by option -no_single
  $single = 0;

  ## Node prefix
  $node_prefix = "";

  ## Edge prefix
  $edge_prefix = "";

  %supported_random_type = (
    scratch=>1,
    node_degree=>1,
    node_degree_distrib=>1,
    ER=>1
  );

  $supported_random_type = join (",", sort keys %supported_random_type);

  ################################################################
  ## Read argument values
  &ReadArguments();

  ################################################################
  ## Check argument values
  ##
  ## Each incompatible combination of options stops the program with
  ## an explicit error message.

  ## Number of nodes versus node file / input graph
  if ($req_nodes > 0 && defined($main::infile{nodefile})) {
    &RSAT::error::FatalError("When creating a graph from scratch with a set of nodes, you must not specify the number of requested nodes");
  }
  if ($req_nodes > 0 && ($random_type ne "scratch")) {
    &RSAT::error::FatalError("Option -nodes is only valid for random graphs of type \"scratch\" (option -random_type scratch).");
  }
  if ($req_nodes == 0 && !defined($main::infile{nodefile}) && !$main::infile{input}) {
    &RSAT::error::FatalError("When using no starting graph, you must specify the number of requested nodes (-nodes)");
  }

  ## Input files
  if ($main::infile{input} && ($random_type eq "scratch")) {
    &RSAT::error::FatalError("You must not specify an input file when generating a graph from scratch. Instead, specify the nodes (-nodefile option) or the number of nodes and of edges (-nodes or -edges options)");
  }
  if ($main::infile{input} && !-e $main::infile{input}) {
    &RSAT::error::FatalError("File", $main::infile{input}, "does not exist");
  }
  ## Several random graphs cannot be written to the standard output
  if ($n > 1 && !$main::outfile{output}) {
    &RSAT::error::FatalError("You must specify a prefix for the output files when requiring more than one output random graph");
  }
  if ($main::infile{nodefile} && !-e $main::infile{nodefile}) {
    &RSAT::error::FatalError("File", $main::infile{nodefile}, "does not exist");
  }
  if ($main::infile{nodefile} && $main::infile{input}) {
    &RSAT::error::FatalError("Cannot specify both a file containing node names and a graph file");
  }

  ## Mean and standard deviation of the edge weights
  if ($sd ne 'null' && $mean ne 'null' && ($random_type eq "node_degree" || $random_type eq "node_degree_distrib")) {
    &RSAT::error::FatalError("You must not specify randomization type", $random_type, "with -sd and -mean option");
  }
  if (($sd ne 'null' && $mean eq 'null') || ($sd eq 'null' && $mean ne 'null')) {
    &RSAT::error::FatalError("You must specify both standard deviation and mean value for the edge weight");
  }

  ## A value different from the default (300) reveals that -iter was used
  if (($iter != 300) && ($random_type ne "node_degree")) {
    &RSAT::error::Warning("\t", "Option -iter is only useful with node_degree randomization type");
  }

  ## Randomization type
  if ($random_type eq "") {
    &RSAT::error::FatalError("You must specify a randomization type.");
  } elsif (($random_type eq "node_degree" || $random_type eq "node_degree_distrib" || $random_type eq "ER") && !exists($main::infile{input})) {
    &RSAT::error::FatalError("You must specify an input graph when using randomization type", $random_type);
  } elsif (($random_type eq "node_degree" || $random_type eq "node_degree_distrib") && $max_degree > 0) {
    ## $max_degree is -1 by default and strictly positive when option
    ## -degree is used, so that any user-specified value (1 included)
    ## is detected here.
    &RSAT::error::FatalError("Option -degree is not compatible with randomization type", $random_type);
  } elsif (($random_type ne "ER" &&  $random_type ne "scratch") && $single != 0) {
    &RSAT::error::FatalError("Option -no_single cannot be used with randomization type ", $random_type);
  }
  if ($random_type ne "ER" && $col_conservation) {
    &RSAT::error::FatalError("Option -col_conservation must be used with randomization ER (-random_type ER option)");
  }

  ################################################################
  ## Load the input graph (when one is provided) and collect its
  ## nodes and its edge weights
  my $graph = RSAT::Graph2->new();
  if ($main::infile{input}) {
    $graph->graph_from_text($input_format, $main::infile{input}, $source_col, $target_col, $weight_col);
    @arcs = $graph->get_attribute("arcs");

    ## Node names are used as hash keys in order to build non-redundant lists:
    ## - %seen_node: for the option where all the nodes are taken together
    ## - %seen_source / %seen_target: for the option where source nodes
    ##   remain sources and target nodes remain targets
    my %seen_node = ();
    my %seen_target = ();
    my %seen_source = ();
    foreach my $arc (@arcs) {
      my $source = $arc->[0];
      my $target = $arc->[1];
      $seen_node{$source}++;
      $seen_node{$target}++;
      $seen_source{$source}++;
      $seen_target{$target}++;
      ## Third field of the arc: weight of the edge
      push @weights, $arc->[2];
    }
    @nodes = keys %seen_node;
    @source_nodes = keys %seen_source;
    @target_nodes = keys %seen_target;
  }

  ################################################################
  ## Read the nodes of the node file
  ##
  ## The first word of each line is taken as a node name. Header lines
  ## (starting with #), comment lines (starting with -- or ;) and empty
  ## lines are skipped. The number of requested nodes is then set to the
  ## number of nodes collected.
  if ($infile{nodefile}) {
    my $line_nb = 0;
    &RSAT::message::TimeWarn("Reading nodes from file", $infile{nodefile}) if ($main::verbose >= 2);
    my ($node_handle) = &OpenInputFile($infile{nodefile});
    while (my $line = <$node_handle>) {
      $line_nb++;
      next if ($line =~ /^\#/);	  ## Skip header lines
      next if ($line =~ /^--/);	  ## Skip comment lines
      next if ($line =~ /^;/);	  ## Skip comment lines
      next unless ($line =~ /\S/); ## Skip empty lines
      chomp($line);
      my @fields = split /\s+/, $line;
      my $node =  $fields[0];
      ## The first field is empty when the line starts with a space.
      ## The test is done on the string itself rather than on its truth
      ## value, so that a node named "0" is accepted.
      if (defined($node) && $node ne "") {
	push @nodes, $node;
      } else {
	&RSAT::message::Warning("Line", $line_nb, "starts with a space. Skipped.");
      }
    }
    close $node_handle;
    $req_nodes = scalar @nodes;
  }
    
  ############################################################################
  ## Erdos-Renyi randomization: take the size of the random graph from
  ## the input graph and check its weight distribution
  if ($random_type eq 'ER') {
    ## Number of nodes and edges of the input graph
    ($req_nodes, $req_edges) = $graph->properties();

    ## Properties of the weight distribution of the input graph.
    ## NOTE(review): these are lexical variables, distinct from the global
    ## $mean and $sd set by options -mean and -sd (the globals are the ones
    ## passed later to create_random_graph) -- confirm this is intended.
    my ($weights_are_real, $weight_mean, $weight_sd) = $graph->weight_properties();
    my $cannot_compute = ($normal && $weight_mean eq "null" && $weight_sd eq "null" && !$weights_are_real);
    if ($cannot_compute) {
      my @message_lines = (
	"Cannot compute the mean and standard deviation of the edges : edge weights contain\n\tat least one non real value",
	"\t"."Please check the edges value or enter a mean or sd value manually (-mean # -sd # options)",
      );
      &RSAT::error::FatalError(join("\n", @message_lines)."\n");
    }
    if ($main::verbose >= 2) {
      &RSAT::message::Info("Mean weight of the edges", $weight_mean, "with deviation", $weight_sd);
    }
  }

  ################################################################
  ## Output file name. With a single random graph, the value of option -o
  ## is used as such (no value = standard output).
  $out_file_name = $main::outfile{output};

  ################################################################
  ## Create the random graph(s)
  for (my $i = 1; $i <= $n; $i++) {
    &RSAT::message::psWarn("\t","Creating random graph",$i,"on",$n) if ($main::verbose >= 2);
    ## Create output file name if more than one random graph is requested:
    ## [prefix]_[graph number].[output format]. The node names are then
    ## prefixed with the graph number.
    if ($n > 1) {
      $out_file_name = $main::outfile{output};
      $out_file_name.= "_".$i.".".$output_format;
      $node_prefix = "g".$i."_";
    }
    ## Open output stream
    $main::out = &OpenOutputFile($out_file_name);
    ## The verbose message is written in the output file for the tab
    ## format only; for the other formats it goes to the standard output.
    if ($output_format eq "tab") {
      $main::verbose_out = $main::out;
    } else {
      $main::verbose_out = STDOUT;
    }
    ## Generate the random graph according to the randomization type
    if ($random_type eq 'scratch' || $random_type eq 'ER') {
      my $nodes_ref = \@nodes;
      my $source_nodes_ref = \@source_nodes;
      my $target_nodes_ref = \@target_nodes;
      my $weights_ref = \@weights;
      $rdm = $graph->create_random_graph($nodes_ref, $req_nodes, $req_edges, $self, $duplicated, $directed, $max_degree, $single, $mean, $sd, $normal, $col_conservation, $weights_ref, $source_nodes_ref, $target_nodes_ref, $node_prefix, $edge_prefix);
    } elsif ($random_type eq 'node_degree') {
      $rdm = $graph->randomize($self, $duplicated, $directed, $iter);
    } elsif ($random_type eq 'node_degree_distrib') {
      $rdm = $graph->random_graph_degree_distrib();
    }
    ## Print verbose
    &Verbose() if ($main::verbose);
    ## Print output
    print $main::out $rdm->to_text($output_format);


    $rdm->empty_graph();

    ## Close the output file before opening the next one, so that the
    ## handles do not accumulate when several random graphs are requested.
    ## The standard output (no output file name) is left open.
    close $main::out if ($out_file_name);
  }

  ################################################################
  ## Report execution time
  my $exec_time = &RSAT::util::ReportExecutionTime($start_time); ## This has to be executed by all scripts
  warn $exec_time if ($main::verbose >= 1); ## only report exec time if verbosity is specified
}

################################################################
################### SUBROUTINE DEFINITION ######################
################################################################


################################################################
## Display full help message
##
## The POD documentation of this script is formatted with pod2text,
## then the program exits.
sub PrintHelp {
    ## List form of system(): the script path is passed as a single
    ## argument without going through a shell, so that paths containing
    ## spaces or shell metacharacters are handled properly.
    system("pod2text", "-c", $0);
    exit();
}

################################################################
## Display short help message
##
## There is no separate short message for this script: the full help
## message is displayed.
sub PrintOptions {
    PrintHelp();
}

################################################################
## Read arguments
##
## Parse the command-line options (from a copy of @ARGV) and store their
## values in the package variables initialised in the main block. The
## POD description of each option is interleaved with the code handling
## it. An invalid option or option value stops the program with an error
## message.
sub ReadArguments {
    my $arg;
    my @arguments = @ARGV; ## create a copy to shift, because we need ARGV to report command line in &Verbose()
    while (scalar(@arguments) >= 1) {
        $arg = shift (@arguments);
        ## Verbosity
=pod

=head1 OPTIONS

=over 4

=item B<-v #>

Level of verbosity (detail in the warning messages during execution)

=cut
        if ($arg eq "-v") {
            ## The verbosity level is optional (default: 1)
            if (&IsNatural($arguments[0])) {
                $main::verbose = shift(@arguments);
            } else {
                $main::verbose = 1;
            }

            ## Help message
=pod

=item B<-h>

Display full help message

=cut
        } elsif ($arg eq "-h") {
            &PrintHelp();

            ## List of options
=pod

=item B<-help>

Same as -h

=cut
        } elsif ($arg eq "-help") {
            &PrintOptions();

            ## Input file
=pod

=item B<-i inputfile>

File containing an input graph. The graph is loaded and randomized
according to various random models.

Alternatively, the program can be used without input file, by
specifying the number of nodes (option -nodes) and edges (option
-edges).

=cut
        } elsif ($arg eq "-i") {
            $main::infile{input} = shift(@arguments);


            ## Number of nodes
=pod

=item B<-nodes #>

Number of requested nodes.

This option is incompatible with the option -i.

=cut
        } elsif ($arg eq "-nodes") {
            $req_nodes = shift (@arguments);
            unless (&IsNatural($req_nodes) && ($req_nodes > 0)) {
                &RSAT::error::FatalError(join("\t", $req_nodes, "Invalid value for the number of requested nodes. Must be a strictly positive natural number"));
            }



            ## File containing the node names
=pod

=item B<-nodefile node_file>

Name of a file containing node names. This file must be in the
tab-delimited format.  The first word of each line will be considered
as a node name.

=cut
        } elsif ($arg eq "-nodefile") {
            $main::infile{nodefile} = shift(@arguments);

            ## Number of edges
=pod

=item B<-edges #>

Number of requested edges.

=cut
        } elsif ($arg eq "-edges") {
            $req_edges = shift (@arguments);
            unless (&IsNatural($req_edges) && ($req_edges > 0)) {
                &RSAT::error::FatalError(join("\t", $req_edges, "Invalid value for the number of requested edges. Must be a strictly positive natural number"));
            }

            ## Number of random graphs to create, by default = 1
=pod

=item B<-n #>

Number of random graphs to create. By default, n = 1. If n is > 1,
then you must specify an output prefix for the output files (-o
option). In this case, the name of the files will be
prefix_graph_number.[tab|gml|dot] according to the output format.

=cut
        } elsif ($arg eq "-n") {
            $main::n = shift(@arguments);
            unless (&IsNatural($n) && ($n > 0)) {
                &RSAT::error::FatalError(join("\t", $n, "Invalid value for the number of random-graphs. Must be a strictly positive natural number > 0"));
            }

            ## Output file
=pod

=item	B<-o outputfile>

If no output file is specified, the standard output is used.  This
allows to use the command within a pipe. In case more than one random
graph is required, you must specify a prefix for the output files.

=cut
        } elsif ($arg eq "-o") {
            $main::outfile{output} = shift(@arguments);

            ### Input format
=pod

=item B<-in_format input_format>

Input format. Supported: tab, gml. By default, the input format is
tab.

=cut
        } elsif ($arg eq "-in_format") {
            $input_format = shift(@arguments);
            &RSAT::error::FatalError("$input_format\tInvalid input format. Supported: $supported_input_formats")
                unless ($supported_input_format{$input_format});

            ### Randomization type
=pod

=item B<-random_type random_type>

Randomization type.

Supported: scratch, node_degree, node_degree_distrib, ER


From B<scratch>, builds a graph totally at random. You may specify the
node names with a file containing node names (-nodefile option), or
specify the number of nodes (-nodes option). An input graph file (-i
option) cannot be used with this randomization type. The edges are
generated using the Erdos-Renyi distribution. You may compute edge
weight values by specifying a mean and a standard deviation. Weights
will then be attributed according to the normal distribution.

B<ER> (Erdos-Renyi randomization), corresponds to the randomization of
an input graph, keeping the nodes and the number of edges but changing
its characteristics (node degree, clustering coefficient, ...). If you
specify a weight column for the input graph, the weights of the output
graph will be computed according to these values following the normal
distribution (considering the weight distribution of the original
graph). If neither the weight column nor the desired mean and standard
deviation of the weights of the edges of the output graph are
specified, no weight will be added.

With the B<node_degree> randomization type, each node will keep the
same degree as in the input graph (edge randomization). Shuffling the
edges in a graph often creates duplicated edges, this is why the
program re-shuffles the duplicated edges a given number of times. This
number of times can be specified with the -iter option. So even if the
-duplicate option is not specified, there might be some remaining
duplicated edges but this number is minimized by random-graph.

With the B<node_degree_distrib> randomization type, the global
distribution of node degree will remain the same as in the input graph
but the degree of each individual node will be changed.


=cut
        } elsif ($arg eq "-random_type") {
            $random_type = shift(@arguments);
            ## Report the rejected randomization type in the error message
            &RSAT::error::FatalError("$random_type\tInvalid randomization type. Supported: $supported_random_type")
                unless ($supported_random_type{$random_type});

            ### Output format
=pod

=item B<-out_format output_format>

Output format. Supported: tab, dot, gml. By default, the output format
is tab.

=cut
        } elsif ($arg eq "-out_format") {
            $output_format = shift(@arguments);
            &RSAT::error::FatalError("$output_format\tInvalid output format. Supported: $supported_output_formats")
                unless ($supported_output_format{$output_format});


            ## Source column
=pod

=item B<-scol #>

Source column. Column containing the source nodes.

=cut
        } elsif ($arg eq "-scol") {
            $source_col = shift (@arguments);
            unless (&IsNatural($source_col) && ($source_col > 0)) {
                &RSAT::error::FatalError(join("\t", $source_col, "Invalid value for the source column. Must be a strictly positive natural number"));
            }

            ## Target column
=pod

=item B<-tcol #>

Target column. Column containing the target nodes.

=cut
        } elsif ($arg eq "-tcol") {
            $target_col = shift (@arguments);
            unless (&IsNatural($target_col) && ($target_col > 0)) {
                &RSAT::error::FatalError(join("\t", $target_col, "Invalid value for the target column. Must be a strictly positive natural number"));
            }

            ## Weight column
=pod

=item B<-wcol #>

Weight column. Column containing the weights of the edges.

=cut
        } elsif ($arg eq "-wcol") {
            $weight_col = shift (@arguments);
            unless (&IsNatural($weight_col) && ($weight_col > 0)) {
                &RSAT::error::FatalError(join("\t", $weight_col, "Invalid value for the weight column. Must be a strictly positive natural number"));
            }


            ## Mean value of the weights on arcs
=pod

=item B<-mean #>

Mean value of the weight of the arcs.
This argument can only be used with the scratch and ER randomization
types and must be combined with the -sd option.

=cut
        } elsif ($arg eq "-mean") {
            $main::mean = shift (@arguments);
            unless (&IsReal($mean)) {
                &RSAT::error::FatalError(join("\t", $mean, "Invalid value for mean weight of the edge"));
            }

            ## Standard deviation of the weights on arcs
=pod

=item B<-sd #>

Standard deviation value of the weight of the arcs.
This argument can only be used with the scratch and ER randomization
types and must be combined with the -mean option.

=cut
        } elsif ($arg eq "-sd") {
            $main::sd = shift (@arguments);
            unless (&IsReal($sd)) {
                &RSAT::error::FatalError(join("\t", $sd, "Invalid value for mean standard deviation of the edge"));
            }

            ## Number of iterations
=pod

=item B<-iter #>

Number of iterations for the randomization procedure (default = 300).
This option can only be used with the node_degree randomization type.

=cut
        } elsif ($arg eq "-iter") {
            $main::iter = shift (@arguments);
            unless (&IsInteger($iter)) {
                ## Report the rejected number of iterations in the error message
                &RSAT::error::FatalError(join("\t", $iter, "Invalid value for the number of iterations of the node_degree randomization procedure"));
            }

            ## Maximal degree of the nodes
=pod

=item B<-degree #>

Maximal degree of the nodes of the random graph (ER and scratch
randomization types only).  For a real Erdos-Renyi randomization, you
must not use this option.

=cut
        } elsif ($arg eq "-degree") {
            $max_degree = shift (@arguments);
            unless (&IsNatural($max_degree) && ($max_degree > 0)) {
                &RSAT::error::FatalError(join("\t", $max_degree, "Invalid value for the maximal degree of the nodes. Must be a strictly positive natural number"));
            }

            ## No single node
=pod

=item B<-no_single>

Prevent the ER graph from containing nodes with no neighbour. For a
real Erdos-Renyi randomization, you must not use this option.

=cut
        } elsif ($arg eq "-no_single") {
            $single = 1;

            ## Directed edges
=pod

=item B<-directed>

Specifies whether the edges must be considered as directed, i.e., an
edge from node A to node B is different from an edge from B to A (by
default, edges are not directed).

=cut

        } elsif ($arg eq "-directed") {
            $directed = 1;

            ## Bidirectional edges
=pod

=item B<-bidirectional>

Export each edge in both directions. This amounts to explicitly create
an undirected network.

=cut

        } elsif ($arg eq "-bidirectional") {
            ## NOTE(review): $bidirectional is not read anywhere else in this file
            $bidirectional = 1;

            ## Normal distribution of the weights
=pod

=item B<-normal>

This option can only be used with ER randomization type and if the
input graph is weighted.  Using this option will randomly generate the
weights of the output random graph according to a normal distribution
of weights. The mean and standard deviation can then be chosen (-mean
and -sd options) or will be calculated according to the weights of the
input graph.

=cut

        } elsif ($arg eq "-normal") {
            $normal = 1;

            ## Duplicated edges
=pod

=item B<-duplicate>

Specifies whether more than one edge may link two nodes. For a real
Erdos-Renyi randomization, you must use this option. However, by
default, duplicated edges are not allowed.

=cut

        } elsif ($arg eq "-duplicate") {
            $duplicated = 1;

            ## Column conservation
=pod

=item B<-col_conservation>

Only compatible with ER randomization of a graph. Source and target
nodes stay source and target nodes in the randomized graph. For a real
Erdos-Renyi randomization, you must not use this option.

=cut

        } elsif ($arg eq "-col_conservation") {
            $col_conservation = 1;

            ## Self loops
=pod

=item B<-self>

Allows self loops (by default, self loops are not allowed). For a real
Erdos-Renyi randomization, you must use this option.


=cut
        } elsif ($arg eq "-self") {
            $main::self = 1;


        } else {
            ## Same error routine as for the other options
            &RSAT::error::FatalError(join("\t", "Invalid option", $arg));

        }

    }
=pod

=back

=cut
}


################################################################
#### Verbose message: report the command line, then the input and
#### output files, on the verbose output stream
sub Verbose {
    my $report = $main::verbose_out;

    print $report "; random-graph ";
    &PrintArguments($report);

    ## Input files (the section is omitted when there is none)
    if (%main::infile) {
	print $report "; Input files\n";
	foreach my $role (keys %main::infile) {
	    my $file_name = $main::infile{$role};
	    print $report ";\t$role\t$file_name\n";
	}
    }

    ## Output files (the section is omitted when there is none)
    if (%main::outfile) {
	print $report "; Output files\n";
	foreach my $role (keys %main::outfile) {
	    my $file_name = $main::outfile{$role};
	    print $report ";\t$role\t$file_name\n";
	}
    }
}

__END__

=pod

=head1 SEE ALSO

=over

=item I<convert-graph>

=item I<graph-get-clusters>

=item I<graph-node-degree>

=item I<compare-graphs>

=item I<graph-neighbours>

=back

=cut
