#!/usr/bin/perl
# Make the "lib/" directory that sits next to this script searchable, then
# load the shared library file with require (which looks through @INC).
# The pattern matches the trailing run of characters of $0 that are not
# '/', '(' or ')', i.e. normally the script's file name. $` (the prematch)
# is then whatever precedes it: the directory prefix of $0 including its
# trailing slash, or the empty string when $0 contains no directory part.
# NOTE(review): presumably RSA.lib lives in that lib/ directory - confirm.
if ($0 =~ /([^(\/)]+)$/) {
    push (@INC, "$`lib/");
}
require "RSA.lib";

if ($ARGV[0] eq "-h") {
#### display full help message #####
  # The help is paged through "more". When the pager cannot be started the
  # text is written straight to STDOUT instead of being silently discarded
  # (printing to a handle that failed to open produces no output at all).
  my $help;
  my $paged = open($help, "|-", "more");
  $help = \*STDOUT unless ($paged);
  print {$help} <<End_of_help;
NAME
	login-statistics

        1998 by Jacques van Helden (jvanheld\@bigre.ulb.ac.be)
	
USAGE
        login-statistics [-i inputfile] [-o outputfile] [-v]

DESCRIPTION
	Reads a login file and returns statistics about the 
	client domains and the scripts that have been requested. 

CATEGORY
	administration

OPTIONS
        -h      (must be first argument) display full help message
        -help   (must be first argument) display options
	-v	verbose
	-i inputfile
		if not specified, the standard input is used.
		This allows to place the command within a pipe.
	-o outputfile
		if not specified, the standard output is used.
		This allows to place the command within a pipe.
	-convert
		converts old format to the new one
	-server	server name (optional)
	-th #   threshold (do not print domains with less tha # queries)
	
INPUT FORMAT
	The input file must have been generated by the UpdateLogFile 
	command. See this command in RSA.lib.pl for the description of the 
	format.
	
OUTPUT FORMAT
	
	
EXAMPLES
       login-statistics -v -i mydata -o myresult
	
End_of_help
  # Only the pager pipe needs closing (this waits for "more" to finish);
  # STDOUT is left alone.
  close $help if ($paged);
  exit;
}

if ($ARGV[0] eq "-help") {
#### display short help message #####
  # The option summary is paged through "more". When the pager cannot be
  # started the text is written straight to STDOUT instead of being lost.
  my $help;
  my $paged = open($help, "|-", "more");
  $help = \*STDOUT unless ($paged);
  print {$help} <<End_short_help;
login-statistics options
----------------
-h      (must be first argument) display full help message
-help   (must be first argument) display options
-i      input file
-o      output file
-v      verbose
-th #   threshold (do not print domains with less tha # queries)
-convert	convert old format to the new one
-server	server name
End_short_help
  # Only the pager pipe needs closing; STDOUT is left alone.
  close $help if ($paged);
  exit;
}

#### default parameter values ####
$start_time = &RSAT::util::StartScript();

# Minimal number of requests a client domain / client host must reach to be
# listed in the report (can be overridden on the command line).
$threshold = 100;

# Three-byte address prefixes whose requests are left out of the statistics
# (the main loop skips every line whose client domain is flagged here).
foreach my $ignored_prefix ('132.248.34', '164.15.61', '193.62.196') {
    $forget_domain{$ignored_prefix} = 1;
}
#### read arguments ####
# Every position of @ARGV is inspected; options taking a value read it from
# the following position. -convert, -server and -th are recognised by prefix,
# case-insensitively. The patterns are anchored at the start of the argument:
# without the anchor an option *value* that merely contains "-th", "-conv" or
# "-server" (e.g. "-server rsat-thing.org" or "-o out-thing.txt") was taken
# for an option and silently overwrote $threshold or $force_server.
foreach my $arg_idx (0..$#ARGV) {
    my $arg = $ARGV[$arg_idx];

    ### verbose ###
    if ($arg eq "-v") {
        $verbose = 1;

    ### input file ###
    } elsif ($arg eq "-i") {
        $inputfile = $ARGV[$arg_idx+1];

    ### output file ###
    } elsif ($arg eq "-o") {
        $outputfile = $ARGV[$arg_idx+1];

    ### convert old format to the new one ###
    } elsif ($arg =~ /^-conv/i) {
        $convert = 1;

    ### server name used for old-format lines ###
    } elsif ($arg =~ /^-server/i) {
        $force_server = $ARGV[$arg_idx+1];

    ### reporting threshold ###
    } elsif ($arg =~ /^-th/i) {
        $threshold = $ARGV[$arg_idx+1];

    }
}


#### check argument values ####



### open the input and output streams ###
# Both helpers are defined outside this file.
# NOTE(review): presumably they fall back to STDIN / STDOUT when the file
# name is empty, as the help text describes - confirm in RSA.lib.
($in, $input_dir) = &OpenInputFile($inputfile);
$out = &OpenOutputFile($outputfile);

#### verbose: echo the run parameters as ';'-prefixed lines ####
if ($verbose) {
    print $out ";login-statistics result\n";
    print $out ";Input file	$inputfile\n" unless ($inputfile eq "");
    print $out ";Output file	$outputfile\n" unless ($outputfile eq "");
}

###### execute the command #########

# Read the login file line by line. Each accepted line is parsed into its
# date, server, client and script fields, optionally re-printed in the new
# format (-convert), and then counted per month, server, script, client host
# and client domain.
while ($line = <$in>) {
    $line_nb++;
    next if ($line =~ /^;/);       ### comment line
    next unless ($line =~ /\S/);   ### blank line
    chomp($line);
    @fields = split /\s+/, $line;
    if (($fields[0] =~ /^\d{6}\.\d{6}$/) ||
        ($fields[0] =~ /^\d{4}_\d{2}_\d{2}\.\d{6}$/)) { ### new login format (&AlphaDate)
        $alpha_date = $fields[0];
        $server_name = $fields[1];
        $client_IP = $fields[2];
        $client_hostname = $fields[3];
        $requested_script = $fields[4];
        $email_address = $fields[5];
    } elsif ($line =~ /^(\d{2})\/(\d{2})\/(\d{2}) (\d{2}):(\d{2}):(\d{2}).+(\@[\d\.]+) (\S+)\t([^\t\n]*)\t*([^\t\n]*)/) {
      ### ancient date format looked like 03/11/98 16:23:34 CET
        ### the captures must be copied before the next pattern match resets them
        $alpha_date = "$3$2$1.$4$5$6";
        $client_IP = $7;
        $client_hostname = $8;
        $requested_script = $9;
        $email_address = $10;
        ### the old format carries no server name: use -server if given,
        ### else try to extract it from the input file name
        if ($force_server) {
            $server_name = "$force_server";
        } elsif ($inputfile =~ /log-file_(\S+)_\d+/) {
            $server_name = $1;
        } else {
            $server_name = "undefined";
        }
    } else {
        ### Neither format matched. The parsed fields are global and would
        ### still hold the values of the previous line, so without this
        ### branch that previous request would be counted (and converted)
        ### a second time.
        warn "Error at line $line_nb: unrecognized login line format\n";
        next;
    }
    if ($convert) {
        print $out "$alpha_date\t$server_name\t$client_IP $client_hostname\t$requested_script\n";
    }

    #### the client domain is made of the first three bytes of the IP address
    if ($client_IP =~ /\@(\d+)\.(\d+)\.(\d+)\.(\d+)/) {
        $client_domain = "$1.$2.$3";
    } else {
        ### do not inherit the domain computed for a previous line
        $client_domain = "undefined";
    }

    #### do not take into account some addresses that are used 
    #### for development and maintenance of the site
    next if ($forget_domain{$client_domain});
    next if ($forget_hostname{$client_hostname});
    next if ($forget_IP{$client_IP});

    #### extract year and month from date
    if ($alpha_date =~ /^(\d{4})_(\d{2})_(\d{2})/) {
        ### YYYY_MM_DD
        $year=$1;
        $month=$2;
    } elsif ($alpha_date =~ /^(\d{2})(\d{2})_(\d{2})/) {
        ### four digits, underscore, two digits: the last two of the four
        ### digits are used as a two-digit year
        $year=1900+$2;
        $month=$3;
    } elsif ($alpha_date =~ /^(\d{2})(\d{2})/) {
        ### two-digit year followed by two-digit month
        $year=1900+$1;
        $month=$2;
    } else {
        warn "Error at line $line_nb: invalid date format $alpha_date\n";
        next;
    }
    ### years that come out before 1997 are shifted by one century
    $year += 100 if ($year < 1997);

    ### absolute month index, comparable across years
    $month_nb = 12*$year + $month;
    $month{$month_nb} = "$year/$month";
    if (defined($min_month)) {
        $min_month = &min($month_nb,$min_month);
    } else {
        $min_month = $month_nb;
    }
    $max_month = &max($month_nb,$max_month);

    ### strip the ".cgi" extension; the dot is escaped so that only a literal
    ### ".cgi" suffix is removed (an unescaped dot also ate the last character
    ### of names such as "mycgi")
    $requested_script =~ s/\.cgi$//;

    #### increment the counters ####
    $month_counter{$month_nb}++;
    $server_counter{$server_name}++;
    $server_counter[$month_nb]{$server_name}++;
    $script_counter{$requested_script}++;
    $script_counter[$month_nb]{$requested_script}++;
    $client_counter{$client_hostname}++;
    $domain_counter{$client_domain}++;
    $client_domain{$client_hostname} = $client_domain;
}


#### conversion mode: the converted lines were already printed while reading
#### the input, so no statistics report is produced
exit(0) if ($convert);
###### print output ######

# Plain print is used for every line built from data: passing such a string
# to printf as the format would mangle any name that contains a '%'.

#### requests per server, sorted by server name
print $out "By server name\n";
print $out "==============\n";
foreach $server_name (sort keys %server_counter) {
    print $out "$server_counter{$server_name}\t$server_name\n";
}

#### requests per month: rank of the month (1 = first month seen),
#### year/month label, number of requests. The keys are month indexes, so
#### they are sorted numerically rather than as strings.
print $out "\nBy month\n";
print $out "========\n";
foreach $month_nb (sort {$a <=> $b} keys %month_counter) {
    printf $out "%d\t%s\t%d\n", $month_nb - $min_month+1, $month{$month_nb}, $month_counter{$month_nb};
}

#### requests per program, most requested first
print $out "\nBy program\n";
print $out "==========\n";
foreach $script_name (sort {$script_counter{$b} <=> $script_counter{$a}} keys %script_counter) {
    print $out "$script_counter{$script_name}\t$script_name\n";
}

#### one row per month, one column per program (most requested program first)
print $out "\nBy program and month \n";
print $out "====================\n";

# The column order is the same for the header and for every row, so the
# ranking is computed once instead of being re-sorted for each month.
my @ranked_scripts = sort {$script_counter{$b} <=> $script_counter{$a}} keys %script_counter;

print $out "month";
foreach $script_name (@ranked_scripts) {
    print $out "\t$script_name";
}
print $out "\n";

# Month keys are month indexes: sort them numerically. Only months with at
# least one request are listed.
foreach $month_nb (sort {$a <=> $b} keys %month_counter) {
    print $out $month{$month_nb};
    foreach $script_name (@ranked_scripts) {
        # a program never requested during this month is reported as 0;
        # print (not printf) because the value is data, not a format string
        my $requests = $script_counter[$month_nb]{$script_name} || 0;
        print $out "\t$requests";
    }
    print $out "\n";
}


#### generate a graph with the utilization by month for each script
# The table (header line, then one row per month between the first and the
# last month seen, months without requests included) is piped to the external
# XYgraph command, which is asked to write by_script_month.gif.
# A failure of XYgraph must not prevent the rest of the report from being
# printed: the pipe is checked when opened and when closed, and SIGPIPE is
# ignored for the duration of this block so that writing to a command that
# already exited yields a warning instead of killing the script.
{
    local $SIG{PIPE} = 'IGNORE';
    my $graph_command = "XYgraph -header -xcol 1 -ycol 2-100 -legend -xgstep 1 -xsize 600 -o by_script_month.gif -lines -pointsize 8 -title1 'yeast-tools' -title2 'utilization statistics' -xleg1 'month' -yleg1 'number of requests' -symbols -ymin 0 -xmin 0";

    # same column order for the header and for every row
    my @graph_scripts = sort {$script_counter{$b} <=> $script_counter{$a}} keys %script_counter;

    if (open(my $graph, "|-", $graph_command)) {
        print {$graph} join("\t", "month", @graph_scripts), "\n";
        for $month_nb ($min_month..$max_month) {
            # first column: rank of the month, starting at 1
            my @row = ($month_nb - $min_month+1);
            foreach $script_name (@graph_scripts) {
                # no request for this script during this month -> 0
                push @row, ($script_counter[$month_nb]{$script_name} || 0);
            }
            print {$graph} join("\t", @row), "\n";
        }
        close($graph)
            or warn "Warning: XYgraph did not complete successfully, by_script_month.gif may be missing\n";
    } else {
        warn "Warning: cannot run XYgraph ($!), by_script_month.gif was not generated\n";
    }
}



#### client domains with at least $threshold requests, most active first
print $out "\nBy client domain\n";
print $out "================\n";
foreach $domain (sort {$domain_counter{$b} <=> $domain_counter{$a}} keys %domain_counter) {
    # print, not printf: the line is data and must not be used as a format
    print $out "$domain_counter{$domain}\t$domain\n" 
        if ($domain_counter{$domain} >= $threshold);
}

#### client hosts with at least $threshold requests, most active first,
#### with the domain recorded for each host
print $out "\nBy client name\n";
print $out "=================\n";
foreach $client (sort {$client_counter{$b} <=> $client_counter{$a}} keys %client_counter) {
    print $out "$client_counter{$client}\t$client_domain{$client}\t$client\n" 
        if ($client_counter{$client} >= $threshold);
}


###### close the input stream (only when an input file name was given) ######
if ($inputfile ne "") {
    close $in;
}

###### report the execution time (verbose mode), then close the output ######
my $elapsed_report = &RSAT::util::ReportExecutionTime($start_time);
if ($main::verbose >= 1) {
    print {$main::out} $elapsed_report;
}
if ($outputfile ne "") {
    close $out;
}


exit(0);


########################## subroutine definition ############################

