#!/usr/bin/perl -w
############################################################
#
# $Id: word-occurrences,v 1.7 2011/02/17 04:54:49 rsat Exp $
#
############################################################

## use strict;

=pod

=head1 NAME

word-occurrences

=head1 VERSION

$program_version

=head1 DESCRIPTION

Count word occurrences in the input file.

=head1 AUTHORS

jvanheld@bigre.ulb.ac.be

=head1 CATEGORY

util

=head1 USAGE

word-occurrences [-i inputfile] [-o outputfile] [-v #] [...]

=head1 INPUT FORMAT

A text file.

=head1 OUTPUT FORMAT

A two-column file, with one row per word.

The first column indicates the number of occurrences, and the second
column the word (this is more readable, since words may have different
lengths and may exceed the width of a column).

=cut


BEGIN {
    if ($0 =~ /([^(\/)]+)$/) {
	push (@INC, "$`lib/");
    }
}
require "RSA.lib";



################################################################
## Main package
package main;
{

    ################################################################
    ## Initialise parameters
    local $start_time = &RSAT::util::StartScript();
    $program_version = do { my @r = (q$Revision: 1.7 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r };
#    $program_version = "0.00";

    %main::infile = ();
    %main::outfile = ();

    $main::verbose = 0;
    $main::in = STDIN;
    $main::out = STDOUT;

    ################################################################
    ## Read argument values
    &ReadArguments();

    ################################################################
    ## Check argument values

    ################################################################
    ## Open output stream
    $main::out = &OpenOutputFile($main::outfile{output});

    ################################################################
    ## Read input
    my %occ = ();
    ($main::in) = &OpenInputFile($main::infile{input});
    while (<$main::in>) {
      next unless (/\S/); ## Skip empty rows
      my @words = split /\s+/;
      foreach my $word (@words) {
	$occ{$word}++;
      }
    }
    close $main::in if ($main::infile{input});

    ################################################################
    ## Print verbose
    &Verbose() if ($main::verbose);

    ################################################################
    ## Execute the command

    ################################################################
    ## Print output
    print $out "; occ\tword\n";
    my @sorted_words = sort {$occ{$b} <=> $occ{$a}} keys(%occ);
    foreach my $word (@sorted_words) {
      print $out $occ{$word}, "\t", $word, "\n";
    }

    ################################################################
    ## Close output stream
    my $exec_time = &RSAT::util::ReportExecutionTime($start_time);
    print $main::out $exec_time if ($main::verbose >= 1);
    close $main::out if ($main::outfile{output});


    exit(0);
}

################################################################
################### SUBROUTINE DEFINITION ######################
################################################################


################################################################
## Display full help message 
sub PrintHelp {
    system "pod2text -c $0";
    exit()
}

################################################################
## Display short help message
sub PrintOptions {
    &PrintHelp();
}

################################################################
## Read arguments 
sub ReadArguments {
    my $arg;
    my @arguments = @ARGV; ## create a copy to shift, because we need ARGV to report command line in &Verbose()
    while (scalar(@arguments) >= 1) {
      $arg = shift (@arguments);
	## Verbosity
=pod

=head1 OPTIONS

=over 4

=item B<-v #>

Level of verbosity (detail in the warning messages during execution)

=cut
	if ($arg eq "-v") {
	    if (&IsNatural($arguments[0])) {
		$main::verbose = shift(@arguments);
	    } else {
		$main::verbose = 1;
	    }

	    ## Help message
=pod

=item B<-h>

Display full help message

=cut
	} elsif ($arg eq "-h") {
	    &PrintHelp();

	    ## List of options
=pod

=item B<-help>

Same as -h

=cut
	} elsif ($arg eq "-help") {
	    &PrintOptions();

	    ## Input file
=pod

=item B<-i inputfile>

If no input file is specified, the standard input is used.  This
allows to use the command within a pipe.

=cut
	} elsif ($arg eq "-i") {
	    $main::infile{input} = shift(@arguments);

	    ## Output file
=pod

=item	B<-o outputfile>

If no output file is specified, the standard output is used.  This
allows to use the command within a pipe.

=cut
	} elsif ($arg eq "-o") {
	    $main::outfile{output} = shift(@arguments);

	} else {
	    &FatalError(join("\t", "Invalid option", $arg));

	}
    }


=pod

=back

=cut

}

################################################################
## Verbose message
sub Verbose {
    print $main::out "; word-occurrences ";
    &PrintArguments($main::out);
    printf $main::out "; %-22s\t%s\n", "Program version", $program_version;
    if (%main::infile) {
	print $main::out "; Input files\n";
	while (my ($key,$value) = each %main::infile) {
	  printf $main::out ";\t%-13s\t%s\n", $key, $value;
	}
    }
    if (%main::outfile) {
	print $main::out "; Output files\n";
	while (my ($key,$value) = each %main::outfile) {
	  printf $main::out ";\t%-13s\t%s\n", $key, $value;
	}
    }
}


__END__

=pod

=head1 SEE ALSO

=head1 WISH LIST

=cut
