The Perl and Raku Conference 2025: Greenville, South Carolina - June 27-29 Learn more

$Bio::Root::Utilities::VERSION = '1.7.8';
use strict;
use base qw(Bio::Root::Root Exporter);
=head1 NAME
Bio::Root::Utilities - general-purpose utilities
=head1 SYNOPSIS
=head2 Object Creation
# Using the supplied singleton object:
use Bio::Root::Utilities qw(:obj);
$Util->some_method();
# Create an object manually:
use Bio::Root::Utilities;
my $util = Bio::Root::Utilities->new();
$util->some_method();
$date_stamp = $Util->date_format('yyy-mm-dd');
$clean = $Util->untaint($dirty);
$compressed = $Util->compress('/home/me/myfile.txt')
my ($mean, $stdev) = $Util->mean_stdev( @data );
$Util->authority("me@example.com");
$Util->mail_authority("Something you should know about...");
...and a host of other methods. See below.
=head1 DESCRIPTION
Provides general-purpose utilities of potential interest to any Perl script.
The C<:obj> tag is a convenience that imports a $Util symbol into your
namespace representing a Bio::Root::Utilities object. This saves you
from creating your own Bio::Root::Utilities object via
C<Bio::Root::Utilities-E<gt>new()> or from prefixing all method calls with
C<Bio::Root::Utilities>, though feel free to do these things if desired.
Since there should normally not be a need for a script to have more
than one Bio::Root::Utilities object, this module thus comes with it's
own singleton.
=head1 INSTALLATION
This module is included with the central Bioperl distribution:
Follow the installation instructions included in the README file.
=head1 DEPENDENCIES
Inherits from L<Bio::Root::Root>, and uses L<Bio::Root::IO>
and L<Bio::Root::Exception>.
Relies on external executables for file compression/uncompression
and sending mail. No paths to these are hard coded but are located
as needed.
=head1 SEE ALSO
http://bioperl.org - Bioperl Project Homepage
=head1 ACKNOWLEDGEMENTS
This module was originally developed under the auspices of the
Saccharomyces Genome Database: http://www.yeastgenome.org/
=head1 AUTHOR Steve Chervitz
=cut
use vars qw(@EXPORT_OK %EXPORT_TAGS);
@EXPORT_OK = qw($Util);
%EXPORT_TAGS = ( obj => [qw($Util)],
std => [qw($Util)],);
use vars qw($ID $Util $GNU_PATH $TIMEOUT_SECS
@COMPRESSION_UTILS @UNCOMPRESSION_UTILS
$DEFAULT_NEWLINE $NEWLINE $AUTHORITY
@MONTHS @DAYS $BASE_YEAR $DEFAULT_CENTURY
);
$ID = 'Bio::Root::Utilities';
# Number of seconds to wait before timing out when reading input (taste_file())
$TIMEOUT_SECS = 30;
$NEWLINE = $ENV{'NEWLINE'} || undef;
$BASE_YEAR = 1900; # perl's localtime() assumes this for it's year data.
# TODO: update this every hundred years. Y2K-sensitive code.
$DEFAULT_CENTURY = $BASE_YEAR + 100;
@MONTHS = qw(Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec);
@DAYS = qw(Sun Mon Tue Wed Thu Fri Sat);
# Sets the preference for compression utilities to be used by compress().
# The first executable in this list to be found in the current PATH will be used,
# unless overridden in the call to that function. See docs for details.
@COMPRESSION_UTILS = qw(gzip bzip2 zip compress);
@UNCOMPRESSION_UTILS = qw(gunzip gzip bunzip2 unzip uncompress);
# Default person to receive feedback from users and possibly automatic error messages.
$AUTHORITY = '';
# Note: $GNU_PATH is now deprecated, shouldn't be needed since now this module
# will automatically locate the compression utility in the current PATH.
# Retaining $GNU_PATH for backward compatibility.
#
# $GNU_PATH points to the directory containing the gzip and gunzip
# executables. It may be required for executing gzip/gunzip
# in some situations (e.g., when $ENV{PATH} doesn't contain this dir.
# Customize $GNU_PATH for your site if the compress() or
# uncompress() functions are generating exceptions.
$GNU_PATH = '';
#$GNU_PATH = '/tools/gnu/bin/';
$DEFAULT_NEWLINE = "\012"; # \n (used if get_newline() fails for some reason)
## Static UTIL object.
$Util = Bio::Root::Root->new();
=head2 date_format
Title : date_format
Usage : $Util->date_format( [FMT], [DATE])
Purpose : -- Get a string containing the formatted date or time
: taken when this routine is invoked.
: -- Provides a way to avoid using `date`.
: -- Provides an interface to localtime().
: -- Interconverts some date formats.
:
: (For additional functionality, use Date::Manip or
: Date::DateCalc available from CPAN).
Example : $Util->date_format();
: $date = $Util->date_format('yyyy-mmm-dd', '11/22/92');
Returns : String (unless 'list' is provided as argument, see below)
:
: 'yyyy-mm-dd' = 1996-05-03 # default format.
: 'yyyy-dd-mm' = 1996-03-05
: 'yyyy-mmm-dd' = 1996-May-03
: 'd-m-y' = 3-May-1996
: 'd m y' = 3 May 1996
: 'dmy' = 3may96
: 'mdy' = May 3, 1996
: 'ymd' = 96may3
: 'md' = may3
: 'year' = 1996
: 'hms' = 23:01:59 # when not converting a format, 'hms' can be
: # tacked on to any of the above options
: # to add the time stamp: eg 'dmyhms'
: 'full' | 'unix' = UNIX-style date: Tue May 5 22:00:00 1998
: 'list' = the contents of localtime(time) in an array.
Argument : (all are optional)
: FMT = yyyy-mm-dd | yyyy-dd-mm | yyyy-mmm-dd |
: mdy | ymd | md | d-m-y | hms | hm
: ('hms' may be appended to any of these to
: add a time stamp)
:
: DATE = String containing date to be converted.
: Acceptable input formats:
: 12/1/97 (for 1 December 1997)
: 1997-12-01
: 1997-Dec-01
Throws :
Comments : If you don't care about formatting or using backticks, you can
: always use: $date = `date`;
:
: For more features, use Date::Manip.pm, (which I should
: probably switch to...)
See Also : L<file_date()|file_date>, L<month2num()|month2num>
=cut
#---------------'
sub date_format {
#---------------
my $self = shift;
my $option = shift;
my $date = shift; # optional date to be converted.
my($sec,$min,$hour,$mday,$mon,$year,$wday,$yday,$isdst);
$option ||= 'yyyy-mm-dd';
my ($month_txt, $day_txt, $month_num, $fullYear);
my ($converting, @date);
# Load a supplied date for conversion:
if(defined($date) && ($date =~ /[\D-]+/)) {
$converting = 1;
if( $date =~ m{/}) {
($mon,$mday,$year) = split(m{/}, $date);
} elsif($date =~ /(\d{4})-(\d{1,2})-(\d{1,2})/) {
($year,$mon,$mday) = ($1, $2, $3);
} elsif($date =~ /(\d{4})-(\w{3,})-(\d{1,2})/) {
($year,$mon,$mday) = ($1, $2, $3);
$mon = $self->month2num($2);
} else {
print STDERR "\n*** Unsupported input date format: $date\n";
}
if(length($year) == 4) {
$fullYear = $year;
$year = substr $year, 2;
} else {
# Heuristics to guess what century was intended when a 2-digit year is given
# If number is over 50, assume it's for prev century; under 50 = default century.
# TODO: keep an eye on this Y2K-sensitive code
if ($year > 50) {
$fullYear = $DEFAULT_CENTURY + $year - 100;
} else {
$fullYear = $DEFAULT_CENTURY + $year;
}
}
$mon -= 1;
} else {
($sec,$min,$hour,$mday,$mon,$year,$wday,$yday,$isdst) = @date =
localtime(($date ? $date : time()));
return @date if $option =~ /list/i;
$fullYear = $BASE_YEAR+$year;
}
$month_txt = $MONTHS[$mon];
$day_txt = $DAYS[$wday] if defined $wday;
$month_num = $mon+1;
# print "sec: $sec, min: $min, hour: $hour, month: $mon, m-day: $mday, year: $year\nwday: $wday, yday: $yday, dst: $isdst";<STDIN>;
if( $option =~ /yyyy-mm-dd/i ) {
$date = sprintf "%4d-%02d-%02d",$fullYear,$month_num,$mday;
} elsif( $option =~ /yyyy-dd-mm/i ) {
$date = sprintf "%4d-%02d-%02d",$fullYear,$mday,$month_num;
} elsif( $option =~ /yyyy-mmm-dd/i ) {
$date = sprintf "%4d-%3s-%02d",$fullYear,$month_txt,$mday;
} elsif( $option =~ /full|unix/i ) {
$date = sprintf "%3s %3s %2d %02d:%02d:%02d %d",$day_txt, $month_txt, $mday, $hour, $min, $sec, $fullYear;
} elsif( $option =~ /mdy/i ) {
$date = "$month_txt $mday, $fullYear";
} elsif( $option =~ /ymd/i ) {
$date = $year."\l$month_txt$mday";
} elsif( $option =~ /dmy/i ) {
$date = $mday."\l$month_txt$year";
} elsif( $option =~ /md/i ) {
$date = "\l$month_txt$mday";
} elsif( $option =~ /d-m-y/i ) {
$date = "$mday-$month_txt-$fullYear";
} elsif( $option =~ /d m y/i ) {
$date = "$mday $month_txt $fullYear";
} elsif( $option =~ /year/i ) {
$date = $fullYear;
} elsif( $option =~ /dmy/i ) {
$date = $mday.'-'.$month_txt.'-'.$fullYear;
} elsif($option and $option !~ /hms/i) {
print STDERR "\n*** Unrecognized date format request: $option\n";
}
if( $option =~ /hms/i and not $converting) {
$date .= " $hour:$min:$sec" if $date;
$date ||= "$hour:$min:$sec";
}
return $date || join(" ", @date);
}
=head2 month2num
Title : month2num
Purpose : Converts a string containing a name of a month to integer
: representing the number of the month in the year.
Example : $Util->month2num("march"); # returns 3
Argument : The string argument must contain at least the first
: three characters of the month's name. Case insensitive.
Throws : Exception if the conversion fails.
=cut
#--------------'
sub month2num {
#--------------
my ($self, $str) = @_;
# Get string in proper format for conversion.
$str = substr($str, 0, 3);
for my $month (0..$#MONTHS) {
return $month+1 if $str =~ /$MONTHS[$month]/i;
}
$self->throw("Invalid month name: $str");
}
=head2 num2month
Title : num2month
Purpose : Does the opposite of month2num.
: Converts a number into a string containing a name of a month.
Example : $Util->num2month(3); # returns 'Mar'
Throws : Exception if supplied number is out of range.
=cut
#-------------
sub num2month {
#-------------
my ($self, $num) = @_;
$self->throw("Month out of range: $num") if $num < 1 or $num > 12;
return $MONTHS[$num-1];
}
=head2 compress
Title : compress
Usage : $Util->compress(full-path-filename);
: $Util->compress(<named parameters>);
Purpose : Compress a file.
Example : $Util->compress("/usr/people/me/data.txt");
: $Util->compress(-file=>"/usr/people/me/data.txt",
: -tmp=>1,
: -outfile=>"/usr/people/share/data.txt.gz",
: -exe=>"/usr/local/bin/fancyzip");
Returns : String containing full, absolute path to compressed file
Argument : Named parameters (case-insensitive):
: -FILE => String (name of file to be compressed, full path).
: If the supplied filename ends with '.gz' or '.Z',
: that extension will be removed before attempting to compress.
: Optional:
: -TMP => boolean. If true, (or if user is not the owner of the file)
: the file is compressed to a temp file. If false, file may be
: clobbered with the compressed version (if using a utility like
: gzip, which is the default)
: -OUTFILE => String (name of the output compressed file, full path).
: -EXE => Name of executable for compression utility to use.
: Will supersede those in @COMPRESSION_UTILS defined by
: this module. If the absolute path to the executable is not provided,
: it will be searched in the PATH env variable.
Throws : Exception if file cannot be compressed.
: If user is not owner of the file, generates a warning and compresses to
: a tmp file. To avoid this warning, use the -o file test operator
: and call this function with -TMP=>1.
Comments : Attempts to compress using utilities defined in the @COMPRESSION_UTILS
: defined by this module, in the order defined. The first utility that is
: found to be executable will be used. Any utility defined in optional -EXE param
: will be tested for executability first.
: To minimize security risks, the -EXE parameter value is untained using
: the untaint() method of this module (in 'relaxed' mode to permit path separators).
See Also : L<uncompress()|uncompress>
=cut
#------------'
sub compress {
#------------
my ($self, @args) = @_;
# This method formerly didn't use named params and expected fileName, tmp
# in that order. This should be backward compatibile.
my ($fileName, $tmp, $outfile, $exe) = $self->_rearrange([qw(FILE TMP OUTFILE EXE)], @args);
my ($file, $get, $fmt);
# in case the supplied name already has a compressed extension
if($fileName =~ /(\.gz|\.Z|\.bz2|\.zip)$/) { $fileName =~ s/$1$//; };
$self->debug("compressing file $fileName");
my @util_to_use = @COMPRESSION_UTILS;
if (defined $exe){
$exe = $self->untaint($exe, 1);
unshift @util_to_use, $exe;
}
my @checked = @util_to_use;
$exe ||= '';
while (not -x $exe and scalar(@util_to_use)) {
$exe = $self->find_exe(shift @util_to_use);
}
unless (-x $exe) {
$self->throw("Can't find compression utility. Looked for @checked");
}
my ($compressed, @cmd, $handle);
if(defined($outfile) or $tmp or not -o $fileName) {
if (defined $outfile) {
$compressed = $outfile;
} else {
# obtain a temporary file name (not using the handle)
# and insert some special text to flag it as a bioperl-based temp file
my $io = Bio::Root::IO->new();
($handle, $compressed) = $io->tempfile();
$compressed .= '.tmp.bioperl.gz';
}
# Use double quotes if executable path have empty spaces
if ($exe =~ m/ /) {
$exe = "\"$exe\"";
}
if ($exe =~ /gzip|bzip2|compress/) {
@cmd = ("$exe -f < \"$fileName\" > \"$compressed\"");
} elsif ($exe eq 'zip') {
@cmd = ("$exe -r \"$fileName.zip\" \"$fileName\"");
}
not $tmp and
$self->warn("Not owner of file $fileName. Compressing to temp file $compressed.");
$tmp = 1;
} else {
# Need to compute the compressed name based on exe since we're returning it.
$compressed = $fileName;
if ($exe =~ /gzip/) {
$compressed .= '.gz';
} elsif ($exe =~ /bzip2/) {
$compressed .= '.bz2';
} elsif ($exe =~ /zip/) {
$compressed .= '.zip';
} elsif ($exe =~ /compress/) {
$compressed .= '.Z';
}
if ($exe =~ /gzip|bzip2|compress/) {
@cmd = ($exe, '-f', $fileName);
} elsif ($exe eq 'zip') {
@cmd = ($exe, '-r', "$compressed", $fileName);
}
}
if(system(@cmd) != 0) {
$self->throw( -class => 'Bio::Root::SystemException',
-text => "Failed to compress file $fileName using $exe: $!");
}
return $compressed;
}
=head2 uncompress
Title : uncompress
Usage : $Util->uncompress(full-path-filename);
: $Util->uncompress(<named parameters>);
Purpose : Uncompress a file.
Example : $Util->uncompress("/usr/people/me/data.txt");
: $Util->uncompress(-file=>"/usr/people/me/data.txt.gz",
: -tmp=>1,
: -outfile=>"/usr/people/share/data.txt",
: -exe=>"/usr/local/bin/fancyzip");
Returns : String containing full, absolute path to uncompressed file
Argument : Named parameters (case-insensitive):
: -FILE => String (name of file to be uncompressed, full path).
: If the supplied filename ends with '.gz' or '.Z',
: that extension will be removed before attempting to uncompress.
: Optional:
: -TMP => boolean. If true, (or if user is not the owner of the file)
: the file is uncompressed to a temp file. If false, file may be
: clobbered with the uncompressed version (if using a utility like
: gzip, which is the default)
: -OUTFILE => String (name of the output uncompressed file, full path).
: -EXE => Name of executable for uncompression utility to use.
: Will supersede those in @UNCOMPRESSION_UTILS defined by
: this module. If the absolute path to the executable is not provided,
: it will be searched in the PATH env variable.
Throws : Exception if file cannot be uncompressed.
: If user is not owner of the file, generates a warning and uncompresses to
: a tmp file. To avoid this warning, use the -o file test operator
: and call this function with -TMP=>1.
Comments : Attempts to uncompress using utilities defined in the @UNCOMPRESSION_UTILS
: defined by this module, in the order defined. The first utility that is
: found to be executable will be used. Any utility defined in optional -EXE param
: will be tested for executability first.
: To minimize security risks, the -EXE parameter value is untained using
: the untaint() method of this module (in 'relaxed' mode to permit path separators).
See Also : L<compress()|compress>
=cut
#------------'
sub uncompress {
#------------
my ($self, @args) = @_;
# This method formerly didn't use named params and expected fileName, tmp
# in that order. This should be backward compatibile.
my ($fileName, $tmp, $outfile, $exe) = $self->_rearrange([qw(FILE TMP OUTFILE EXE)], @args);
my ($file, $get, $fmt);
# in case the supplied name lacks a compressed extension
if(not $fileName =~ /(\.gz|\.Z|\.bz2|\.zip)$/) { $fileName .= $1; };
$self->debug("uncompressing file $fileName");
my @util_to_use = @UNCOMPRESSION_UTILS;
if (defined $exe){
$exe = $self->untaint($exe, 1);
unshift @util_to_use, $exe;
}
$exe ||= '';
while (not -x $exe and scalar(@util_to_use)) {
$exe = $self->find_exe(shift @util_to_use);
}
unless (-x $exe) {
$self->throw("Can't find compression utility. Looked for @util_to_use");
}
my ($uncompressed, @cmd, $handle);
$uncompressed = $fileName;
$uncompressed =~ s/\.\w+$//;
if(defined($outfile) or $tmp or not -o $fileName) {
if (defined $outfile) {
$uncompressed = $outfile;
} else {
# obtain a temporary file name (not using the handle)
my $io = Bio::Root::IO->new();
($handle, $uncompressed) = $io->tempfile();
# insert some special text to flag it as a bioperl-based temp file
$uncompressed .= '.tmp.bioperl';
}
# Use double quotes if executable path have empty spaces
if ($exe =~ m/ /) {
$exe = "\"$exe\"";
}
if ($exe =~ /gunzip|bunzip2|uncompress/) {
@cmd = ("$exe -f < \"$fileName\" > \"$uncompressed\"");
} elsif ($exe =~ /gzip/) {
@cmd = ("$exe -df < \"$fileName\" > \"$uncompressed\"");
} elsif ($exe eq 'unzip') {
@cmd = ("$exe -p \"$fileName\" > \"$uncompressed\"");
}
not $tmp and
$self->warn("Not owner of file $fileName. Uncompressing to temp file $uncompressed.");
$tmp = 1;
} else {
if ($exe =~ /gunzip|bunzip2|uncompress/) {
@cmd = ($exe, '-f', $fileName);
} elsif ($exe =~ /gzip/) {
@cmd = ($exe, '-df', $fileName);
} elsif ($exe eq 'zip') {
@cmd = ($exe, $fileName);
}
}
if(system(@cmd) != 0) {
$self->throw( -class => 'Bio::Root::SystemException',
-text => "Failed to uncompress file $fileName using $exe: $!");
}
return $uncompressed;
}
=head2 file_date
Title : file_date
Usage : $Util->file_date( filename [,date_format])
Purpose : Obtains the date of a given file.
: Provides flexible formatting via date_format().
Returns : String = date of the file as: yyyy-mm-dd (e.g., 1997-10-15)
Argument : filename = string, full path name for file
: date_format = string, desired format for date (see date_format()).
: Default = yyyy-mm-dd
Thows : Exception if no file is provided or does not exist.
Comments : Uses the mtime field as obtained by stat().
=cut
#--------------
sub file_date {
#--------------
my ($self, $file, $fmt) = @_;
$self->throw("No such file: $file") if not $file or not -e $file;
$fmt ||= 'yyyy-mm-dd';
my @file_data = stat($file);
return $self->date_format($fmt, $file_data[9]); # mtime field
}
=head2 untaint
Title : untaint
Purpose : To remove nasty shell characters from untrusted data
: and allow a script to run with the -T switch.
: Potentially dangerous shell meta characters: &;`'\"|*?!~<>^()[]{}$\n\r
: Accept only the first block of contiguous characters:
: Default allowed chars = "-\w.', ()"
: If $relax is true = "-\w.', ()\/=%:^<>*"
Usage : $Util->untaint($value, $relax)
Returns : String containing the untained data.
Argument: $value = string
: $relax = boolean
Comments:
This general untaint() function may not be appropriate for every situation.
To allow only a more restricted subset of special characters
(for example, untainting a regular expression), then using a custom
untainting mechanism would permit more control.
Note that special trusted vars (like $0) require untainting.
=cut
#------------`
sub untaint {
#------------
my($self,$value,$relax) = @_;
$relax ||= 0;
my $untainted;
$self->debug("\nUNTAINT: $value\n");
unless (defined $value and $value ne '') {
return $value;
}
if( $relax ) {
$value =~ /([-\w.\', ()\/=%:^<>*]+)/;
$untainted = $1
# } elsif( $relax == 2 ) { # Could have several degrees of relax.
# $value =~ /([-\w.\', ()\/=%:^<>*]+)/;
# $untainted = $1
} else {
$value =~ /([-\w.\', ()]+)/;
$untainted = $1
}
$self->debug("UNTAINTED: $untainted\n");
$untainted;
}
=head2 mean_stdev
Title : mean_stdev
Usage : ($mean, $stdev) = $Util->mean_stdev( @data )
Purpose : Calculates the mean and standard deviation given a list of numbers.
Returns : 2-element list (mean, stdev)
Argument : list of numbers (ints or floats)
Thows : n/a
=cut
#---------------
sub mean_stdev {
#---------------
my ($self, @data) = @_;
return (undef, undef) if not @data; # case of empty @data list
my $mean = 0;
my $N = 0;
foreach my $num (@data) {
$mean += $num;
$N++
}
$mean /= $N;
my $sum_diff_sqd = 0;
foreach my $num (@data) {
$sum_diff_sqd += ($mean - $num) * ($mean - $num);
}
# if only one element in @data list, unbiased stdev is undefined
my $stdev = $N <= 1 ? undef : sqrt( $sum_diff_sqd / ($N-1) );
return ($mean, $stdev);
}
=head2 count_files
Title : count_files
Purpose : Counts the number of files/directories within a given directory.
: Also reports the number of text and binary files in the dir
: as well as names of these files and directories.
Usage : count_files(\%data)
: $data{-DIR} is the directory to be analyzed. Default is ./
: $data{-PRINT} = 0|1; if 1, prints results to STDOUT, (default=0).
Argument : Hash reference (empty)
Returns : n/a;
: Modifies the hash ref passed in as the sole argument.
: $$href{-TOTAL} scalar
: $$href{-NUM_TEXT_FILES} scalar
: $$href{-NUM_BINARY_FILES} scalar
: $$href{-NUM_DIRS} scalar
: $$href{-T_FILE_NAMES} array ref
: $$href{-B_FILE_NAMES} array ref
: $$href{-DIRNAMES} array ref
=cut
#----------------
sub count_files {
#----------------
my $self = shift;
my $href = shift; # Reference to an empty hash.
my( $name, @fileLine);
my $dir = $$href{-DIR} || './'; # THIS IS UNIX SPECIFIC? FIXME/TODO
my $print = $$href{-PRINT} || 0;
### Make sure $dir ends with /
$dir !~ m{/$} and do{ $dir .= '/'; $$href{-DIR} = $dir; };
open ( my $PIPE, "ls -1 $dir |" ) || $self->throw("Can't open input pipe: $!");
### Initialize the hash data.
$$href{-TOTAL} = 0;
$$href{-NUM_TEXT_FILES} = $$href{-NUM_BINARY_FILES} = $$href{-NUM_DIRS} = 0;
$$href{-T_FILE_NAMES} = [];
$$href{-B_FILE_NAMES} = [];
$$href{-DIR_NAMES} = [];
while( my $line = <$PIPE> ) {
chomp();
$$href{-TOTAL}++;
if( -T $dir.$line ) {
$$href{-NUM_TEXT_FILES}++;
push @{$$href{-T_FILE_NAMES}}, $line; }
if( -B $dir.$line and not -d $dir.$line) {
$$href{-NUM_BINARY_FILES}++;
push @{$$href{-B_FILE_NAMES}}, $line; }
if( -d $dir.$line ) {
$$href{-NUM_DIRS}++;
push @{$$href{-DIR_NAMES}}, $line; }
}
close $PIPE;
if( $print) {
printf( "\n%4d %s\n", $$href{-TOTAL}, "total files+dirs in $dir");
printf( "%4d %s\n", $$href{-NUM_TEXT_FILES}, "text files");
printf( "%4d %s\n", $$href{-NUM_BINARY_FILES}, "binary files");
printf( "%4d %s\n", $$href{-NUM_DIRS}, "directories");
}
}
=head2 file_info
Title : file_info
Purpose : Obtains a variety of date for a given file.
: Provides an interface to Perl's stat().
Status : Under development. Not ready. Don't use!
=cut
#--------------
sub file_info {
#--------------
my ($self, %param) = @_;
my ($file, $get, $fmt) = $self->_rearrange([qw(FILE GET FMT)], %param);
$get ||= 'all';
$fmt ||= 'yyyy-mm-dd';
my($dev, $ino, $mode, $nlink, $uid, $gid, $rdev, $size,
$atime, $mtime, $ctime, $blksize, $blocks) = stat $file;
if($get =~ /date/i) {
## I can get the elapsed time since the file was modified but
## it's not so straightforward to get the date in a nice format...
## Think about using a standard CPAN module for this, like
## Date::Manip or Date::DateCalc.
my $date = $mtime;
my $elsec = time - $mtime;
printf "\nFile age: %.0f sec %.0f hrs %.0f days", $elsec, $elsec/3600, $elsec/(3600*24);<STDIN>;
my $days = sprintf "%.0f", $elsec/(3600*24);
} elsif($get eq 'all') {
return stat $file;
}
}
=head2 delete
Title : delete
Purpose :
=cut
#------------
sub delete {
#------------
my $self = shift;
my $fileName = shift;
if(not -e $fileName) {
$self->throw("Could not delete file '$fileName': Does not exist.");
} elsif(not -o $fileName) {
$self->throw("Could not delete file '$fileName': Not owner.");
}
my $ulval = unlink($fileName) > 0
or $self->throw("Failed to delete file '$fileName': $!");
}
=head2 create_filehandle
Usage : $object->create_filehandle(<named parameters>);
Purpose : Create a FileHandle object from a file or STDIN.
: Mainly used as a helper method by read() and get_newline().
Example : $data = $object->create_filehandle(-FILE =>'usr/people/me/data.txt')
Argument : Named parameters (case-insensitive):
: (all optional)
: -CLIENT => object reference for the object submitting
: the request. Default = $Util.
: -FILE => string (full path to file) or a reference
: to a FileHandle object or typeglob. This is an
: optional parameter (if not defined, STDIN is used).
Returns : Reference to a FileHandle object.
Throws : Exception if cannot open a supplied file or if supplied with a
: reference that is not a FileHandle ref.
Comments : If given a FileHandle reference, this method simply returns it.
: This method assumes the user wants to read ascii data. So, if
: the file is binary, it will be treated as a compressed (gzipped)
: file and access it using gzip -ce. The problem here is that not
: all binary files are necessarily compressed. Therefore,
: this method should probably have a -mode parameter to
: specify ascii or binary.
See Also : L<get_newline()|get_newline>
=cut
#---------------------
sub create_filehandle {
#---------------------
my($self, @param) = @_;
my($client, $file, $handle) =
$self->_rearrange([qw( CLIENT FILE HANDLE )], @param);
if(not ref $client) { $client = $self; }
$file ||= $handle;
if( $client->can('file')) {
$file = $client->file($file);
}
my $FH;
my ($handle_ref);
if($handle_ref = ref($file)) {
if($handle_ref eq 'FileHandle') {
$FH = $file;
$client->{'_input_type'} = "FileHandle";
} elsif($handle_ref eq 'GLOB') {
$FH = $file;
$client->{'_input_type'} = "Glob";
} else {
$self->throw(-class => 'Bio::Root::IOException',
-text => "Could not read file '$file': Not a FileHandle or GLOB ref.");
}
$self->verbose > 0 and printf STDERR "$ID: reading data from FileHandle\n";
} elsif($file) {
$client->{'_input_type'} = "FileHandle for $file";
# Use gzip -cd to access compressed data.
if( -B $file ) {
$client->{'_input_type'} .= " (compressed)";
my $gzip = $self->find_exe('gzip');
$file = "$gzip -cd $file |"
}
require FileHandle;
$FH = FileHandle->new();
open ($FH, $file) || $self->throw(-class=>'Bio::Root::FileOpenException',
-text =>"Could not access data file '$file': $!");
$self->verbose > 0 and printf STDERR "$ID: reading data from file '$file'\n";
} else {
# Read from STDIN.
$FH = \*STDIN;
$self->verbose > 0 and printf STDERR "$ID: reading data from STDIN\n";
$client->{'_input_type'} = "STDIN";
}
return $FH;
}
=head2 get_newline
Usage : $object->get_newline(<named parameters>);
Purpose : Determine the character(s) used for newlines in a given file or
: input stream. Delegates to Bio::Root::Utilities::get_newline()
Example : $data = $object->get_newline(-CLIENT => $anObj,
: -FILE =>'usr/people/me/data.txt')
Argument : Same arguemnts as for create_filehandle().
Returns : Reference to a FileHandle object.
Throws : Propagates any exceptions thrown by Bio::Root::Utilities::get_newline().
See Also : L<taste_file()|taste_file>, L<create_filehandle()|create_filehandle>
=cut
#-----------------
sub get_newline {
#-----------------
my($self, @param) = @_;
return $NEWLINE if defined $NEWLINE;
my($client ) =
$self->_rearrange([qw( CLIENT )], @param);
my $FH = $self->create_filehandle(@param);
if(not ref $client) { $client = $self; }
if($client->{'_input_type'} =~ /STDIN|Glob|compressed/) {
# Can't taste from STDIN since we can't seek 0 on it.
# Are other non special Glob refs seek-able?
# Attempt to guess newline based on platform.
# Not robust since we could be reading Unix files on a Mac, e.g.
if(defined $ENV{'MACPERL'}) {
$NEWLINE = "\015"; # \r
} else {
$NEWLINE = "\012"; # \n
}
} else {
$NEWLINE = $self->taste_file($FH);
}
close ($FH) unless ($client->{'_input_type'} eq 'STDIN' ||
$client->{'_input_type'} eq 'FileHandle' ||
$client->{'_input_type'} eq 'Glob' );
delete $client->{'_input_type'};
return $NEWLINE || $DEFAULT_NEWLINE;
}
=head2 taste_file
Usage : $object->taste_file( <FileHandle> );
: Mainly a utility method for get_newline().
Purpose : Sample a filehandle to determine the character(s) used for a newline.
Example : $char = $Util->taste_file($FH)
Argument : Reference to a FileHandle object.
Returns : String containing an octal represenation of the newline character string.
: Unix = "\012" ("\n")
: Win32 = "\012\015" ("\r\n")
: Mac = "\015" ("\r")
Throws : Exception if no input is read within $TIMEOUT_SECS seconds.
: Exception if argument is not FileHandle object reference.
: Warning if cannot determine neewline char(s).
Comments : Based on code submitted by Vicki Brown (vlb@deltagen.com).
See Also : L<get_newline()|get_newline>
=cut
#---------------
sub taste_file {
#---------------
my ($self, $FH) = @_;
my $BUFSIZ = 256; # Number of bytes read from the file handle.
my ($buffer, $octal, $str, $irs, $i);
ref($FH) eq 'FileHandle' or $self->throw("Can't taste file: not a FileHandle ref");
$buffer = '';
# this is a quick hack to check for availability of alarm(); just copied
# from Bio/Root/IOManager.pm HL 02/19/01
my $alarm_available = 1;
eval {
alarm(0);
};
if($@) {
# alarm() not available (ActiveState perl for win32 doesn't have it.
# See jitterbug PR#98)
$alarm_available = 0;
}
$SIG{ALRM} = sub { die "Timed out!"; };
my $result;
eval {
$alarm_available && alarm( $TIMEOUT_SECS );
$result = read($FH, $buffer, $BUFSIZ); # read the $BUFSIZ characters of file
$alarm_available && alarm(0);
};
if($@ =~ /Timed out!/) {
$self->throw( "Timed out while waiting for input.",
"Timeout period = $TIMEOUT_SECS seconds.\n"
."For longer time before timing out, edit \$TIMEOUT_SECS in Bio::Root::Utilities.pm.");
} elsif(not $result) {
my $err = $@;
$self->throw("read taste failed to read from FileHandle.", $err);
} elsif($@ =~ /\S/) {
my $err = $@;
$self->throw("Unexpected error during read: $err");
}
seek($FH, 0, 0) or $self->throw("seek failed to seek 0 on FileHandle.");
my @chars = split(//, $buffer);
my $flavor;
for ($i = 0; $i <$BUFSIZ; $i++) {
if (($chars[$i] eq "\012")) {
unless ($chars[$i-1] eq "\015") {
$flavor='Unix';
$octal = "\012";
$str = '\n';
$irs = "^J";
last;
}
} elsif (($chars[$i] eq "\015") && ($chars[$i+1] eq "\012")) {
$flavor='DOS';
$octal = "\015\012";
$str = '\r\n';
$irs = "^M^J";
last;
} elsif (($chars[$i] eq "\015")) {
$flavor='Mac';
$octal = "\015";
$str = '\r';
$irs = "^M";
last;
}
}
if (not $octal) {
$self->warn("Could not determine newline char. Using '\012'");
$octal = "\012";
} else {
#print STDERR "FLAVOR=$flavor, NEWLINE CHAR = $irs\n";
}
return($octal);
}
=head2 file_flavor
Usage : $object->file_flavor( <filename> );
Purpose : Returns the 'flavor' of a given file (unix, dos, mac)
Example : print "$file has flavor: ", $Util->file_flavor($file);
Argument : filename = string, full path name for file
Returns : String describing flavor of file and handy info about line endings.
: One of these is returned:
: unix (\n or 012 or ^J)
: dos (\r\n or 015,012 or ^M^J)
: mac (\r or 015 or ^M)
: unknown
Throws : Exception if argument is not a file
: Propagates any exceptions thrown by Bio::Root::Utilities::get_newline().
See Also : L<get_newline()|get_newline>, L<taste_file()|taste_file>
=cut
#---------------
sub file_flavor {
#---------------
my ($self, $file) = @_;
my %flavors=("\012" =>'unix (\n or 012 or ^J)',
"\015\012" =>'dos (\r\n or 015,012 or ^M^J)',
"\015" =>'mac (\r or 015 or ^M)'
);
-f $file or $self->throw("Could not determine flavor: arg '$file' is either non existant or is not a file.\n");
my $octal = $self->get_newline($file);
my $flavor = $flavors{$octal} || "unknown";
return $flavor;
}
######################################
##### Mail Functions ########
######################################
=head2 mail_authority
Title : mail_authority
Usage : $Util->mail_authority( $message )
Purpose : Syntactic sugar to send email to $Bio::Root::Global::AUTHORITY
See Also : L<send_mail()|send_mail>
=cut
#---------------
sub mail_authority {
#---------------
my( $self, $message ) = @_;
my $script = $self->untaint($0,1);
my $email = $self->{'_auth_email'} || $AUTHORITY;
if (defined $email) {
$self->send_mail( -TO=>$AUTHORITY, -SUBJ=>$script, -MSG=>$message);
} else {
$self->throw("Can't email authority. No email defined.");
}
}
=head2 authority
Title : authority
Usage : $Util->authority('admin@example.com');
Purpose : Set/get the email address that should be notified by mail_authority()
See Also : L<mail_authority()|mail_authority>
=cut
#-------------
sub authority {
#-------------
my( $self, $email ) = @_;
$self->{'_auth_email'} = $email if defined $email;
return $self->{'_auth_email'};
}
=head2 send_mail
Title : send_mail
Usage : $Util->send_mail( named_parameters )
Purpose : Provides an interface to mail or sendmail, if available
Returns : n/a
Argument : Named parameters: (case-insensitive)
: -TO => e-mail address to send to
: -SUBJ => subject for message (optional)
: -MSG => message to be sent (optional)
: -CC => cc: e-mail address (optional)
Thows : Exception if TO: address appears bad or is missing.
: Exception if mail cannot be sent.
Comments : Based on TomC's tip at:
:
: Using default 'From:' information.
: sendmail options used:
: -t: ignore the address given on the command line and
: get To:address from the e-mail header.
: -oi: prevents send_mail from ending the message if it
: finds a period at the start of a line.
See Also : L<mail_authority()|mail_authority>
=cut
#-------------
sub send_mail {
#-------------
my( $self, @param) = @_;
my($recipient,$subj,$message,$cc) = $self->_rearrange([qw(TO SUBJ MSG CC)],@param);
$self->throw("Invalid or missing e-mail address: $recipient")
if not $recipient =~ /\S+\@\S+/;
$subj ||= 'empty subject'; $message ||= '';
# Best to use mail rather than sendmail. Permissions on sendmail in
# linux distros have been significantly locked down in recent years,
# due to the perception that it is insecure.
my ($exe, $ccinfo);
if ($exe = $self->find_exe('mail')) {
if (defined $cc) {
$ccinfo = "-c $cc";
}
$self->debug("send_mail: $exe -s '$subj' $ccinfo $recipient\n");
open (MAIL, "| $exe -s '$subj' $ccinfo $recipient") ||
$self->throw("Can't send email: mail cannot fork: $!");
print MAIL <<QQ_EOFM_QQ;
$message
QQ_EOFM_QQ
$? and $self->warn("mail didn't exit nicely: $?");
close(MAIL);
} elsif ($exe = $self->find_exe('sendmail')) {
open (SENDMAIL, "| $exe -oi -t") ||
$self->throw("Can't send email: sendmail cannot fork: $!");
print SENDMAIL <<QQ_EOFSM_QQ;
To: $recipient
Subject: $subj
Cc: $cc
$message
QQ_EOFSM_QQ
$? and $self->warn("sendmail didn't exit nicely: $?");
close(SENDMAIL);
} else {
$self->throw("Can't find executable for mail or sendmail.");
}
}
=head2 find_exe
Title : find_exe
Usage : $Util->find_exe(name);
Purpose : Locate an executable (for use in a system() call, e.g.))
Example : $Util->find_exe("gzip");
Returns : String containing executable that passes the -x test.
Returns undef if an executable of the supplied name cannot be found.
Argument : Name of executable to be found.
: Can be a full path. If supplied name is not executable, an executable
: of that name will be searched in all directories in the currently
: defined PATH environment variable.
Throws : No exceptions, but issues a warning if multiple paths are found
: for a given name. The first one is used.
Comments : TODO: Confirm functionality on all bioperl-supported platforms.
May get tripped up by variation in path separator character used
for splitting ENV{PATH}.
See Also :
=cut
#------------
sub find_exe {
#------------
my ($self, $name) = @_;
my @bindirs;
if ($^O =~ m/mswin/i) {
@bindirs = split ';', $ENV{'PATH'};
# Add usual executable extension if missing or -x won't work
$name.= '.exe' if ($name !~ m/\.exe$/i);
}
else {
@bindirs = split ':', $ENV{'PATH'};
}
my $exe = $name;
unless (-x $exe) {
undef $exe;
my @exes;
foreach my $d (@bindirs) {
# Note: Windows also understand '/' as folder separator,
# so there is no need to use a conditional with '\'
push(@exes, "$d/$name") if -x "$d/$name";
}
if (scalar @exes) {
$exe = $exes[0];
if (defined $exes[1]) {
$self->warn("find_exe: Multiple paths to '$name' found. Using $exe.");
}
}
}
return $exe;
}
######################################
### Interactive Functions #####
######################################
=head2 yes_reply
Title : yes_reply()
Usage : $Util->yes_reply( [query_string]);
Purpose : To test an STDIN input value for affirmation.
Example : print +( $Util->yes_reply('Are you ok') ? "great!\n" : "sorry.\n" );
: $Util->yes_reply('Continue') || die;
Returns : Boolean, true (1) if input string begins with 'y' or 'Y'
Argument: query_string = string to be used to prompt user (optional)
: If not provided, 'Yes or no' will be used.
: Question mark is automatically appended.
=cut
#-------------
sub yes_reply {
#-------------
my $self = shift;
my $query = shift;
my $reply;
$query ||= 'Yes or no';
print "\n$query? (y/n) [n] ";
chomp( $reply = <STDIN> );
$reply =~ /^y/i;
}
=head2 request_data
Title : request_data()
Usage : $Util->request_data( [value_name]);
Purpose : To request data from a user to be entered via keyboard (STDIN).
Example : $name = $Util->request_data('Name');
: # User will see: % Enter Name:
Returns : String, (data entered from keyboard, sans terminal newline.)
Argument: value_name = string to be used to prompt user.
: If not provided, 'data' will be used, (not very helpful).
: Question mark is automatically appended.
=cut
#----------------
sub request_data {
#----------------
my $self = shift;
my $data = shift || 'data';
print "Enter $data: ";
# Remove the terminal newline char.
chomp($data = <STDIN>);
$data;
}
=head2 quit_reply
Title : quit_reply
Usage :
Purpose :
=cut
sub quit_reply {
# Not much used since you can use request_data()
# and test for an empty string.
my $self = shift;
my $reply;
chop( $reply = <STDIN> );
$reply =~ /^q.*/i;
}
=head2 verify_version
Purpose : Checks the version of Perl used to invoke the script.
: Aborts program if version is less than the given argument.
Usage : verify_version('5.000')
=cut
#------------------
sub verify_version {
#------------------
my $self = shift;
my $reqVersion = shift;
$] < $reqVersion and do {
printf STDERR ( "\a\n%s %0.3f.\n", "** Sorry. This Perl script requires at least version", $reqVersion);
printf STDERR ( "%s %0.3f %s\n\n", "You are running Perl version", $], "Please update your Perl!\n\n" );
exit(1);
}
}
1;
__END__