————————————————————————package
Bio::Root::Utilities;
$Bio::Root::Utilities::VERSION
=
'1.7.8'
;
use
strict;
use
Bio::Root::IO;
use
Bio::Root::Exception;
=head1 NAME
Bio::Root::Utilities - general-purpose utilities
=head1 SYNOPSIS
=head2 Object Creation
# Using the supplied singleton object:
use Bio::Root::Utilities qw(:obj);
$Util->some_method();
# Create an object manually:
use Bio::Root::Utilities;
my $util = Bio::Root::Utilities->new();
$util->some_method();
$date_stamp = $Util->date_format('yyy-mm-dd');
$clean = $Util->untaint($dirty);
$compressed = $Util->compress('/home/me/myfile.txt')
my ($mean, $stdev) = $Util->mean_stdev( @data );
$Util->authority("me@example.com");
$Util->mail_authority("Something you should know about...");
...and a host of other methods. See below.
=head1 DESCRIPTION
Provides general-purpose utilities of potential interest to any Perl script.
The C<:obj> tag is a convenience that imports a $Util symbol into your
namespace representing a Bio::Root::Utilities object. This saves you
from creating your own Bio::Root::Utilities object via
C<Bio::Root::Utilities-E<gt>new()> or from prefixing all method calls with
C<Bio::Root::Utilities>, though feel free to do these things if desired.
Since there should normally not be a need for a script to have more
than one Bio::Root::Utilities object, this module thus comes with it's
own singleton.
=head1 INSTALLATION
This module is included with the central Bioperl distribution:
Follow the installation instructions included in the README file.
=head1 DEPENDENCIES
Inherits from L<Bio::Root::Root>, and uses L<Bio::Root::IO>
and L<Bio::Root::Exception>.
Relies on external executables for file compression/uncompression
and sending mail. No paths to these are hard coded but are located
as needed.
=head1 SEE ALSO
http://bioperl.org - Bioperl Project Homepage
=head1 ACKNOWLEDGEMENTS
This module was originally developed under the auspices of the
Saccharomyces Genome Database: http://www.yeastgenome.org/
=head1 AUTHOR Steve Chervitz
=cut
@EXPORT_OK
=
qw($Util)
;
%EXPORT_TAGS
= (
obj
=> [
qw($Util)
],
std
=> [
qw($Util)
],);
@COMPRESSION_UTILS @UNCOMPRESSION_UTILS
$DEFAULT_NEWLINE $NEWLINE $AUTHORITY
@MONTHS @DAYS $BASE_YEAR $DEFAULT_CENTURY
)
;
$ID
=
'Bio::Root::Utilities'
;
# Number of seconds to wait before timing out when reading input (taste_file())
$TIMEOUT_SECS
= 30;
$NEWLINE
=
$ENV
{
'NEWLINE'
} ||
undef
;
$BASE_YEAR
= 1900;
# perl's localtime() assumes this for it's year data.
# TODO: update this every hundred years. Y2K-sensitive code.
$DEFAULT_CENTURY
=
$BASE_YEAR
+ 100;
@MONTHS
=
qw(Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec)
;
@DAYS
=
qw(Sun Mon Tue Wed Thu Fri Sat)
;
# Sets the preference for compression utilities to be used by compress().
# The first executable in this list to be found in the current PATH will be used,
# unless overridden in the call to that function. See docs for details.
@COMPRESSION_UTILS
=
qw(gzip bzip2 zip compress)
;
@UNCOMPRESSION_UTILS
=
qw(gunzip gzip bunzip2 unzip uncompress)
;
# Default person to receive feedback from users and possibly automatic error messages.
$AUTHORITY
=
''
;
# Note: $GNU_PATH is now deprecated, shouldn't be needed since now this module
# will automatically locate the compression utility in the current PATH.
# Retaining $GNU_PATH for backward compatibility.
#
# $GNU_PATH points to the directory containing the gzip and gunzip
# executables. It may be required for executing gzip/gunzip
# in some situations (e.g., when $ENV{PATH} doesn't contain this dir.
# Customize $GNU_PATH for your site if the compress() or
# uncompress() functions are generating exceptions.
$GNU_PATH
=
''
;
#$GNU_PATH = '/tools/gnu/bin/';
$DEFAULT_NEWLINE
=
"\012"
;
# \n (used if get_newline() fails for some reason)
## Static UTIL object.
$Util
= Bio::Root::Root->new();
=head2 date_format
Title : date_format
Usage : $Util->date_format( [FMT], [DATE])
Purpose : -- Get a string containing the formatted date or time
: taken when this routine is invoked.
: -- Provides a way to avoid using `date`.
: -- Provides an interface to localtime().
: -- Interconverts some date formats.
:
: (For additional functionality, use Date::Manip or
: Date::DateCalc available from CPAN).
Example : $Util->date_format();
: $date = $Util->date_format('yyyy-mmm-dd', '11/22/92');
Returns : String (unless 'list' is provided as argument, see below)
:
: 'yyyy-mm-dd' = 1996-05-03 # default format.
: 'yyyy-dd-mm' = 1996-03-05
: 'yyyy-mmm-dd' = 1996-May-03
: 'd-m-y' = 3-May-1996
: 'd m y' = 3 May 1996
: 'dmy' = 3may96
: 'mdy' = May 3, 1996
: 'ymd' = 96may3
: 'md' = may3
: 'year' = 1996
: 'hms' = 23:01:59 # when not converting a format, 'hms' can be
: # tacked on to any of the above options
: # to add the time stamp: eg 'dmyhms'
: 'full' | 'unix' = UNIX-style date: Tue May 5 22:00:00 1998
: 'list' = the contents of localtime(time) in an array.
Argument : (all are optional)
: FMT = yyyy-mm-dd | yyyy-dd-mm | yyyy-mmm-dd |
: mdy | ymd | md | d-m-y | hms | hm
: ('hms' may be appended to any of these to
: add a time stamp)
:
: DATE = String containing date to be converted.
: Acceptable input formats:
: 12/1/97 (for 1 December 1997)
: 1997-12-01
: 1997-Dec-01
Throws :
Comments : If you don't care about formatting or using backticks, you can
: always use: $date = `date`;
:
: For more features, use Date::Manip.pm, (which I should
: probably switch to...)
See Also : L<file_date()|file_date>, L<month2num()|month2num>
=cut
#---------------'
sub
date_format {
#---------------
my
$self
=
shift
;
my
$option
=
shift
;
my
$date
=
shift
;
# optional date to be converted.
my
(
$sec
,
$min
,
$hour
,
$mday
,
$mon
,
$year
,
$wday
,
$yday
,
$isdst
);
$option
||=
'yyyy-mm-dd'
;
my
(
$month_txt
,
$day_txt
,
$month_num
,
$fullYear
);
my
(
$converting
,
@date
);
# Load a supplied date for conversion:
if
(
defined
(
$date
) && (
$date
=~ /[\D-]+/)) {
$converting
= 1;
if
(
$date
=~ m{/}) {
(
$mon
,
$mday
,
$year
) =
split
(m{/},
$date
);
}
elsif
(
$date
=~ /(\d{4})-(\d{1,2})-(\d{1,2})/) {
(
$year
,
$mon
,
$mday
) = ($1, $2, $3);
}
elsif
(
$date
=~ /(\d{4})-(\w{3,})-(\d{1,2})/) {
(
$year
,
$mon
,
$mday
) = ($1, $2, $3);
$mon
=
$self
->month2num($2);
}
else
{
STDERR
"\n*** Unsupported input date format: $date\n"
;
}
if
(
length
(
$year
) == 4) {
$fullYear
=
$year
;
$year
=
substr
$year
, 2;
}
else
{
# Heuristics to guess what century was intended when a 2-digit year is given
# If number is over 50, assume it's for prev century; under 50 = default century.
# TODO: keep an eye on this Y2K-sensitive code
if
(
$year
> 50) {
$fullYear
=
$DEFAULT_CENTURY
+
$year
- 100;
}
else
{
$fullYear
=
$DEFAULT_CENTURY
+
$year
;
}
}
$mon
-= 1;
}
else
{
(
$sec
,
$min
,
$hour
,
$mday
,
$mon
,
$year
,
$wday
,
$yday
,
$isdst
) =
@date
=
localtime
((
$date
?
$date
:
time
()));
return
@date
if
$option
=~ /list/i;
$fullYear
=
$BASE_YEAR
+
$year
;
}
$month_txt
=
$MONTHS
[
$mon
];
$day_txt
=
$DAYS
[
$wday
]
if
defined
$wday
;
$month_num
=
$mon
+1;
# print "sec: $sec, min: $min, hour: $hour, month: $mon, m-day: $mday, year: $year\nwday: $wday, yday: $yday, dst: $isdst";<STDIN>;
if
(
$option
=~ /yyyy-mm-dd/i ) {
$date
=
sprintf
"%4d-%02d-%02d"
,
$fullYear
,
$month_num
,
$mday
;
}
elsif
(
$option
=~ /yyyy-dd-mm/i ) {
$date
=
sprintf
"%4d-%02d-%02d"
,
$fullYear
,
$mday
,
$month_num
;
}
elsif
(
$option
=~ /yyyy-mmm-dd/i ) {
$date
=
sprintf
"%4d-%3s-%02d"
,
$fullYear
,
$month_txt
,
$mday
;
}
elsif
(
$option
=~ /full|unix/i ) {
$date
=
sprintf
"%3s %3s %2d %02d:%02d:%02d %d"
,
$day_txt
,
$month_txt
,
$mday
,
$hour
,
$min
,
$sec
,
$fullYear
;
}
elsif
(
$option
=~ /mdy/i ) {
$date
=
"$month_txt $mday, $fullYear"
;
}
elsif
(
$option
=~ /ymd/i ) {
$date
=
$year
.
"\l$month_txt$mday"
;
}
elsif
(
$option
=~ /dmy/i ) {
$date
=
$mday
.
"\l$month_txt$year"
;
}
elsif
(
$option
=~ /md/i ) {
$date
=
"\l$month_txt$mday"
;
}
elsif
(
$option
=~ /d-m-y/i ) {
$date
=
"$mday-$month_txt-$fullYear"
;
}
elsif
(
$option
=~ /d m y/i ) {
$date
=
"$mday $month_txt $fullYear"
;
}
elsif
(
$option
=~ /year/i ) {
$date
=
$fullYear
;
}
elsif
(
$option
=~ /dmy/i ) {
$date
=
$mday
.
'-'
.
$month_txt
.
'-'
.
$fullYear
;
}
elsif
(
$option
and
$option
!~ /hms/i) {
STDERR
"\n*** Unrecognized date format request: $option\n"
;
}
if
(
$option
=~ /hms/i and not
$converting
) {
$date
.=
" $hour:$min:$sec"
if
$date
;
$date
||=
"$hour:$min:$sec"
;
}
return
$date
||
join
(
" "
,
@date
);
}
=head2 month2num
Title : month2num
Purpose : Converts a string containing a name of a month to integer
: representing the number of the month in the year.
Example : $Util->month2num("march"); # returns 3
Argument : The string argument must contain at least the first
: three characters of the month's name. Case insensitive.
Throws : Exception if the conversion fails.
=cut
#--------------'
sub
month2num {
#--------------
my
(
$self
,
$str
) =
@_
;
# Get string in proper format for conversion.
$str
=
substr
(
$str
, 0, 3);
for
my
$month
(0..
$#MONTHS
) {
return
$month
+1
if
$str
=~ /
$MONTHS
[
$month
]/i;
}
$self
->throw(
"Invalid month name: $str"
);
}
=head2 num2month
Title : num2month
Purpose : Does the opposite of month2num.
: Converts a number into a string containing a name of a month.
Example : $Util->num2month(3); # returns 'Mar'
Throws : Exception if supplied number is out of range.
=cut
#-------------
sub
num2month {
#-------------
my
(
$self
,
$num
) =
@_
;
$self
->throw(
"Month out of range: $num"
)
if
$num
< 1 or
$num
> 12;
return
$MONTHS
[
$num
-1];
}
=head2 compress
Title : compress
Usage : $Util->compress(full-path-filename);
: $Util->compress(<named parameters>);
Purpose : Compress a file.
Example : $Util->compress("/usr/people/me/data.txt");
: $Util->compress(-file=>"/usr/people/me/data.txt",
: -tmp=>1,
: -outfile=>"/usr/people/share/data.txt.gz",
: -exe=>"/usr/local/bin/fancyzip");
Returns : String containing full, absolute path to compressed file
Argument : Named parameters (case-insensitive):
: -FILE => String (name of file to be compressed, full path).
: If the supplied filename ends with '.gz' or '.Z',
: that extension will be removed before attempting to compress.
: Optional:
: -TMP => boolean. If true, (or if user is not the owner of the file)
: the file is compressed to a temp file. If false, file may be
: clobbered with the compressed version (if using a utility like
: gzip, which is the default)
: -OUTFILE => String (name of the output compressed file, full path).
: -EXE => Name of executable for compression utility to use.
: Will supersede those in @COMPRESSION_UTILS defined by
: this module. If the absolute path to the executable is not provided,
: it will be searched in the PATH env variable.
Throws : Exception if file cannot be compressed.
: If user is not owner of the file, generates a warning and compresses to
: a tmp file. To avoid this warning, use the -o file test operator
: and call this function with -TMP=>1.
Comments : Attempts to compress using utilities defined in the @COMPRESSION_UTILS
: defined by this module, in the order defined. The first utility that is
: found to be executable will be used. Any utility defined in optional -EXE param
: will be tested for executability first.
: To minimize security risks, the -EXE parameter value is untained using
: the untaint() method of this module (in 'relaxed' mode to permit path separators).
See Also : L<uncompress()|uncompress>
=cut
#------------'
sub
compress {
#------------
my
(
$self
,
@args
) =
@_
;
# This method formerly didn't use named params and expected fileName, tmp
# in that order. This should be backward compatibile.
my
(
$fileName
,
$tmp
,
$outfile
,
$exe
) =
$self
->_rearrange([
qw(FILE TMP OUTFILE EXE)
],
@args
);
my
(
$file
,
$get
,
$fmt
);
# in case the supplied name already has a compressed extension
if
(
$fileName
=~ /(\.gz|\.Z|\.bz2|\.zip)$/) {
$fileName
=~ s/$1$//; };
$self
->debug(
"compressing file $fileName"
);
my
@util_to_use
=
@COMPRESSION_UTILS
;
if
(
defined
$exe
){
$exe
=
$self
->untaint(
$exe
, 1);
unshift
@util_to_use
,
$exe
;
}
my
@checked
=
@util_to_use
;
$exe
||=
''
;
while
(not -x
$exe
and
scalar
(
@util_to_use
)) {
$exe
=
$self
->find_exe(
shift
@util_to_use
);
}
unless
(-x
$exe
) {
$self
->throw(
"Can't find compression utility. Looked for @checked"
);
}
my
(
$compressed
,
@cmd
,
$handle
);
if
(
defined
(
$outfile
) or
$tmp
or not -o
$fileName
) {
if
(
defined
$outfile
) {
$compressed
=
$outfile
;
}
else
{
# obtain a temporary file name (not using the handle)
# and insert some special text to flag it as a bioperl-based temp file
my
$io
= Bio::Root::IO->new();
(
$handle
,
$compressed
) =
$io
->tempfile();
$compressed
.=
'.tmp.bioperl.gz'
;
}
# Use double quotes if executable path have empty spaces
if
(
$exe
=~ m/ /) {
$exe
=
"\"$exe\""
;
}
if
(
$exe
=~ /gzip|bzip2|compress/) {
@cmd
= (
"$exe -f < \"$fileName\" > \"$compressed\""
);
}
elsif
(
$exe
eq
'zip'
) {
@cmd
= (
"$exe -r \"$fileName.zip\" \"$fileName\""
);
}
not
$tmp
and
$self
->
warn
(
"Not owner of file $fileName. Compressing to temp file $compressed."
);
$tmp
= 1;
}
else
{
# Need to compute the compressed name based on exe since we're returning it.
$compressed
=
$fileName
;
if
(
$exe
=~ /gzip/) {
$compressed
.=
'.gz'
;
}
elsif
(
$exe
=~ /bzip2/) {
$compressed
.=
'.bz2'
;
}
elsif
(
$exe
=~ /zip/) {
$compressed
.=
'.zip'
;
}
elsif
(
$exe
=~ /compress/) {
$compressed
.=
'.Z'
;
}
if
(
$exe
=~ /gzip|bzip2|compress/) {
@cmd
= (
$exe
,
'-f'
,
$fileName
);
}
elsif
(
$exe
eq
'zip'
) {
@cmd
= (
$exe
,
'-r'
,
"$compressed"
,
$fileName
);
}
}
if
(
system
(
@cmd
) != 0) {
$self
->throw(
-class
=>
'Bio::Root::SystemException'
,
-text
=>
"Failed to compress file $fileName using $exe: $!"
);
}
return
$compressed
;
}
=head2 uncompress
Title : uncompress
Usage : $Util->uncompress(full-path-filename);
: $Util->uncompress(<named parameters>);
Purpose : Uncompress a file.
Example : $Util->uncompress("/usr/people/me/data.txt");
: $Util->uncompress(-file=>"/usr/people/me/data.txt.gz",
: -tmp=>1,
: -outfile=>"/usr/people/share/data.txt",
: -exe=>"/usr/local/bin/fancyzip");
Returns : String containing full, absolute path to uncompressed file
Argument : Named parameters (case-insensitive):
: -FILE => String (name of file to be uncompressed, full path).
: If the supplied filename ends with '.gz' or '.Z',
: that extension will be removed before attempting to uncompress.
: Optional:
: -TMP => boolean. If true, (or if user is not the owner of the file)
: the file is uncompressed to a temp file. If false, file may be
: clobbered with the uncompressed version (if using a utility like
: gzip, which is the default)
: -OUTFILE => String (name of the output uncompressed file, full path).
: -EXE => Name of executable for uncompression utility to use.
: Will supersede those in @UNCOMPRESSION_UTILS defined by
: this module. If the absolute path to the executable is not provided,
: it will be searched in the PATH env variable.
Throws : Exception if file cannot be uncompressed.
: If user is not owner of the file, generates a warning and uncompresses to
: a tmp file. To avoid this warning, use the -o file test operator
: and call this function with -TMP=>1.
Comments : Attempts to uncompress using utilities defined in the @UNCOMPRESSION_UTILS
: defined by this module, in the order defined. The first utility that is
: found to be executable will be used. Any utility defined in optional -EXE param
: will be tested for executability first.
: To minimize security risks, the -EXE parameter value is untained using
: the untaint() method of this module (in 'relaxed' mode to permit path separators).
See Also : L<compress()|compress>
=cut
#------------'
sub
uncompress {
#------------
my
(
$self
,
@args
) =
@_
;
# This method formerly didn't use named params and expected fileName, tmp
# in that order. This should be backward compatibile.
my
(
$fileName
,
$tmp
,
$outfile
,
$exe
) =
$self
->_rearrange([
qw(FILE TMP OUTFILE EXE)
],
@args
);
my
(
$file
,
$get
,
$fmt
);
# in case the supplied name lacks a compressed extension
if
(not
$fileName
=~ /(\.gz|\.Z|\.bz2|\.zip)$/) {
$fileName
.= $1; };
$self
->debug(
"uncompressing file $fileName"
);
my
@util_to_use
=
@UNCOMPRESSION_UTILS
;
if
(
defined
$exe
){
$exe
=
$self
->untaint(
$exe
, 1);
unshift
@util_to_use
,
$exe
;
}
$exe
||=
''
;
while
(not -x
$exe
and
scalar
(
@util_to_use
)) {
$exe
=
$self
->find_exe(
shift
@util_to_use
);
}
unless
(-x
$exe
) {
$self
->throw(
"Can't find compression utility. Looked for @util_to_use"
);
}
my
(
$uncompressed
,
@cmd
,
$handle
);
$uncompressed
=
$fileName
;
$uncompressed
=~ s/\.\w+$//;
if
(
defined
(
$outfile
) or
$tmp
or not -o
$fileName
) {
if
(
defined
$outfile
) {
$uncompressed
=
$outfile
;
}
else
{
# obtain a temporary file name (not using the handle)
my
$io
= Bio::Root::IO->new();
(
$handle
,
$uncompressed
) =
$io
->tempfile();
# insert some special text to flag it as a bioperl-based temp file
$uncompressed
.=
'.tmp.bioperl'
;
}
# Use double quotes if executable path have empty spaces
if
(
$exe
=~ m/ /) {
$exe
=
"\"$exe\""
;
}
if
(
$exe
=~ /gunzip|bunzip2|uncompress/) {
@cmd
= (
"$exe -f < \"$fileName\" > \"$uncompressed\""
);
}
elsif
(
$exe
=~ /gzip/) {
@cmd
= (
"$exe -df < \"$fileName\" > \"$uncompressed\""
);
}
elsif
(
$exe
eq
'unzip'
) {
@cmd
= (
"$exe -p \"$fileName\" > \"$uncompressed\""
);
}
not
$tmp
and
$self
->
warn
(
"Not owner of file $fileName. Uncompressing to temp file $uncompressed."
);
$tmp
= 1;
}
else
{
if
(
$exe
=~ /gunzip|bunzip2|uncompress/) {
@cmd
= (
$exe
,
'-f'
,
$fileName
);
}
elsif
(
$exe
=~ /gzip/) {
@cmd
= (
$exe
,
'-df'
,
$fileName
);
}
elsif
(
$exe
eq
'zip'
) {
@cmd
= (
$exe
,
$fileName
);
}
}
if
(
system
(
@cmd
) != 0) {
$self
->throw(
-class
=>
'Bio::Root::SystemException'
,
-text
=>
"Failed to uncompress file $fileName using $exe: $!"
);
}
return
$uncompressed
;
}
=head2 file_date
Title : file_date
Usage : $Util->file_date( filename [,date_format])
Purpose : Obtains the date of a given file.
: Provides flexible formatting via date_format().
Returns : String = date of the file as: yyyy-mm-dd (e.g., 1997-10-15)
Argument : filename = string, full path name for file
: date_format = string, desired format for date (see date_format()).
: Default = yyyy-mm-dd
Thows : Exception if no file is provided or does not exist.
Comments : Uses the mtime field as obtained by stat().
=cut
#--------------
sub
file_date {
#--------------
my
(
$self
,
$file
,
$fmt
) =
@_
;
$self
->throw(
"No such file: $file"
)
if
not
$file
or not -e
$file
;
$fmt
||=
'yyyy-mm-dd'
;
my
@file_data
=
stat
(
$file
);
return
$self
->date_format(
$fmt
,
$file_data
[9]);
# mtime field
}
=head2 untaint
Title : untaint
Purpose : To remove nasty shell characters from untrusted data
: and allow a script to run with the -T switch.
: Potentially dangerous shell meta characters: &;`'\"|*?!~<>^()[]{}$\n\r
: Accept only the first block of contiguous characters:
: Default allowed chars = "-\w.', ()"
: If $relax is true = "-\w.', ()\/=%:^<>*"
Usage : $Util->untaint($value, $relax)
Returns : String containing the untained data.
Argument: $value = string
: $relax = boolean
Comments:
This general untaint() function may not be appropriate for every situation.
To allow only a more restricted subset of special characters
(for example, untainting a regular expression), then using a custom
untainting mechanism would permit more control.
Note that special trusted vars (like $0) require untainting.
=cut
#------------`
sub
untaint {
#------------
my
(
$self
,
$value
,
$relax
) =
@_
;
$relax
||= 0;
my
$untainted
;
$self
->debug(
"\nUNTAINT: $value\n"
);
unless
(
defined
$value
and
$value
ne
''
) {
return
$value
;
}
if
(
$relax
) {
$value
=~ /([-\w.\', ()\/=%:^<>*]+)/;
$untainted
= $1
# } elsif( $relax == 2 ) { # Could have several degrees of relax.
# $value =~ /([-\w.\', ()\/=%:^<>*]+)/;
# $untainted = $1
}
else
{
$value
=~ /([-\w.\', ()]+)/;
$untainted
= $1
}
$self
->debug(
"UNTAINTED: $untainted\n"
);
$untainted
;
}
=head2 mean_stdev
Title : mean_stdev
Usage : ($mean, $stdev) = $Util->mean_stdev( @data )
Purpose : Calculates the mean and standard deviation given a list of numbers.
Returns : 2-element list (mean, stdev)
Argument : list of numbers (ints or floats)
Thows : n/a
=cut
#---------------
sub
mean_stdev {
#---------------
my
(
$self
,
@data
) =
@_
;
return
(
undef
,
undef
)
if
not
@data
;
# case of empty @data list
my
$mean
= 0;
my
$N
= 0;
foreach
my
$num
(
@data
) {
$mean
+=
$num
;
$N
++
}
$mean
/=
$N
;
my
$sum_diff_sqd
= 0;
foreach
my
$num
(
@data
) {
$sum_diff_sqd
+= (
$mean
-
$num
) * (
$mean
-
$num
);
}
# if only one element in @data list, unbiased stdev is undefined
my
$stdev
=
$N
<= 1 ?
undef
:
sqrt
(
$sum_diff_sqd
/ (
$N
-1) );
return
(
$mean
,
$stdev
);
}
=head2 count_files
Title : count_files
Purpose : Counts the number of files/directories within a given directory.
: Also reports the number of text and binary files in the dir
: as well as names of these files and directories.
Usage : count_files(\%data)
: $data{-DIR} is the directory to be analyzed. Default is ./
: $data{-PRINT} = 0|1; if 1, prints results to STDOUT, (default=0).
Argument : Hash reference (empty)
Returns : n/a;
: Modifies the hash ref passed in as the sole argument.
: $$href{-TOTAL} scalar
: $$href{-NUM_TEXT_FILES} scalar
: $$href{-NUM_BINARY_FILES} scalar
: $$href{-NUM_DIRS} scalar
: $$href{-T_FILE_NAMES} array ref
: $$href{-B_FILE_NAMES} array ref
: $$href{-DIRNAMES} array ref
=cut
#----------------
sub
count_files {
#----------------
my
$self
=
shift
;
my
$href
=
shift
;
# Reference to an empty hash.
my
(
$name
,
@fileLine
);
my
$dir
=
$$href
{-DIR} ||
'./'
;
# THIS IS UNIX SPECIFIC? FIXME/TODO
my
=
$$href
{-PRINT} || 0;
### Make sure $dir ends with /
$dir
!~ m{/$} and
do
{
$dir
.=
'/'
;
$$href
{-DIR} =
$dir
; };
open
(
my
$PIPE
,
"ls -1 $dir |"
) ||
$self
->throw(
"Can't open input pipe: $!"
);
### Initialize the hash data.
$$href
{-TOTAL} = 0;
$$href
{-NUM_TEXT_FILES} =
$$href
{-NUM_BINARY_FILES} =
$$href
{-NUM_DIRS} = 0;
$$href
{-T_FILE_NAMES} = [];
$$href
{-B_FILE_NAMES} = [];
$$href
{-DIR_NAMES} = [];
while
(
my
$line
= <
$PIPE
> ) {
chomp
();
$$href
{-TOTAL}++;
if
( -T
$dir
.
$line
) {
$$href
{-NUM_TEXT_FILES}++;
push
@{
$$href
{-T_FILE_NAMES}},
$line
; }
if
( -B
$dir
.
$line
and not -d
$dir
.
$line
) {
$$href
{-NUM_BINARY_FILES}++;
push
@{
$$href
{-B_FILE_NAMES}},
$line
; }
if
( -d
$dir
.
$line
) {
$$href
{-NUM_DIRS}++;
push
@{
$$href
{-DIR_NAMES}},
$line
; }
}
close
$PIPE
;
if
(
) {
printf
(
"\n%4d %s\n"
,
$$href
{-TOTAL},
"total files+dirs in $dir"
);
printf
(
"%4d %s\n"
,
$$href
{-NUM_TEXT_FILES},
"text files"
);
printf
(
"%4d %s\n"
,
$$href
{-NUM_BINARY_FILES},
"binary files"
);
printf
(
"%4d %s\n"
,
$$href
{-NUM_DIRS},
"directories"
);
}
}
=head2 file_info
Title : file_info
Purpose : Obtains a variety of date for a given file.
: Provides an interface to Perl's stat().
Status : Under development. Not ready. Don't use!
=cut
#--------------
sub
file_info {
#--------------
my
(
$self
,
%param
) =
@_
;
my
(
$file
,
$get
,
$fmt
) =
$self
->_rearrange([
qw(FILE GET FMT)
],
%param
);
$get
||=
'all'
;
$fmt
||=
'yyyy-mm-dd'
;
my
(
$dev
,
$ino
,
$mode
,
$nlink
,
$uid
,
$gid
,
$rdev
,
$size
,
$atime
,
$mtime
,
$ctime
,
$blksize
,
$blocks
) =
stat
$file
;
if
(
$get
=~ /date/i) {
## I can get the elapsed time since the file was modified but
## it's not so straightforward to get the date in a nice format...
## Think about using a standard CPAN module for this, like
## Date::Manip or Date::DateCalc.
my
$date
=
$mtime
;
my
$elsec
=
time
-
$mtime
;
printf
"\nFile age: %.0f sec %.0f hrs %.0f days"
,
$elsec
,
$elsec
/3600,
$elsec
/(3600*24);<STDIN>;
my
$days
=
sprintf
"%.0f"
,
$elsec
/(3600*24);
}
elsif
(
$get
eq
'all'
) {
return
stat
$file
;
}
}
=head2 delete
Title : delete
Purpose :
=cut
#------------
sub
delete
{
#------------
my
$self
=
shift
;
my
$fileName
=
shift
;
if
(not -e
$fileName
) {
$self
->throw(
"Could not delete file '$fileName': Does not exist."
);
}
elsif
(not -o
$fileName
) {
$self
->throw(
"Could not delete file '$fileName': Not owner."
);
}
my
$ulval
=
unlink
(
$fileName
) > 0
or
$self
->throw(
"Failed to delete file '$fileName': $!"
);
}
=head2 create_filehandle
Usage : $object->create_filehandle(<named parameters>);
Purpose : Create a FileHandle object from a file or STDIN.
: Mainly used as a helper method by read() and get_newline().
Example : $data = $object->create_filehandle(-FILE =>'usr/people/me/data.txt')
Argument : Named parameters (case-insensitive):
: (all optional)
: -CLIENT => object reference for the object submitting
: the request. Default = $Util.
: -FILE => string (full path to file) or a reference
: to a FileHandle object or typeglob. This is an
: optional parameter (if not defined, STDIN is used).
Returns : Reference to a FileHandle object.
Throws : Exception if cannot open a supplied file or if supplied with a
: reference that is not a FileHandle ref.
Comments : If given a FileHandle reference, this method simply returns it.
: This method assumes the user wants to read ascii data. So, if
: the file is binary, it will be treated as a compressed (gzipped)
: file and access it using gzip -ce. The problem here is that not
: all binary files are necessarily compressed. Therefore,
: this method should probably have a -mode parameter to
: specify ascii or binary.
See Also : L<get_newline()|get_newline>
=cut
#---------------------
sub
create_filehandle {
#---------------------
my
(
$self
,
@param
) =
@_
;
my
(
$client
,
$file
,
$handle
) =
$self
->_rearrange([
qw( CLIENT FILE HANDLE )
],
@param
);
if
(not
ref
$client
) {
$client
=
$self
; }
$file
||=
$handle
;
if
(
$client
->can(
'file'
)) {
$file
=
$client
->file(
$file
);
}
my
$FH
;
my
(
$handle_ref
);
if
(
$handle_ref
=
ref
(
$file
)) {
if
(
$handle_ref
eq
'FileHandle'
) {
$FH
=
$file
;
$client
->{
'_input_type'
} =
"FileHandle"
;
}
elsif
(
$handle_ref
eq
'GLOB'
) {
$FH
=
$file
;
$client
->{
'_input_type'
} =
"Glob"
;
}
else
{
$self
->throw(
-class
=>
'Bio::Root::IOException'
,
-text
=>
"Could not read file '$file': Not a FileHandle or GLOB ref."
);
}
$self
->verbose > 0 and
printf
STDERR
"$ID: reading data from FileHandle\n"
;
}
elsif
(
$file
) {
$client
->{
'_input_type'
} =
"FileHandle for $file"
;
# Use gzip -cd to access compressed data.
if
( -B
$file
) {
$client
->{
'_input_type'
} .=
" (compressed)"
;
my
$gzip
=
$self
->find_exe(
'gzip'
);
$file
=
"$gzip -cd $file |"
}
$FH
= FileHandle->new();
open
(
$FH
,
$file
) ||
$self
->throw(
-class
=>
'Bio::Root::FileOpenException'
,
-text
=>
"Could not access data file '$file': $!"
);
$self
->verbose > 0 and
printf
STDERR
"$ID: reading data from file '$file'\n"
;
}
else
{
# Read from STDIN.
$FH
= \
*STDIN
;
$self
->verbose > 0 and
printf
STDERR
"$ID: reading data from STDIN\n"
;
$client
->{
'_input_type'
} =
"STDIN"
;
}
return
$FH
;
}
=head2 get_newline
Usage : $object->get_newline(<named parameters>);
Purpose : Determine the character(s) used for newlines in a given file or
: input stream. Delegates to Bio::Root::Utilities::get_newline()
Example : $data = $object->get_newline(-CLIENT => $anObj,
: -FILE =>'usr/people/me/data.txt')
Argument : Same arguemnts as for create_filehandle().
Returns : Reference to a FileHandle object.
Throws : Propagates any exceptions thrown by Bio::Root::Utilities::get_newline().
See Also : L<taste_file()|taste_file>, L<create_filehandle()|create_filehandle>
=cut
#-----------------
sub
get_newline {
#-----------------
my
(
$self
,
@param
) =
@_
;
return
$NEWLINE
if
defined
$NEWLINE
;
my
(
$client
) =
$self
->_rearrange([
qw( CLIENT )
],
@param
);
my
$FH
=
$self
->create_filehandle(
@param
);
if
(not
ref
$client
) {
$client
=
$self
; }
if
(
$client
->{
'_input_type'
} =~ /STDIN|Glob|compressed/) {
# Can't taste from STDIN since we can't seek 0 on it.
# Are other non special Glob refs seek-able?
# Attempt to guess newline based on platform.
# Not robust since we could be reading Unix files on a Mac, e.g.
if
(
defined
$ENV
{
'MACPERL'
}) {
$NEWLINE
=
"\015"
;
# \r
}
else
{
$NEWLINE
=
"\012"
;
# \n
}
}
else
{
$NEWLINE
=
$self
->taste_file(
$FH
);
}
close
(
$FH
)
unless
(
$client
->{
'_input_type'
} eq
'STDIN'
||
$client
->{
'_input_type'
} eq
'FileHandle'
||
$client
->{
'_input_type'
} eq
'Glob'
);
delete
$client
->{
'_input_type'
};
return
$NEWLINE
||
$DEFAULT_NEWLINE
;
}
=head2 taste_file
Usage : $object->taste_file( <FileHandle> );
: Mainly a utility method for get_newline().
Purpose : Sample a filehandle to determine the character(s) used for a newline.
Example : $char = $Util->taste_file($FH)
Argument : Reference to a FileHandle object.
Returns : String containing an octal represenation of the newline character string.
: Unix = "\012" ("\n")
: Win32 = "\012\015" ("\r\n")
: Mac = "\015" ("\r")
Throws : Exception if no input is read within $TIMEOUT_SECS seconds.
: Exception if argument is not FileHandle object reference.
: Warning if cannot determine neewline char(s).
Comments : Based on code submitted by Vicki Brown (vlb@deltagen.com).
See Also : L<get_newline()|get_newline>
=cut
#---------------
sub
taste_file {
#---------------
my
(
$self
,
$FH
) =
@_
;
my
$BUFSIZ
= 256;
# Number of bytes read from the file handle.
my
(
$buffer
,
$octal
,
$str
,
$irs
,
$i
);
ref
(
$FH
) eq
'FileHandle'
or
$self
->throw(
"Can't taste file: not a FileHandle ref"
);
$buffer
=
''
;
# this is a quick hack to check for availability of alarm(); just copied
# from Bio/Root/IOManager.pm HL 02/19/01
my
$alarm_available
= 1;
eval
{
alarm
(0);
};
if
($@) {
# alarm() not available (ActiveState perl for win32 doesn't have it.
# See jitterbug PR#98)
$alarm_available
= 0;
}
$SIG
{ALRM} =
sub
{
die
"Timed out!"
; };
my
$result
;
eval
{
$alarm_available
&&
alarm
(
$TIMEOUT_SECS
);
$result
=
read
(
$FH
,
$buffer
,
$BUFSIZ
);
# read the $BUFSIZ characters of file
$alarm_available
&&
alarm
(0);
};
if
($@ =~ /Timed out!/) {
$self
->throw(
"Timed out while waiting for input."
,
"Timeout period = $TIMEOUT_SECS seconds.\n"
.
"For longer time before timing out, edit \$TIMEOUT_SECS in Bio::Root::Utilities.pm."
);
}
elsif
(not
$result
) {
my
$err
= $@;
$self
->throw(
"read taste failed to read from FileHandle."
,
$err
);
}
elsif
($@ =~ /\S/) {
my
$err
= $@;
$self
->throw(
"Unexpected error during read: $err"
);
}
seek
(
$FH
, 0, 0) or
$self
->throw(
"seek failed to seek 0 on FileHandle."
);
my
@chars
=
split
(//,
$buffer
);
my
$flavor
;
for
(
$i
= 0;
$i
<
$BUFSIZ
;
$i
++) {
if
((
$chars
[
$i
] eq
"\012"
)) {
unless
(
$chars
[
$i
-1] eq
"\015"
) {
$flavor
=
'Unix'
;
$octal
=
"\012"
;
$str
=
'\n'
;
$irs
=
"^J"
;
last
;
}
}
elsif
((
$chars
[
$i
] eq
"\015"
) && (
$chars
[
$i
+1] eq
"\012"
)) {
$flavor
=
'DOS'
;
$octal
=
"\015\012"
;
$str
=
'\r\n'
;
$irs
=
"^M^J"
;
last
;
}
elsif
((
$chars
[
$i
] eq
"\015"
)) {
$flavor
=
'Mac'
;
$octal
=
"\015"
;
$str
=
'\r'
;
$irs
=
"^M"
;
last
;
}
}
if
(not
$octal
) {
$self
->
warn
(
"Could not determine newline char. Using '\012'"
);
$octal
=
"\012"
;
}
else
{
#print STDERR "FLAVOR=$flavor, NEWLINE CHAR = $irs\n";
}
return
(
$octal
);
}
=head2 file_flavor
Usage : $object->file_flavor( <filename> );
Purpose : Returns the 'flavor' of a given file (unix, dos, mac)
Example : print "$file has flavor: ", $Util->file_flavor($file);
Argument : filename = string, full path name for file
Returns : String describing flavor of file and handy info about line endings.
: One of these is returned:
: unix (\n or 012 or ^J)
: dos (\r\n or 015,012 or ^M^J)
: mac (\r or 015 or ^M)
: unknown
Throws : Exception if argument is not a file
: Propagates any exceptions thrown by Bio::Root::Utilities::get_newline().
See Also : L<get_newline()|get_newline>, L<taste_file()|taste_file>
=cut
#---------------
sub
file_flavor {
#---------------
my
(
$self
,
$file
) =
@_
;
my
%flavors
=(
"\012"
=>
'unix (\n or 012 or ^J)'
,
"\015\012"
=>
'dos (\r\n or 015,012 or ^M^J)'
,
"\015"
=>
'mac (\r or 015 or ^M)'
);
-f
$file
or
$self
->throw(
"Could not determine flavor: arg '$file' is either non existant or is not a file.\n"
);
my
$octal
=
$self
->get_newline(
$file
);
my
$flavor
=
$flavors
{
$octal
} ||
"unknown"
;
return
$flavor
;
}
######################################
##### Mail Functions ########
######################################
=head2 mail_authority
Title : mail_authority
Usage : $Util->mail_authority( $message )
Purpose : Syntactic sugar to send email to $Bio::Root::Global::AUTHORITY
See Also : L<send_mail()|send_mail>
=cut
#---------------
sub
mail_authority {
#---------------
my
(
$self
,
$message
) =
@_
;
my
$script
=
$self
->untaint($0,1);
my
=
$self
->{
'_auth_email'
} ||
$AUTHORITY
;
if
(
defined
) {
$self
->send_mail(
-TO
=>
$AUTHORITY
,
-SUBJ
=>
$script
,
-MSG
=>
$message
);
}
else
{
$self
->throw(
"Can't email authority. No email defined."
);
}
}
=head2 authority
Title : authority
Usage : $Util->authority('admin@example.com');
Purpose : Set/get the email address that should be notified by mail_authority()
See Also : L<mail_authority()|mail_authority>
=cut
#-------------
sub
authority {
#-------------
my
(
$self
,
) =
@_
;
$self
->{
'_auth_email'
} =
if
defined
;
return
$self
->{
'_auth_email'
};
}
=head2 send_mail
Title : send_mail
Usage : $Util->send_mail( named_parameters )
Purpose : Provides an interface to mail or sendmail, if available
Returns : n/a
Argument : Named parameters: (case-insensitive)
: -TO => e-mail address to send to
: -SUBJ => subject for message (optional)
: -MSG => message to be sent (optional)
: -CC => cc: e-mail address (optional)
Thows : Exception if TO: address appears bad or is missing.
: Exception if mail cannot be sent.
Comments : Based on TomC's tip at:
:
: Using default 'From:' information.
: sendmail options used:
: -t: ignore the address given on the command line and
: get To:address from the e-mail header.
: -oi: prevents send_mail from ending the message if it
: finds a period at the start of a line.
See Also : L<mail_authority()|mail_authority>
=cut
#-------------
sub
send_mail {
#-------------
my
(
$self
,
@param
) =
@_
;
my
(
$recipient
,
$subj
,
$message
,
$cc
) =
$self
->_rearrange([
qw(TO SUBJ MSG CC)
],
@param
);
$self
->throw(
"Invalid or missing e-mail address: $recipient"
)
if
not
$recipient
=~ /\S+\@\S+/;
$subj
||=
'empty subject'
;
$message
||=
''
;
# Best to use mail rather than sendmail. Permissions on sendmail in
# linux distros have been significantly locked down in recent years,
# due to the perception that it is insecure.
my
(
$exe
,
$ccinfo
);
if
(
$exe
=
$self
->find_exe(
'mail'
)) {
if
(
defined
$cc
) {
$ccinfo
=
"-c $cc"
;
}
$self
->debug(
"send_mail: $exe -s '$subj' $ccinfo $recipient\n"
);
open
(MAIL,
"| $exe -s '$subj' $ccinfo $recipient"
) ||
$self
->throw(
"Can't send email: mail cannot fork: $!"
);
<<QQ_EOFM_QQ;
$message
QQ_EOFM_QQ
$? and
$self
->
warn
(
"mail didn't exit nicely: $?"
);
close
(MAIL);
}
elsif
(
$exe
=
$self
->find_exe(
'sendmail'
)) {
open
(SENDMAIL,
"| $exe -oi -t"
) ||
$self
->throw(
"Can't send email: sendmail cannot fork: $!"
);
SENDMAIL
<<QQ_EOFSM_QQ;
To: $recipient
Subject: $subj
Cc: $cc
$message
QQ_EOFSM_QQ
$? and
$self
->
warn
(
"sendmail didn't exit nicely: $?"
);
close
(SENDMAIL);
}
else
{
$self
->throw(
"Can't find executable for mail or sendmail."
);
}
}
=head2 find_exe
Title : find_exe
Usage : $Util->find_exe(name);
Purpose : Locate an executable (for use in a system() call, e.g.))
Example : $Util->find_exe("gzip");
Returns : String containing executable that passes the -x test.
Returns undef if an executable of the supplied name cannot be found.
Argument : Name of executable to be found.
: Can be a full path. If supplied name is not executable, an executable
: of that name will be searched in all directories in the currently
: defined PATH environment variable.
Throws : No exceptions, but issues a warning if multiple paths are found
: for a given name. The first one is used.
Comments : TODO: Confirm functionality on all bioperl-supported platforms.
May get tripped up by variation in path separator character used
for splitting ENV{PATH}.
See Also :
=cut
#------------
sub
find_exe {
#------------
my
(
$self
,
$name
) =
@_
;
my
@bindirs
;
if
($^O =~ m/mswin/i) {
@bindirs
=
split
';'
,
$ENV
{
'PATH'
};
# Add usual executable extension if missing or -x won't work
$name
.=
'.exe'
if
(
$name
!~ m/\.exe$/i);
}
else
{
@bindirs
=
split
':'
,
$ENV
{
'PATH'
};
}
my
$exe
=
$name
;
unless
(-x
$exe
) {
undef
$exe
;
my
@exes
;
foreach
my
$d
(
@bindirs
) {
# Note: Windows also understand '/' as folder separator,
# so there is no need to use a conditional with '\'
push
(
@exes
,
"$d/$name"
)
if
-x
"$d/$name"
;
}
if
(
scalar
@exes
) {
$exe
=
$exes
[0];
if
(
defined
$exes
[1]) {
$self
->
warn
(
"find_exe: Multiple paths to '$name' found. Using $exe."
);
}
}
}
return
$exe
;
}
######################################
### Interactive Functions #####
######################################
=head2 yes_reply
Title : yes_reply()
Usage : $Util->yes_reply( [query_string]);
Purpose : To test an STDIN input value for affirmation.
Example : print +( $Util->yes_reply('Are you ok') ? "great!\n" : "sorry.\n" );
: $Util->yes_reply('Continue') || die;
Returns : Boolean, true (1) if input string begins with 'y' or 'Y'
Argument: query_string = string to be used to prompt user (optional)
: If not provided, 'Yes or no' will be used.
: Question mark is automatically appended.
=cut
#-------------
sub
yes_reply {
#-------------
my
$self
=
shift
;
my
$query
=
shift
;
my
$reply
;
$query
||=
'Yes or no'
;
"\n$query? (y/n) [n] "
;
chomp
(
$reply
= <STDIN> );
$reply
=~ /^y/i;
}
=head2 request_data
Title : request_data()
Usage : $Util->request_data( [value_name]);
Purpose : To request data from a user to be entered via keyboard (STDIN).
Example : $name = $Util->request_data('Name');
: # User will see: % Enter Name:
Returns : String, (data entered from keyboard, sans terminal newline.)
Argument: value_name = string to be used to prompt user.
: If not provided, 'data' will be used, (not very helpful).
: Question mark is automatically appended.
=cut
#----------------
sub
request_data {
#----------------
my
$self
=
shift
;
my
$data
=
shift
||
'data'
;
"Enter $data: "
;
# Remove the terminal newline char.
chomp
(
$data
= <STDIN>);
$data
;
}
=head2 quit_reply
Title : quit_reply
Usage :
Purpose :
=cut
sub
quit_reply {
# Not much used since you can use request_data()
# and test for an empty string.
my
$self
=
shift
;
my
$reply
;
chop
(
$reply
= <STDIN> );
$reply
=~ /^
q.*/i;
}
=head2 verify_version
Purpose : Checks the version of Perl used to invoke the script.
: Aborts program
if
version is less than the
given
argument.
Usage : verify_version(
'5.000'
)
=cut
#------------------
sub
verify_version {
#------------------
my
$self
=
shift
;
my
$reqVersion
=
shift
;
$] <
$reqVersion
and
do
{
printf
STDERR (
"\a\n%s %0.3f.\n"
,
"** Sorry. This Perl script requires at least version"
,
$reqVersion
);
printf
STDERR (
"%s %0.3f %s\n\n"
,
"You are running Perl version"
, $],
"Please update your Perl!\n\n"
);
exit
(1);
}
}
1;
__END__