NAME
DiaColloDB::Utils - diachronic collocation database, generic utilities
SYNOPSIS
##========================================================================
## PRELIMINARIES
use DiaColloDB::Utils;
##========================================================================
## Functions: Fcntl
$flags = PACKAGE::fcflags($flags);
$flags = PACKAGE::fcgetfl($fh);
$bool = fcread($flags);
$bool = fcwrite($flags);
$bool = fctrunc($flags);
$bool = fccreat($flags);
$fh_or_undef = fcopen($file,$flags);
##========================================================================
## JSON: load
$data = PACKAGE::loadJsonString( $string,%opts);
$data = PACKAGE::loadJsonFile($filename_or_handle,%opts);
##========================================================================
## JSON: save
$str = PACKAGE::saveJsonString($data);
$bool = PACKAGE::saveJsonFile($data,$filename_or_handle,%opts);
##========================================================================
## Functions: env
\%setenv = PACKAGE::env_set(%setenv);
\%restored = PACKAGE::env_pop(%setenv);
##========================================================================
## Functions: run
$fh_or_undef = PACKAGE::opencmd($cmd);
$bool = crun(@IPC_Run_args);
$bool = csort_to(\@sortargs, \&catcher);
$bool = csortuc_to(\@sortargs, \&catcher);
$cmd = sortCmd();
##========================================================================
## Functions: pack filters
$len = PACKAGE::packsize($packfmt);
\&filter_sub = PACKAGE::packFilterStore($pack_template);
\&filter_sub = PACKAGE::packFilterFetch($pack_template);
##========================================================================
## Math stuff
$log2 = log2($x);
$max2 = max2($x,$y);
$min2 = min2($x,$y);
##========================================================================
## Functions: lists
\@l_uniq        = luniq(\@l);
\@l_sorted_uniq = sluniq(\@l_sorted);
\@l_uniq        = xluniq(\@l,\&keyfunc);
##========================================================================
## Functions: regexes
$re = regex($re_str);
##========================================================================
## Functions: html
$escaped = htmlesc($str);
##========================================================================
## Functions: time
$hms     = PACKAGE::s2hms($seconds,$sfmt="%06.3f");
$timestr = PACKAGE::s2timestr($seconds,$sfmt="%f");
$rfc_timestamp = PACKAGE->timestamp();
##========================================================================
## Functions: file
$mtime = PACKAGE->file_mtime($file_or_fh);
$timestamp = PACKAGE->file_timestamp($file_or_fh);
$nbytes = du_file(@filenames_or_fh);
$nbytes = du_glob(@globs);
$bool = PACKAGE->copyto  ($src_filename_or_array, $dstdir, %opts);
$bool = PACKAGE->copyto_a($src_filename_or_array, $dstdir, %opts);
$bool = PACKAGE->moveto  ($src_filename_or_array, $dstdir, %opts);
$bool = PACKAGE->cp_a    ($src_filename_or_array, $dstdir);
##========================================================================
## Utils: SI
$str = si_str($float);
##========================================================================
## Functions: pdl: setops
$pi = CLASS::_intersect_p($p1,$p2);
$pu = CLASS::_union_p($p1,$p2);
$pneg = CLASS::_complement_p($p,$N);
$pdiff = CLASS::_setdiff_p($a,$b,$N);
##========================================================================
## Functions: pdl
$pdl_or_undef = CLASS->readPdlFile($basename, %opts);
$bool = CLASS->writePdlFile($pdl_or_undef, $basename, %opts);
$bool = CLASS->writePdlHeader($filename, $type, $ndims, @dims);
$bool = CLASS->writeCcsHeader($filename, $itype, $vtype, $pdims, %opts);
$pdl  = mmzeroes($file?, $type?, @dims, \%opts?);
$bool = mmunlink(@mmfiles);
$type   = CLASS->mintype($pdl,    @types);
$maxval = $type->maxval();
($vals,$counts) = $pdl->valcounts();
##========================================================================
## Functions: temporaries
$tmpdir = CLASS->tmpdir();
$fh = CLASS->tmpfh();
$filename = CLASS->tmpfile();
\@tmparray = CLASS->tmparray($template, %opts);
\@tmparrayp = CLASS->tmparrayp($template, $packas, %opts);
\%tmphash = CLASS->tmphash($class, $template, %opts);
##========================================================================
## Functions: parallelization
$ncores   = CLASS->nCores();
$njobs    = CLASS->nJobs();
$sortjobs = CLASS->sortJobs();
DESCRIPTION
Globals
- Variable: @ISA
 - 
DiaColloDB::Utils inherits from Exporter and DiaColloDB::Logger.
 - Variable: %EXPORT_TAGS
 - 
Exportable tags:
fcntl => [qw(fcflags fcgetfl fcread fcwrite fctrunc fccreat fcperl fcopen)], json => [qw(loadJsonString loadJsonFile saveJsonString saveJsonFile)], sort => [qw(csort_to csortuc_to sortCmd)], run => [qw(crun opencmd)], env => [qw(env_set env_push env_pop)], pack => [qw(packsize packsingle packFilterFetch packFilterStore)], math => [qw($LOG2 log2 min2 max2)], list => [qw(luniq sluniq xluniq)], regex => [qw(regex)], html => [qw(htmlesc)], time => [qw(s2hms s2timestr timestamp)], file => [qw(file_mtime file_timestamp du_file du_glob copyto copyto_a moveto cp_a fh_flush fh_reopen)], si => [qw(si_str)], pdl => [qw(_intersect_p _union_p _complement_p _setdiff_p), qw(readPdlFile writePdlFile writePdlHeader writeCcsHeader mmzeroes mmtemp), qw(maxval mintype), ], temp => [qw($TMPDIR tmpdir tmpfh tmpfile tmparray tmparrayp tmphash)], jobs => [qw(nCores nJobs sortJobs)], - Variable: @EXPORT_OK
 - 
All symbols in %EXPORT_TAGS are exportable
 - Variable: @EXPORT
 - 
All symbols in %EXPORT_TAGS are exported by default.
 
Functions: Fcntl
- fcflags
 - 
$flags = PACKAGE::fcflags($flags);returns Fcntl flags for symbolic string $flags
 - fcgetfl
 - 
$flags = PACKAGE::fcgetfl($fh);returns Fcntl flags for filehandle $fh
 - fcread
 - 
$bool = fcread($flags);returns true if any read-bits are set for $flags
 - fcwrite
 - 
$bool = fcwrite($flags);returns true if any write-bits are set for $flags
 - fctrunc
 - 
$bool = fctrunc($flags);returns true if truncate-bits are set for $flags
 - fccreat
 - 
$bool = fccreat($flags);returns true iff creation flag is set for $flags.
 - fcperl
 - 
$str = fcperl($flags);returns perl mode-string corresponding to $flags.
 - fcopen
 - 
$fh_or_undef = fcopen($file,$flags); $fh_or_undef = fcopen($file,$flags,$mode,$perms)opens $file with Fcntl-style flags $flags.
 
JSON: load
- loadJsonString
 - 
$data = PACKAGE::loadJsonString( $string,%opts); $data = PACKAGE::loadJsonString(\$string,%opts)decodes JSON string. %opts are passed to JSON::from_json().
 - loadJsonFile
 - 
$data = PACKAGE::loadJsonFile($filename_or_handle,%opts);loads JSON data from a file or filehandle. %opts are passed to loadJsonString().
 
JSON: save
- saveJsonString
 - 
$str = PACKAGE::saveJsonString($data); $str = PACKAGE::saveJsonString($data,%opts); Encode data as a JSON string. %opts are passed to JSON::to_json(), e.g. (pretty=>0, canonical=>0).
 - saveJsonFile
 - 
$bool = PACKAGE::saveJsonFile($data,$filename_or_handle,%opts);Save JSON data to a file. %opts are passed to saveJsonString().
 
Functions: env
- Variable: @env_stack
 - 
Stack of temporary environment variables.
 - env_set
 - 
\%setenv = PACKAGE::env_set(%setenv);Set or clear environment variables.
 - env_push
 - 
\%oldvals = PACKAGE::env_push(%setenv);Push old values for keys(%setenv) to @env_stack and calls env_set(%setenv).
 - env_pop
 - 
\%restored = PACKAGE::env_pop(%setenv);Pops the most recent variable bindings from @env_stack and restores them to the environment.
 
Functions: run
- opencmd
 - 
$fh_or_undef = PACKAGE::opencmd($cmd); $fh_or_undef = PACKAGE::opencmd($mode,@argv);does log trace at level $TRACE_RUNCMD
 - crun
 - 
$bool = crun(@IPC_Run_args);wrapper for IPC::Run::run(@IPC_Run_args) with $ENV{LC_ALL}='C'
 - csort_to
 - 
$bool = csort_to(\@sortargs, \&catcher);runs system sort and feeds resulting lines to \&catcher
 - csortuc_to
 - 
$bool = csortuc_to(\@sortargs, \&catcher);runs system sort | uniq -c and feeds resulting lines to \&catcher
 - sortCmd
 - 
$cmd = sortCmd(); $cmd = sortCmd($nJobs); Returns command-line prefix (command and initial options) for GNU-like sort command. This method just returns the value of the
DIACOLLO_SORT environment variable if it is set, otherwise the value of the SORT environment variable if that is set. If neither DIACOLLO_SORT nor SORT is set, it returns the string sort with the parallelization options returned by "sortJobs" appended. You can use the environment variable hooks e.g. to reduce the amount of RAM and/or CPU cores used by subordinate system sort calls by setting them appropriately, e.g. env SORT="/bin/sort --parallel=4 --buffer-size=1G" to request that GNU sort use at most 4 CPU cores and a maximum RAM buffer size of 1GB.
 
Functions: pack filters
- packsize
 - 
$len = PACKAGE::packsize($packfmt); $len = PACKAGE::packsize($packfmt,@args);get pack-size for $packfmt with args @args
 - packFilterStore
 - 
\&filter_sub = PACKAGE::packFilterStore($pack_template); \&filter_sub = PACKAGE::packFilterStore([$pack_template_store, $pack_template_fetch]); \&filter_sub = PACKAGE::packFilterStore([\&pack_code_store, \&pack_code_fetch]);returns a DB_File-style STORE-filter sub for transparent packing of data to $pack_template
 - packFilterFetch
 - 
\&filter_sub = PACKAGE::packFilterFetch($pack_template); \&filter_sub = PACKAGE::packFilterFetch([$pack_template_store, $pack_template_fetch]); \&filter_sub = PACKAGE::packFilterFetch([\&pack_code_store, \&pack_code_fetch]);returns a DB_File-style FETCH-filter sub for transparent unpacking of data from $pack_template.
 
Math stuff
- Variable: $LOG2
 - 
constant:
log(2)for binary logarithms. - log2
 - 
$log2 = log2($x);binary logarithm function.
 - max2
 - 
$max2 = max2($x,$y);maximum
 - min2
 - 
$min2 = min2($x,$y);minimum
 
Functions: lists
- luniq
 - 
\@l_uniq = luniq(\@l);returns sorted list of unique defined elements of @l; @l need not be sorted.
 - sluniq
 - 
\@l_sorted_uniq = sluniq(\@l_sorted);returns unique defined elements of pre-sorted list @l_sorted.
 - xluniq
 - 
\@l_uniq = xluniq(\@l,\&keyfunc); returns elements of @l with unique defined keys according to
\&keyfunc (default=\&overload::StrVal); returned list is sorted by \&keyfunc. 
Functions: regexes
- regex
 - 
$re = regex($re_str);parses regex $re_str, which can optionally be "/"-quoted. parses modifiers /[gimsadlu]. /g modifier is parsed a la ddc (match whole word).
 
Functions: html
- htmlesc
 - 
$escaped = htmlesc($str);escape an HTML string.
 
Functions: time
- s2hms
 - 
$hms = PACKAGE::s2hms($seconds,$sfmt="%06.3f"); ($h,$m,$s) = PACKAGE::s2hms($seconds,$sfmt="%06.3f");convert a time value in seconds to HH:MM:SS.SSSS format
 - s2timestr
 - 
$timestr = PACKAGE::s2timestr($seconds,$sfmt="%f");convert a time value in seconds to H?M?S.SSSS format
 - timestamp
 - 
$rfc_timestamp = PACKAGE->timestamp(); $rfc_timestamp = PACKAGE->timestamp($time); Returns a UTC ISO-8601 timestamp in the format "%Y-%m-%dT%H:%M:%SZ" for the UNIX time $time.
 
Functions: file
- file_mtime
 - 
$mtime = PACKAGE->file_mtime($file_or_fh);get mtime (last modification time) for $file_or_fh.
 - file_timestamp
 - 
$timestamp = PACKAGE->file_timestamp($file_or_fh);get an ISO-8601 timestamp for mtime of $file_or_fh.
 - du_file
 - 
$nbytes = du_file(@filenames_or_fhs); returns the number of bytes used by @filenames_or_fhs
 - du_glob
 - 
$nbytes = du_glob(@globs); returns the number of bytes used by files matching any $glob in @globs
 - copyto
 - 
$bool = PACKAGE->copyto($src_filename_or_array, $dstdir, %opts); Copies source file(s) to
$dstdir, creating $dstdir if it doesn't already exist. Argument $src_filename_or_array may be either an ARRAY-ref of filenames to be copied or a single scalar filename. By default, files are copied using File::Copy::copy(). Options %opts: from => $from, ##-- replace prefix $from in file(s) with $todir; default=undef: flat copy to $todir method => \&method, ##-- use CODE-ref \&method as underlying copy routine; default=\&File::Copy::copy label => $label, ##-- report errors as '$label'; (default='copyto()') - copyto_a
 - 
$bool = PACKAGE->copyto_a($src_filename_or_array, $dstdir, %opts); Wrapper for
PACKAGE->copyto($src_filename_or_array, $dstdir, %opts, method=>\&cp_a, label=>'copyto_a()'). - moveto
 - 
$bool = PACKAGE->moveto($src_filename_or_array, $dstdir, %opts);Wrapper for
PACKAGE->copyto($src_filename_or_array, $dstdir, %opts, method=>\&File::Copy::move, label=>'moveto()'). - cp_a
 - 
$bool = PACKAGE->cp_a($src,$dst); Copies a single file
$src to $dst, attempting to preserve ownership, permissions, and timestamps; used by copyto_a(). Uses File::Copy::syscopy() if available and distinct from File::Copy::copy(), otherwise first copies the file using File::Copy::copy() and subsequently propagates file attributes using the core perl functions chown(), chmod(), and utime(). - fh_flush
 - 
$fh_or_undef = PACKAGE->fh_flush($fh);flushes filehandle $fh using its flush() method if available
 - fh_reopen
 - 
$fh_or_undef = PACKAGE->fh_reopen($fh,$file);closes and re-opens filehandle $fh, should be an expensive flush even if system doesn't support the IO::Handle::flush method.
 
Utils: SI
- si_str
 - 
$str = si_str($float);returns an SI string for $float.
 
Functions: pdl
- _intersect_p
 - 
$pi = CLASS::_intersect_p($p1,$p2); $pi = CLASS->_intersect_p($p1,$p2);computes intersection of 2 piddles; undef is treated as the universal set; argument piddles MUST be sorted in ascending order.
 - _union_p
 - 
$pu = CLASS::_union_p($p1,$p2); $pu = CLASS->_union_p($p1,$p2); computes union of 2 piddles; undef is treated as the universal set; argument piddles MUST be sorted in ascending order.
 - _complement_p
 - 
$pneg = CLASS::_complement_p($p,$N); $pneg = CLASS->_complement_p($p,$N); computes complement of an index-piddle
$p; undef is treated as the universal set; $N is the total number of elements in the index-universe. - _setdiff_p
 - 
$pdiff = CLASS::_setdiff_p($a,$b,$N); $pdiff = CLASS->_setdiff_p($a,$b,$N); index-piddle difference; undef is treated as the universal set.
$N is the total number of elements in the index-universe. - readPdlFile
 - 
$pdl_or_undef = CLASS->readPdlFile($basename, %opts);Load or mmap a PDL file from disk using PDL::IO::FastRaw; %opts:
class=>$class, # one of qw(PDL PDL::CCS::Nd) mmap =>$bool, # use mapfraw() (default=1) log =>$level, # log-level (default=undef: off) #... # other keys passed to CLASS->mapfraw() rsp. CLASS->readfraw() - writePdlFile
 - 
$bool = CLASS->writePdlFile($pdl_or_undef, $basename, %opts);Write a PDL file to disk using PDL::IO::FastRaw. Unlinks target file(s) if
$pdl_or_undefis not defined. %opts:log => $bool, # log-level (default=undef: off) #... # other keys passed to $pdl->writefraw() - writePdlHeader
 - 
$bool = CLASS->writePdlHeader($filename, $type, $ndims, @dims);writes a PDL::IO::FastRaw-style header
$filename(e.g."pdl.hdr"); adapted from PDL::IO::FastRaw::_writefrawhdr(). Arguments:$type ##-- PDL::Type or integer $ndims ##-- number of piddle dimensions @dims ##-- dimension size list, piddle, or ARRAY-ref - writeCcsHeader
 - 
$bool = CLASS->writeCcsHeader($filename, $itype, $vtype, $pdims, %opts); writes a PDL::CCS::IO::FastRaw-style header
$filename (e.g. "pdl.hdr"). Arguments: $itype, ##-- PDL::Type for index (default: PDL::CCS::Utils::ccs_indx()) $vtype, ##-- PDL::Type for values (default: $PDL::IO::Misc::deftype) $pdims, ##-- dimension piddle or ARRAY-ref %opts ##-- passed to PDL::CCS::Nd->newFromWhich - mmzeroes
 - 
$pdl = mmzeroes ($file?, $type?, @dims, \%opts?); $pdl = $pdl->mmzeroes($file?, $type?, \%opts?);create a (temporary) mmap()ed pdl using DiaColloDB::PDL::MM; wraps DiaColloDB::PDL::MM->new(). %opts:
file => $template, ##-- file basename or File::Temp template; default='pdlXXXX' suffix => $suffix, ##-- File::Temp::tempfile() suffix (default='.pdl') log => $level, ##-- logging verbosity (default=undef: off) temp => $bool, ##-- delete on END (default: $file =~ /X{4}/) - mmtemp
 - 
$pdl = mmtemp ($file?, $type?, @dims, \%opts?); $pdl = $pdl->mmtemp($file?, $type?, \%opts?);Like mmzeroes(), but wraps DiaColloDB::PDL::MM->mmtemp(), implicitly setting
$opts->{temp}=1. - mmunlink
 - 
$bool = mmunlink(@mmfiles); $bool = mmunlink($mmpdl,@mmfiles);unlinks file(s) generated by mmzeroes($basename) or mmtemp($basename); wraps DiaColloDB::PDL::MM::unlink().
 - mintype
 - 
$type = CLASS->mintype ($pdl, @types); $type = CLASS->mintype($maxval, @types);returns minimum PDL::Types type from
@typesrequired for representing$maxval, which in turn defaults to$maxval->maxif$maxvalis passed as a PDL;@typesdefaults to all known PDL types. - maxval
 - 
$maxval = $type->maxval(); $maxval = CLASS::maxval($type_or_name)returns maximum value representable by PDL::Type
$type(first form) or$type_or_name(second form); really only meaningful for integer types. - valcounts
 - 
($vals,$counts) = $pdl->valcounts();wrapper for $pdl->flat->qsort->rle() with masking of zero-counts lifted from MUDL::PDL::Smooth.
 
Functions: temporaries
- Variable: $TMPDIR
 - 
Global temp directory to use. If undefined (the default), File::Spec::tmpdir() will be used.
 - Variable: @TMPFILES
 - 
list of temporary files created by this process to be unlinked in an END block.
 - tmpdir
 - 
$tmpdir = CLASS->tmpdir(); $tmpdir = CLASS->tmpdir($template, %opts); In the first form, gets the name of the global tempdir ($TMPDIR || File::Spec::tmpdir()). In the second form, creates and returns a new temporary directory via File::Temp::tempdir().
 - tmpfh
 - 
$fh = CLASS->tmpfh(); $fh = CLASS->tmpfh($template_or_filename, %opts); get a new temporary filehandle or
undef on error; in list context, returns ($fh,$filename) or empty list on error. $template_or_filename defaults to "tmpXXXXX". Uses File::Temp::tempfile() if $template_or_filename contains at least 4 "X" characters, otherwise uses literal $template_or_filename, honoring the DIR, TMPDIR, SUFFIX, and UNLINK options in %opts, which are interpreted as for File::Temp::tempfile(). - tmpfile
 - 
$filename = CLASS->tmpfile(); $filename = CLASS->tmpfile($template, %opts);Wrapper for tmpfh which returns only the filename.
 - tmparray
 - 
\@tmparray = CLASS->tmparray($template, %opts); ties a new temporary array via DiaColloDB::Temp::Array and returns a reference to the newly tied array. wraps
tmpfile($template,%opts) and tie(my @tmparray, 'DiaColloDB::Temp::Array', $tmpfilename, %opts). - tmparrayp
 - 
\@tmparrayp = CLASS->tmparrayp($template, $packas, %opts);ties a new temporary integer-array via DiaColloDB::PackedFile and returns a reference to the newly tied array. wraps
tmpfile($template,%opts)andtie(my @tmparray, 'DiaColloDB::PackedFile', $tmpfilename, %opts). - tmphash
 - 
\%tmphash = CLASS->tmphash($template, %opts);ties a new temporary hash via DiaColloDB::Temp::Hash and returns a reference to the newly tied hash. wraps
tmpfile($template,%opts)andtie(my %tmphash, 'DiaColloDB::Temp::Hash', $tmpfilename, %opts). 
Functions: parallelization
- Variable: %NCORES
 - 
Cache for nCores() utility:
($cpuinfo_file=>$n, ...) - nCores
 - 
$ncores = PACKAGE::nCores(); $ncores = PACKAGE::nCores($proc_cpuinfo_filename); Returns the number of CPU cores on the system according to the file $proc_cpuinfo_filename (by default /proc/cpuinfo) if available, otherwise according to the external program
nproc if that is available, and otherwise zero. Caches result as $NCORES{$proc_cpuinfo_filename}. - nJobs
 - 
$njobs = PACKAGE::nJobs(); $njobs = PACKAGE::nJobs($njobsRequest); Gets non-negative number of parallel jobs (threads) for user request
$njobsRequest, which defaults to the current value of the package variable $DiaColloDB::NJOBS, or -1 if it is undefined. If
$njobsRequest is negative, returns the number of CPU cores on the system via nCores(). Otherwise, if (0 < $njobsRequest < 1), $njobsRequest is interpreted as the desired fraction of the number of available CPU cores to be used, and returns ($njobsRequest*nCores()). In all other cases, $njobsRequest is interpreted as an exact number of jobs to use, and is returned as int($njobs+0). - sortJobs
 - 
$sort_parallel_option = sortJobs(); $sort_parallel_option = sortJobs($njobsRequest); Returns an appropriate
--parallel option for external sort system command calls to use $njobsRequest parallel jobs (assuming sort calling conventions as for GNU coreutils). 
AUTHOR
Bryan Jurish <moocow@cpan.org>
COPYRIGHT AND LICENSE
Copyright (C) 2015-2020 by Bryan Jurish
This package is free software; you can redistribute it and/or modify it under the same terms as Perl itself, either Perl version 5.14.2 or, at your option, any later version of Perl 5 you may have available.
SEE ALSO
DiaColloDB(3pm), perl(1), ...