NAME
File::Pairtree - routines to manage pairtrees
SYNOPSIS
use File::Pairtree; # imports routines into a Perl script
id2ppath($id); # returns pairpath corresponding to $id
id2ppath($id, $separator); # if you want an alternate separator char
ppath2id($path); # returns id corresponding to $path
ppath2id($path, $separator); # if you want an alternate separator char
pt_budstr();
pt_mkid();
pt_mktree();
pt_rmid();
pt_lsid();
DESCRIPTION
This is very brief documentation for the Pairtree Perl module.
COPYRIGHT AND LICENSE
Copyright 2008-2011 UC Regents. Open source BSD license.
#use File::Find; # $File::Find::prune = 1
# XXX add to spec: two ways that a pairpath ends: 1) the form of the # ppath (ie, ends in a morty) and 2) you run "aground" smack into # a "longy" ("thingy") or a file
# xxx other stats to gather: total dir count, total count of all things # that aren't either reg files or dirs; plus max and averages for all # things like depth of ppaths (ids), depth of objects, sizes of objects, # fanout; same numbers for "pairtree.*" branches
my ($pdname, $tpname, $wpname); my $symlinks_followed = 1; my ($dev, $ino, $mode, $nlink, $uid, $gid, $rdev, $sze); my %curobj = ( 'ppath' => '', 'encaperr' => 0, 'octets' => 0, 'streams' => 0, );
sub pt_newobj { my( $ppath, $encaperr, $octets, $streams )=@_;
# warning: ugly code ahead
if ($curobj{'ppath'}) { # print record of previous obj
$_ = ppath2id($curobj{'ppath'});
s/^/$$gr_opt{prefix}/; # uses global set in lstree()
$$gr_opt{long} and
$gr_opt->{om}->elem('node',
join(" ", $_, $curobj{'ppath'},
"$curobj{'octets'}.$curobj{'streams'}")), 1
or
$gr_opt->{om}->elem('node', $_), 1
;
$curobj{'ppath'} eq $ppath and
print "error: corrupted pairtree at pairpath ",
"$ppath/: split end $homily\n";
# xxx use om?
}
# xxx strange
die "pt_newobj: all args must be defined"
unless (defined($ppath) && defined($encaperr)
&& defined($octets) && defined($streams));
$curobj{'ppath'} = $ppath;
$curobj{'encaperr'} = $encaperr;
$curobj{'octets'} = $octets;
$curobj{'streams'} = $streams;
}
sub pt_visit_node { # receives no args
$pdname = $File::Find::dir; # current parent directory name
$tpname = $_; # current filename in that dir
$_ = $wpname = $File::Find::name; # whole pathname to file
# We always need lstat() info on the current node XXX why?
# xxx tells us all, but if following symlinks the lstat is done
# ... by find: use (-X _), but of the nifty facts below we
# still need to harvest the size ($sze) by hand.
#
($dev, $ino, $mode, $nlink, $uid, $gid, $rdev, $sze) = lstat($tpname)
unless ($symlinks_followed and ($sze = -s _));
#print "NEXT: $pdname $_ $wpname\n";
# If we follow symlinks (usual), we have to expect the -l type,
# which hides the type of the link target (what we really want).
#
if (! $Win and -l _) {
$symlinkcount++;
print "XXXX SYMLINK $_\n";
# yyy presumably this branch never happens when
# _not_ following links?
($dev, $ino, $mode, $nlink, $uid, $gid, $rdev, $sze)
= stat($tpname); # get the real thing
}
# After this, tests of the form (-X _) give almost everything.
if (-f $tpname) {
$filecount++;
if (m@^.*$R/(.*/)?pairtree.*$@o) {
### print "$pdname PTREEFILE $tpname\n";
# xxx if $verbose;
# else -prune ??
}
elsif (m@^.*$R/$P/[^/]+$@o) {
#print "m@.*$R/$P/[^/]+@: $_\n";
#print "$pdname UF $tpname\n";
print "error: corrupted pairtree at pairpath ",
"$pdname/: found unencapsulated file ",
"'$tpname' $homily\n";
}
else {
# xxx sanity check that $curobj is defined
$curobj{'octets'} += $sze;
### print "sssss $curobj{'octets'}\n";
$curobj{'streams'}++;
# -fprintf $altout 'IN %p %s\n'
# $noprune
}
}
elsif (-d $tpname) {
$dircount++;
if (m@^.*$R/(.*/)?pairtree.*$@o) {
#print "$pdname PTREEDIR $tpname\n";
# xxx if $verbose;
# -prune
}
# At last, we're entering a "regular" object.
# XXXXXXX add re qualifier so Perl knows re's not changing
elsif (m@^.*$R/($P/)?[^/]{$pairp1,}$@o) {
# start new object; but end previous object first
# form: ppath, EncapErr, octets, streams
$objectcount++;
pt_newobj($pdname, 0, 0, 0);
# print "$pdname NS $tpname\n";
# -fprintf $altout 'START %h 0\n'
# $noprune
}
elsif (m@^.*$R/$P$@o) {
# -empty
# xxx if $verbose... -printf '%p EP -\n'
}
# $pair, $pairm1, $pairp1
# We have a post-morty encapsulation error
elsif (m@^.*$R/([^/]{$pair}/)*[^/]{1,$pairm1}/[^/]{1,$pair}$@o) {
#print "$pdname PM $tpname\n";
print "error: corrupted pairtree at pairpath ",
"$pdname/: found '$tpname' after forced ",
"path ending $homily\n";
# -fprintf $altout 'START %h 0\n'
# $noprune
}
}
else {
$irregularcount++;
}
}