NAME

File::Pairtree - routines to manage pairtrees

SYNOPSIS

use File::Pairtree;           # imports routines into a Perl script

id2ppath($id);                # returns pairpath corresponding to $id
id2ppath($id, $separator);    # if you want an alternate separator char

ppath2id($path);              # returns id corresponding to $path
ppath2id($path, $separator);  # if you want an alternate separator char

pt_budstr();
pt_mkid();
pt_mktree();
pt_rmid();
pt_lsid();

DESCRIPTION

This is very brief documentation for the Pairtree Perl module.

COPYRIGHT AND LICENSE

Copyright 2008-2011 UC Regents. Open source BSD license.

#use File::Find; # $File::Find::prune = 1

# XXX add to spec: two ways that a pairpath ends: 1) the form of the # ppath (ie, ends in a morty) and 2) you run "aground" smack into # a "longy" ("thingy") or a file

# xxx other stats to gather: total dir count, total count of all things # that aren't either reg files or dirs; plus max and averages for all # things like depth of ppaths (ids), depth of objects, sizes of objects, # fanout; same numbers for "pairtree.*" branches

my ($pdname, $tpname, $wpname); my $symlinks_followed = 1; my ($dev, $ino, $mode, $nlink, $uid, $gid, $rdev, $sze); my %curobj = ( 'ppath' => '', 'encaperr' => 0, 'octets' => 0, 'streams' => 0, );

sub pt_newobj { my( $ppath, $encaperr, $octets, $streams )=@_;

# warning: ugly code ahead
if ($curobj{'ppath'}) {		# print record of previous obj
	$_ = ppath2id($curobj{'ppath'});
	s/^/$$gr_opt{prefix}/;		# uses global set in lstree()
	$$gr_opt{long} and
		$gr_opt->{om}->elem('node',
			join("  ", $_, $curobj{'ppath'},
			"$curobj{'octets'}.$curobj{'streams'}")), 1
	or
		$gr_opt->{om}->elem('node', $_), 1
	;
	$curobj{'ppath'} eq $ppath and
		print "error: corrupted pairtree at pairpath ",
			"$ppath/: split end $homily\n";
	# xxx use om?
}
# xxx strange
die "pt_newobj: all args must be defined"
	unless (defined($ppath) && defined($encaperr)
		&& defined($octets) && defined($streams));
$curobj{'ppath'} = $ppath;
$curobj{'encaperr'} = $encaperr;
$curobj{'octets'} = $octets;
$curobj{'streams'} = $streams;
}

sub pt_visit_node { # receives no args

$pdname = $File::Find::dir;		# current parent directory name
$tpname = $_;				# current filename in that dir
$_ = $wpname = $File::Find::name;	# whole pathname to file

# We always need lstat() info on the current node XXX why?
# xxx tells us all, but if following symlinks the lstat is done
# ... by find:  use (-X _), but of the nifty facts below we
# still need to harvest the size ($sze) by hand.
#
($dev, $ino, $mode, $nlink, $uid, $gid, $rdev, $sze) = lstat($tpname)
	unless ($symlinks_followed and ($sze = -s _));

#print "NEXT: $pdname $_ $wpname\n";

# If we follow symlinks (usual), we have to expect the -l type,
# which hides the type of the link target (what we really want).
#
if (! $Win and -l _) {
	$symlinkcount++;
	print "XXXX SYMLINK $_\n";
	# yyy presumably this branch never happens when
	#     _not_ following links?
	($dev, $ino, $mode, $nlink, $uid, $gid, $rdev, $sze)
		= stat($tpname);	# get the real thing
}
# After this, tests of the form (-X _) give almost everything.

if (-f $tpname) {
	$filecount++;
	if (m@^.*$R/(.*/)?pairtree.*$@o) {
		### print "$pdname PTREEFILE $tpname\n";
		# xxx    if $verbose;
		# else -prune ??
	}
	elsif (m@^.*$R/$P/[^/]+$@o) {
		#print "m@.*$R/$P/[^/]+@: $_\n";
	 	#print "$pdname UF $tpname\n";
		print "error: corrupted pairtree at pairpath ",
			"$pdname/: found unencapsulated file ",
			"'$tpname' $homily\n";
	}
	else {
		# xxx sanity check that $curobj is defined
		$curobj{'octets'} += $sze;
		### print "sssss $curobj{'octets'}\n";
		$curobj{'streams'}++;
	#	-fprintf $altout 'IN %p %s\n'
	#	$noprune
	}
}
elsif (-d $tpname) {
	$dircount++;
	if (m@^.*$R/(.*/)?pairtree.*$@o) {
		#print "$pdname PTREEDIR $tpname\n";
		# xxx if $verbose;
	#	-prune
	}
	# At last, we're entering a "regular" object.
	# XXXXXXX add re qualifier so Perl knows re's not changing
	elsif (m@^.*$R/($P/)?[^/]{$pairp1,}$@o) {
		# start new object; but end previous object first
		# form: ppath, EncapErr, octets, streams
		$objectcount++;
		pt_newobj($pdname, 0, 0, 0);
		# print "$pdname NS $tpname\n";
		#	-fprintf $altout 'START %h 0\n'
		#	$noprune
	}
	elsif (m@^.*$R/$P$@o) {
		#	-empty
		# xxx if $verbose...	-printf '%p EP -\n'
	}
	# $pair, $pairm1, $pairp1
	# We have a post-morty encapsulation error
	elsif (m@^.*$R/([^/]{$pair}/)*[^/]{1,$pairm1}/[^/]{1,$pair}$@o) {
		#print "$pdname PM $tpname\n";
		print "error: corrupted pairtree at pairpath ",
			"$pdname/: found '$tpname' after forced ",
			"path ending $homily\n";
			
		#	-fprintf $altout 'START %h 0\n'
		#	$noprune
	}
}
else {
	$irregularcount++;
}
}