package App::BackupPlan::Policy; use Archive::Tar; use File::Find; sub new { my $class = shift; my $self = { maxFiles => shift, prefix => shift, frequency => shift, targetDir => shift, sourceDir => shift}; bless $self,$class; return $self; } sub setMaxFiles { my ( $self, $maxFiles ) = @_; $self->{maxFiles} = $maxFiles if defined($maxFiles); return $self->{maxFiles}; } sub getMaxFiles { my( $self ) = @_; return $self->{maxFiles}; } sub setPrefix { my ( $self, $prefix ) = @_; $self->{prefix} = $prefix if defined($prefix); return $self->{prefix}; } sub getPrefix { my( $self ) = @_; return $self->{prefix}; } sub setFrequency { my ( $self, $frequency ) = @_; $self->{frequency} = $frequency if defined($frequency); return $self->{frequency}; } sub getFrequency { my( $self ) = @_; return $self->{frequency}; } sub setTargetDir { my ( $self, $targetDir ) = @_; $self->{targetDir} = $targetDir if defined($targetDir); return $self->{targetDir}; } sub getTargetDir { my( $self ) = @_; return $self->{targetDir}; } sub setSourceDir { my ( $self, $sourceDir ) = @_; $self->{sourceDir} = $sourceDir if defined($sourceDir); return $self->{sourceDir}; } sub getSourceDir { my( $self ) = @_; return $self->{sourceDir}; } sub set { my ($self, $name, $value) = @_; $self->{$name} = $value if defined($value) && defined($name); } sub print { my( $self ) = @_; $self->{maxFiles} = "n/a" unless defined($self->{maxFiles}); $self->{prefix} = "n/a" unless defined($self->{prefix}); $self->{frequency} = "n/a" unless defined($self->{frequency}); $self->{targetDir} = "n/a" unless defined($self->{targetDir}); $self->{sourceDir} = "n/a" unless defined($self->{sourceDir}); print "Policy: maxFiles=$self->{maxFiles}, prefix=$self->{prefix}, frequency=$self->{frequency}, targetDir=$self->{targetDir}, sourceDir=$self->{sourceDir}\n"; } sub info { my( $self ) = @_; $self->{maxFiles} = "n/a" unless defined($self->{maxFiles}); $self->{prefix} = "n/a" unless defined($self->{prefix}); $self->{frequency} = "n/a" unless defined($self->{frequency}); $self->{targetDir} = "n/a" unless defined($self->{targetDir}); $self->{sourceDir} = "n/a" unless defined($self->{sourceDir}); return "Policy: maxFiles=$self->{maxFiles}, prefix=$self->{prefix}, frequency=$self->{frequency}, targetDir=$self->{targetDir}, sourceDir=$self->{sourceDir}"; } sub tar { my( $self, $ts ) = @_; my $filename = sprintf("%s/%s_%s.tar.gz",$self->{targetDir},$self->{prefix},$ts); my $output = `tar cvzf $filename --exclude-tag-all=NOTAR $self->{sourceDir} 2>&1 1>/dev/null`; if (-e $filename) { my $stat = `ls -lh $filename`; return "system tar: $stat"; } return "Error: tar failed to produce $filename\n$output\n"; } sub perlTar { my( $self, $ts ) = @_; my $filename = sprintf("%s/%s_%s.tar.gz",$self->{targetDir},$self->{prefix},$ts); my $tar = new Archive::Tar; our @files=(); find(sub {push(@files,$File::Find::name);},$self->{sourceDir}); $tar->add_files(@files); $tar->write($filename,COMPRESS_GZIP); if (-e $filename) { my $stat = `ls -lh $filename`; return "perl tar: $stat"; } my $err = $tar->error(); return "Error: tar failed to produce $filename\n$err\n"; } package App::BackupPlan; use 5.012003; use strict; use warnings; use XML::DOM; use Log::Log4perl qw(:easy); require XML::DOM; require Log::Log4perl; require Exporter; use AutoLoader qw(AUTOLOAD); our @ISA = qw(Exporter); # Items to export into callers namespace by default. Note: do not export # names by default without a very good reason. Use EXPORT_OK instead. # Do not simply export all your public functions/methods/constants. # This allows declaration use App::BackupPlan ':all'; # If you do not need this, moving things directly into @EXPORT or @EXPORT_OK # will save memory. our %EXPORT_TAGS = ( 'all' => [ qw( ) ] ); our @EXPORT_OK = ( @{ $EXPORT_TAGS{'all'} } ); our @EXPORT = qw(); BEGIN { our $VERSION = '0.0.1'; print "App::BackupPlan by Gualtiero Chiaia, version $VERSION\n"; } # Preloaded methods go here. #todo allow default behaviour for logging our $TAR = 'system'; #use system tar sub new { my $class = shift; my $self = { config => shift, log => shift }; bless $self,$class; return $self; } sub run { my ($self) = @_; #validate the config file die "App::BackupPlan configuration file is required, but was not given!" unless defined $self->{config}; #logging config if (defined $self->{log}) { Log::Log4perl::init($self->{log}); } else { Log::Log4perl->easy_init( { level => $INFO, file => ">>easy.log" } ); } my $logger = Log::Log4perl::get_logger(); #--now read config file my $parser = new XML::DOM::Parser; my $doc = $parser->parsefile ($self->{config}) or die "Could not parse $self->{config}"; #get policies my ($obj,%policies) = &getPolicies($doc); foreach my $k (keys %policies) { #policy info print "**$k policy**\n"; $logger->info("**$k policy**"); my $policy = $policies{$k}; $policy->print; $logger->debug($policy->info); my $now = time; my $ts = &formatTimeSpan(time); my %files = &getFiles($policy->getTargetDir,$policy->getPrefix); #get last my $lastts = &getLastTs(keys %files); my $threshold = &subTimeSpan($now,$policy->getFrequency); if (!defined($lastts) || $lastts < $threshold ) { #needs a new tar file my $lastTS = '<missing>'; $lastTS = &formatTimeSpan($lastts) if defined $lastts; $logger->info("Need a new tar file, last tar was on $lastTS"); my $tarout; if (lc $TAR eq 'perl') {$tarout= $policy->perlTar($ts);} else {$tarout = $policy->tar($ts);} if ($tarout =~ /Error/i) { $logger->error($tarout); } else { $logger->debug($tarout); } #now delete old %files = &getFiles($policy->getTargetDir,$policy->getPrefix); my $maxFiles = $policy->getMaxFiles; my $cnt = scalar(keys %files); while ($cnt > $maxFiles && $cnt >0) { my $oldts = &getFirstTs(keys %files); my $oldTS = '<missing>'; $oldTS = &formatTimeSpan($oldts) if defined $oldts; unlink $files{$oldts}; $logger->info("Deleted old tar file, with time stamp $oldTS"); %files = &getFiles($policy->getTargetDir,$policy->getPrefix); $cnt--; } #end while } #end if } #end foreach } #end sub sub getPolicies { my $xml = $_[0]; my $defaultPolicy = new App::BackupPlan::Policy; #get default policy first #first default policy my $nodes = $xml->getElementsByTagName("default"); if ($nodes->getLength > 0) { my $node = $nodes->item(0); foreach my $child ($node->getChildNodes) { if ($child->getNodeType == ELEMENT_NODE){ my $name = $child->getNodeName; my $value = $child->getFirstChild->getNodeValue; $defaultPolicy->set($name,$value); } } } #then all policies my %raw_policies; $nodes = $xml->getElementsByTagName("task"); for (my $i=0;$i<$nodes->getLength; $i++) { my $task = $nodes->item($i); my $taskName = $task->getAttributes->getNamedItem('name')->getNodeValue; my $p = new App::BackupPlan::Policy; foreach my $child ($task->getChildNodes) { if ($child->getNodeType == ELEMENT_NODE){ my $name = $child->getNodeName; my $value = $child->getFirstChild->getNodeValue; $p->set($name,$value); } } $raw_policies{$taskName} = $p; } %raw_policies = injectDefaultPolicy($defaultPolicy,%raw_policies); return ($defaultPolicy,%raw_policies); } sub injectDefaultPolicy { my ($defPolicy,%raw_pcs) = @_; foreach my $k (keys %raw_pcs) { $raw_pcs{$k}->setMaxFiles($defPolicy->getMaxFiles) unless defined($raw_pcs{$k}->getMaxFiles); $raw_pcs{$k}->setPrefix($defPolicy->getPrefix) unless defined($raw_pcs{$k}->getPrefix); $raw_pcs{$k}->setFrequency($defPolicy->getFrequency) unless defined($raw_pcs{$k}->getFrequency); $raw_pcs{$k}->setSourceDir($defPolicy->getSourceDir) unless defined($raw_pcs{$k}->getSourceDir); $raw_pcs{$k}->setTargetDir($defPolicy->getTargetDir) unless defined($raw_pcs{$k}->getTargetDir); } return %raw_pcs; } sub addTimeSpan { my ($timestamp,$span) = @_; if ($span=~/(\d+)d/) { return $timestamp+86400*$1; } if ($span=~/(\d+)m/) { return $timestamp+86400*30*$1; } if ($span=~/(\d+)y/) { return $timestamp+86400*30*12*$1; } return $timestamp; } sub subTimeSpan { my ($timestamp,$span) = @_; if ($span=~/(\d+)d/) { return $timestamp-86400*$1; } if ($span=~/(\d+)m/) { return $timestamp-86400*30*$1; } if ($span=~/(\d+)y/) { return $timestamp-86400*30*12*$1; } return $timestamp; } sub formatTimeSpan { my $ts = $_[0]; my @ts = localtime $ts; my $year = $ts[5]+1900; my $month = $ts[4]+1; my $day = $ts[3]; return sprintf("%4d%02d%02d",$year,$month,$day); } sub getFiles { my %fileMap; my ($sourceDir, $pattern) = @_; opendir DH, $sourceDir or die "Cannot open directory $sourceDir: $!\n"; foreach my $f (readdir DH) { if ($f=~/$pattern.*/) { my $fname = $sourceDir."/".$f; #print "$fname\n"; my @s = stat $fname; my $mtime = $s[9]; $fileMap{$mtime}= $fname; } } closedir DH; return %fileMap; } sub getLastTs { my (@ts) = sort @_; my $nts = scalar @ts; return $ts[$nts-1]; } sub getFirstTs { my (@ts) = sort @_; return $ts[0]; } # Autoload methods go after =cut, and are processed by the autosplit program. 1; __END__ # Below is stub documentation for your module. You'd better edit it! =head1 NAME App::BackupPlan - Perl extension for automated, regular backups =head1 SYNOPSIS #when using system tar use App::BackupPlan; my $plan = new App::BackupPlan($configFile, $logFile); $plan->run; #when using perl tar use App::BackupPlan; $App::BackupPlan::TAR='perl'; my $plan = new App::BackupPlan($configFile, $logFile); $plan->run; =head1 DESCRIPTION App::BackupPlan is a simple module to perform regular, selective and automated backups of your system. It requires an xml file with the specification of your backup plan, logically divided into independent tasks. The constructor also takes a log4perl configuration file, to customise the logging produced. This can be omitted if the default logging behaviour is desired. By setting up a regular back-up task using cron or similar, and by configuring a backup plan with different tasks, backup frequencies and max number of files retained, it is possible to achieve a regular and incremental backup of different part of your system, without too much trouble. =head2 CONFIGURATION Here is a made-up sample configuration file for a backup plan that backups two directories with different frequencies: a B<pictures> and a B<videos> directories. <backup> <plan name="one"> <default> <maxFiles>3</maxFiles> <frequency>1m</frequency> <targetDir><![CDATA[/backup]]></targetDir> </default> <task name="pics"> <prefix>pics</prefix> <sourceDir><![CDATA[/data/pictures]]></sourceDir> <frequency>20d</frequency> </task> <task name="video"> <prefix>vid</prefix> <maxFiles>2</maxFiles> <sourceDir><![CDATA[/data/Videos]]></sourceDir> <frequency>40d</frequency> </task> </plan> </backup> =over =item * The tag B<E<lt>backupE<gt>> is the container tag for the backup plan. =item * The tag B<E<lt>planE<gt>> contains the actual plan, as a collection of B<tasks>, with an identifying name that is not currently used. A B<plan> is made of a E<lt>defaultE<gt> B<task> and many separate B<tasks>. The E<lt>defaultE<gt> B<task> contains the definition of the properties of a general B<task>, when an override is not given. Strictly speaking the current version of B<App::BackupPlan> requires only a default task and some distinct task elements inside a well formed XML document. The structure of this sample configuration is mostly given for clarity. =item * The tag B<E<lt>defaultE<gt>> contains the specification of the common properties for all other tasks. This element is used to specify the default behaviour and its properies are inherited by all other B<tasks>. It allows the same XML sub-elements as E<lt>taskE<gt> does, so for its specification please see below. =item * The tag B<E<lt>taskE<gt>> defines a backup policy for a given directory structure. It has an attribute I<name> mostly for debugging purpouse. Its properties, partially inherited from the E<lt>defaultE<gt> B<task> and partially overridden, are: =over =item * B<E<lt>prefixE<gt>> The prefix used to identify the beginning of the compressed backup file. =item * B<E<lt>maxFilesE<gt>> The maximum number of backup files preserved in the E<lt>targetDirE<gt> directory. As soon as this number is breached, the oldest backup file is removed (rolling behaviour). =item * B<E<lt>frequencyE<gt>> The period of time between two consecutive backups of the current E<lt>sourceDirE<gt>. This is specified by a string of type C<n[dmy]>, where n is a number and the second letter is either C<d> for days, C<m> for months or C<y> for years. Internally, C<1m = 30d> and C<1y = 360d>, wihtout considering months of 28 or 31 days. =item * B<E<lt>sourceDirE<gt>> The path for the directory structure to be backed up. It requires a B<CDATA> xml tag to escape the slashes in the full path. =item * B<E<lt>targetDirE<gt>> The path for the destination directory where backup files are stored. It requires a B<CDATA> xml tag to escape the slashes in the full path. Typically this will be a single location on the disk, and hence the same for all tasks and specified in the E<lt>defaultE<gt> section. =back =back =head2 USAGE This perl module was written with an automated backup functionality in mind. So, even if it can be run manually and on demand, it is best suited to be integrated in a regular batch (overnight maybe) or even better as a B<cron> task. To facilitate this task there is a script client in the bin directory of this distribution, B<backup.pl>, which can be easily scheduled as cron task and, that can be run as follow: C<backup.pl -c /pathto/plan.xml -l /pathto/log4perl.conf> when using I<system> B<tar>, or as C<backup.pl -c /pathto/plan.xml -l /pathto/log4perl.conf -t perl> for I<perl> B<tar>. =head2 DEPENDENCIES The list of module dependencies is as follows: =over =item * B<XML::DOM> for parsing the configuration file, =item * B<Log::Log4perl> for logging, =item * B<File::Find> to collect the entire content of a directory substructure when using Archive::Tar =item * B<Archive::Tar> to perform perl based tar, instead of using system tar =item * B<tar> executable used in Linux environment for storage and compression =back On a B<Linux> system it is recommended to use the I<system> B<tar> executable, which is the default behaviour for this module. There is also the option of using L<Archive::Tar> perl module isntead of the I<system> B<tar>. This is recommended for Windows based systems, or if the B<tar> executable is not available. This behaviour is designated as I<perl> B<tar> and is selected by setting C<$App::BackupPlan::TAR='perl'>. =head2 EXPORT None by default. =head1 SEE ALSO L<XML::DOM>, L<Log::Log4perl>, L<File::Find>, L<Archive::Tar> =head1 AUTHOR Gualtiero Chiaia, E<lt>gchiaia@prismaqf.comE<gt> =head1 COPYRIGHT AND LICENSE Copyright (C) 2012 by Gualtiero Chiaia This library is free software; you can redistribute it and/or modify it under the same terms as Perl itself, either Perl version 5.12.3 or, at your option, any later version of Perl 5 you may have available. =cut