use
5.006;
our
$VERSION
=
'0.056'
;
use
open
':std'
,
':utf8'
;
our
@checksum_algos
=
qw(md5 sha1)
;
our
$DEBUG
=0;
sub
new {
my
(
$class
,
$bag_path
) =
@_
;
my
$self
= {};
bless
$self
,
$class
;
$bag_path
=~s!/$!!;
$self
->{
'bag_path'
} =
$bag_path
||
""
;
if
(
$bag_path
) {
$self
->_open();
}
return
$self
;
}
sub
_open {
my
(
$self
) =
@_
;
$self
->_load_manifests();
$self
->_load_tagmanifests();
return
$self
;
}
sub
_load_manifests {
my
(
$self
) =
@_
;
my
@manifests
=
$self
->manifest_files();
foreach
my
$manifest_file
(
@manifests
) {
die
(
"Cannot open $manifest_file: $!"
)
unless
(
open
(
my
$MANIFEST
,
"<:encoding(utf8)"
,
$manifest_file
));
while
(
my
$line
= <
$MANIFEST
>) {
chomp
(
$line
);
my
(
$digest
,
$file
);
(
$digest
,
$file
) =
$line
=~ /^([a-f0-9]+)\s+(.+)$/;
if
(!
$file
) {
die
(
"This is not a valid manifest file"
);
}
else
{
print
"file: $file \n"
if
$DEBUG
;
$self
->{entries}->{
$file
} =
$digest
;
}
}
close
(
$MANIFEST
);
}
return
$self
;
}
sub
_load_tagmanifests {
my
(
$self
) =
@_
;
my
@tagmanifests
=
$self
->tagmanifest_files();
foreach
my
$tagmanifest_file
(
@tagmanifests
) {
die
(
"Cannot open $tagmanifest_file: $!"
)
unless
(
open
(
my
$TAGMANIFEST
,
"<:encoding(utf8)"
,
$tagmanifest_file
));
while
(
my
$line
= <
$TAGMANIFEST
>) {
chomp
(
$line
);
my
(
$digest
,
$file
) =
split
(/\s+/,
$line
, 2);
$self
->{tagentries}->{
$file
} =
$digest
;
}
close
(
$TAGMANIFEST
);
}
return
$self
;
}
sub
make_bag {
my
(
$class
,
$bag_dir
) =
@_
;
unless
( -d
$bag_dir
) {
die
(
"source bag directory doesn't exist"
); }
unless
( -d
$bag_dir
.
"/data"
) {
rename
(
$bag_dir
,
$bag_dir
.
".tmp"
);
mkdir
(
$bag_dir
);
rename
(
$bag_dir
.
".tmp"
,
$bag_dir
.
"/data"
);
}
my
$self
=
$class
->new(
$bag_dir
);
$self
->_write_bagit(
$bag_dir
);
$self
->_write_baginfo(
$bag_dir
);
$self
->_manifest_md5(
$bag_dir
);
$self
->_tagmanifest_md5(
$bag_dir
);
$self
->_open();
return
$self
;
}
sub
_write_bagit {
my
(
$self
,
$bagit
) =
@_
;
open
(
my
$BAGIT
,
">"
,
$bagit
.
"/bagit.txt"
) or
die
(
"Can't open $bagit/bagit.txt for writing: $!"
);
print
(
$BAGIT
"BagIt-Version: 0.97\nTag-File-Character-Encoding: UTF-8"
);
close
(
$BAGIT
);
return
1;
}
sub
_write_baginfo {
my
(
$self
,
$bagit
,
%param
) =
@_
;
open
(
my
$BAGINFO
,
">"
,
$bagit
.
"/bag-info.txt"
) or
die
(
"Can't open $bagit/bag-info.txt for writing: $!"
);
$param
{
'Bagging-Date'
} = POSIX::strftime(
"%F"
,
gmtime
(
time
));
while
(
my
(
$key
,
$value
) =
each
(
%param
)) {
print
(
$BAGINFO
"$key: $value\n"
);
}
close
(
$BAGINFO
);
return
1;
}
sub
_manifest_crc32 {
my
(
$self
,
$bagit
) =
@_
;
my
$manifest_file
=
"$bagit/manifest-crc32.txt"
;
my
$data_dir
=
"$bagit/data"
;
open
(
my
$fh
,
">:encoding(utf8)"
,
$manifest_file
) or
die
(
"Cannot create manifest-crc32.txt: $!\n"
);
find(
sub
{
$_
=decode(
'utf8'
,
$_
);
my
$file
= decode(
'utf8'
,
$File::Find::name
);
if
(-f
$_
) {
open
(
my
$DATA
,
"<:encoding(utf8)"
,
$_
) or
die
(
"Cannot read $_: $!"
);
my
$digest
=
sprintf
(
"%010d"
,crc32(
$DATA
));
close
(
$DATA
);
my
$filename
=
substr
(
$file
,
length
(
$bagit
) + 1);
print
(
$fh
"$digest $filename\n"
);
}
},
$data_dir
);
close
(
$fh
);
return
;
}
sub
_manifest_md5 {
my
(
$self
,
$bagit
) =
@_
;
my
$manifest_file
=
"$bagit/manifest-md5.txt"
;
my
$data_dir
=
"$bagit/data"
;
open
(
my
$md5_fh
,
">:encoding(utf8)"
,
$manifest_file
) or
die
(
"Cannot create manifest-md5.txt: $!\n"
);
find(
sub
{
my
$file
= decode(
'utf8'
,
$File::Find::name
);
if
(-f
$_
) {
open
(
my
$DATA
,
"<:raw"
,
"$_"
) or
die
(
"Cannot read $_: $!"
);
my
$digest
= Digest::MD5->new->addfile(
$DATA
)->hexdigest;
close
(
$DATA
);
my
$filename
=
substr
(
$file
,
length
(
$bagit
) + 1);
print
(
$md5_fh
"$digest $filename\n"
);
}
},
$data_dir
);
close
(
$md5_fh
);
return
;
}
sub
_tagmanifest_md5 {
my
(
$self
,
$bagit
) =
@_
;
my
$tagmanifest_file
=
"$bagit/tagmanifest-md5.txt"
;
open
(
my
$md5_fh
,
">:encoding(utf8)"
,
$tagmanifest_file
) or
die
(
"Cannot create tagmanifest-md5.txt: $! \n"
);
find (
sub
{
$_
= decode(
'utf8'
,
$_
);
my
$file
= decode(
'utf8'
,
$File::Find::name
);
if
(
$_
=~m/^data$/) {
$File::Find::prune
=1;
}
elsif
(
$_
=~m/^tagmanifest-.*\.txt/) {
}
elsif
( -f
$_
) {
open
(
my
$DATA
,
"<:raw"
,
"$_"
) or
die
(
"Cannot read $_: $!"
);
my
$digest
= Digest::MD5->new->addfile(
$DATA
)->hexdigest;
close
(
$DATA
);
my
$filename
=
substr
(
$file
,
length
(
$bagit
) + 1);
print
(
$md5_fh
"$digest $filename\n"
);
}
},
$bagit
);
close
(
$md5_fh
);
return
;
}
sub
verify_bag {
my
(
$self
,
$opts
) =
@_
;
my
$bagit
=
$self
->{
'bag_path'
};
my
$manifest_file
=
"$bagit/manifest-md5.txt"
;
my
$payload_dir
=
"$bagit/data"
;
my
%manifest
= ();
my
$return_all_errors
=
$opts
->{return_all_errors};
my
%invalids
;
my
@payload
= ();
die
(
"$manifest_file is not a regular file"
)
unless
-f (
$manifest_file
);
die
(
"$payload_dir is not a directory"
)
unless
-d (
$payload_dir
);
unless
(
$self
->version() > .95) {
die
(
"Bag Version is unsupported"
);
}
foreach
my
$entry
(
keys
(%{
$self
->{entries}})) {
$manifest
{
$entry
} =
$self
->{entries}->{
$entry
};
}
find(
sub
{
push
(
@payload
, decode(
'utf8'
,
$File::Find::name
)) },
$payload_dir
);
my
$digestobj
= Digest::MD5->new();
foreach
my
$file
(
@payload
) {
next
if
(-d (
$file
));
my
$local_name
=
substr
(
$file
,
length
(
$bagit
) + 1);
my
(
$digest
);
unless
(
$manifest
{
$local_name
}) {
die
(
"file found not in manifest: [$local_name]"
);
}
open
(
my
$fh
,
"<:raw"
,
"$bagit/$local_name"
) or
die
(
"Cannot open $local_name"
);
$digest
=
$digestobj
->addfile(
$fh
)->hexdigest;
close
(
$fh
);
unless
(
$digest
eq
$manifest
{
$local_name
}) {
if
(
$return_all_errors
) {
$invalids
{
$local_name
} =
$digest
;
}
else
{
die
(
"file: $local_name invalid"
);
}
}
delete
(
$manifest
{
$local_name
});
}
if
(
$return_all_errors
&&
keys
(
%invalids
) ) {
foreach
my
$invalid
(
keys
(
%invalids
)) {
print
"invalid: $invalid hash: "
.
$invalids
{
$invalid
}.
"\n"
;
}
die
(
"bag verify failed with invalid files"
);
}
if
(
keys
(
%manifest
)) {
die
(
"Missing files in bag"
); }
return
1;
}
sub
get_checksum {
my
(
$self
) =
@_
;
my
$bagit
=
$self
->{
'bag_path'
};
open
(
my
$SRCFILE
,
"<:raw"
,
$bagit
.
"/manifest-md5.txt"
);
my
$srchex
=Digest::MD5->new->addfile(
$SRCFILE
)->hexdigest;
close
(
$SRCFILE
);
return
$srchex
;
}
sub
version {
my
(
$self
) =
@_
;
my
$bagit
=
$self
->{
'bag_path'
};
my
$file
=
join
(
"/"
,
$bagit
,
"bagit.txt"
);
open
(
my
$BAGIT
,
"<"
,
$file
) or
die
(
"Cannot read $file: $!"
);
my
$version_string
= <
$BAGIT
>;
my
$encoding_string
= <
$BAGIT
>;
close
(
$BAGIT
);
$version_string
=~ /^BagIt-Version: ([0-9.]+)$/;
return
$1 || 0;
}
sub
payload_files {
my
(
$self
) =
@_
;
my
@payload
=
$self
->_payload_files();
return
@payload
;
}
sub
_payload_files{
my
(
$self
) =
@_
;
my
$payload_dir
=
join
(
"/"
,
$self
->{
"bag_path"
},
"data"
);
my
@payload
=();
File::Find::find(
sub
{
push
(
@payload
,decode(
'utf8'
,
$File::Find::name
));
},
$payload_dir
);
return
@payload
;
}
sub
non_payload_files{
my
(
$self
) =
@_
;
my
@non_payload
=
$self
->_non_payload_files();
return
@non_payload
;
}
sub
_non_payload_files {
my
(
$self
) =
@_
;
my
@payload
= ();
File::Find::find(
sub
{
$File::Find::name
= decode (
'utf8'
,
$File::Find::name
);
if
(-f
$File::Find::name
) {
my
(
$relpath
) = (
$File::Find::name
=~m!
$self
->{
"bag_path"
}/(.*$)!);
push
(
@payload
,
$relpath
);
}
elsif
(-d _ &&
$_
eq
"data"
) {
$File::Find::prune
=1;
}
else
{
}
},
$self
->{
"bag_path"
});
return
@payload
;
}
sub
manifest_files {
my
(
$self
) =
@_
;
my
@manifest_files
;
foreach
my
$algo
(
@checksum_algos
) {
my
$manifest_file
=
$self
->{
"bag_path"
}.
"/manifest-$algo.txt"
;
if
(-f
$manifest_file
) {
push
@manifest_files
,
$manifest_file
;
}
}
return
@manifest_files
;
}
sub
tagmanifest_files {
my
(
$self
) =
@_
;
my
@tagmanifest_files
;
foreach
my
$algo
(
@checksum_algos
) {
my
$tagmanifest_file
=
$self
->{
"bag_path"
}.
"/tagmanifest-$algo.txt"
;
if
(-f
$tagmanifest_file
) {
push
@tagmanifest_files
,
$tagmanifest_file
;
}
}
return
@tagmanifest_files
;
}
1;