package Archive::BagIt::Role::Manifest;
use strict;
use warnings;
use namespace::autoclean;
use Carp qw( croak carp );
use File::Spec ();
use Moo::Role;
with 'Archive::BagIt::Role::Plugin';
with 'Archive::BagIt::Role::Portability';
# ABSTRACT: A role that handles all manifest files for a specific Algorithm
our $VERSION = '0.091'; # VERSION

has 'algorithm' => (
    is => 'rw',
    #isa=>'HashRef',
);

has 'manifest_file' => (
    is      => 'rw',
    lazy    => 1,
    builder => '_build_manifest_file',
);

sub _build_manifest_file {
    my $self = shift;
    my $algorithm = $self->algorithm()->name;
    my $file = File::Spec->catfile($self->bagit->metadata_path, "manifest-$algorithm.txt");
    if (-f $file) {
        return $file;
    }
    return;
}


has 'tagmanifest_file' => (
    is      => 'rw',
    lazy    => 1,
    builder => '_build_tagmanifest_file'
);

sub _build_tagmanifest_file {
    my $self = shift;
    my $algorithm = $self->algorithm()->name;
    my $file = File::Spec->catfile( $self->bagit->metadata_path, "tagmanifest-$algorithm.txt");
    if (-f $file) {
        return $file;
    }
    return;
}

sub BUILD {}

after BUILD => sub {
    my $self = shift;
    my $algorithm = $self->algorithm->name;
    $self->{bagit}->{manifests}->{$algorithm} = $self;
};

has 'parallel_support' => (
    is        => 'ro',
    builder   => '_check_parallel_support',
    predicate => 1,
    lazy      => 1,
);

sub _check_parallel_support {
    my $self = shift;
    my $class = 'Parallel::parallel_map';
    if (!exists $INC{'Parallel/parallel_map.pm'}) {
        carp "Module '$class' not available, disable parallel support";
        $self->bagit->use_parallel( 0 );
        return 0;
    }
    load_class($class);
    $class->import( 'parallel_map' );
    return 1;
}

sub check_pluggable_modules() {
    my $self = shift;
    return ($self->has_parallel_support() && $self->has_async_support());
}



has 'manifest_entries' => (
    is => 'ro',
    lazy => 1,
    builder => '_build_manifest_entries',
);


has 'tagmanifest_entries' => (
    is => 'ro',
    lazy => 1,
    builder => '_build_tagmanifest_entries',
);

sub __build_xxxmanifest_entries {
    my ($self, $xxmanifest_file) = @_;
    my $xxmanifest_entries = {};
    my $algorithm = $self->algorithm()->name;
    open(my $XXMANIFEST, "<:encoding(UTF-8)", $xxmanifest_file) or croak("Cannot open $xxmanifest_file: $!");
    while (my $line = <$XXMANIFEST>) {
        $line = chomp_portable($line);
        my ($digest, $file) = split(/\s+/, $line, 2);
        next unless ((defined $digest) && (defined $file)); # empty lines!
        $xxmanifest_entries->{$file} = $digest;
    }
    close($XXMANIFEST);
    return $xxmanifest_entries;
}

sub _build_tagmanifest_entries {
    my ($self) = @_;
    my $tm_file = $self->tagmanifest_file();
    if (defined $tm_file) {
        return $self->__build_xxxmanifest_entries($tm_file);
    }
    return;
}

sub _build_manifest_entries {
    my ($self) = @_;
    my $m_file = $self->manifest_file();
    if (defined $m_file) {
        return $self->__build_xxxmanifest_entries($m_file);
    }
    return;
}

sub _fill_digest_hashref { # should be handle if empty values and ignore it (because parallel map)
    my ($self, $bagit, $localname) = @_;
    if ((!defined $localname) or (0 == length($localname)) ) {
        # croak "empty localname used!";
        return;
    }
    my $digest_hashref;
    my $fullname = File::Spec->catfile($bagit, $localname);
    my $calc_digest = $self->bagit->digest_callback();
    my $eval = &$calc_digest($self->algorithm(), $fullname);
    $digest_hashref->{calculated_digest} = $eval // '';
    $digest_hashref->{local_name} = $localname;
    $digest_hashref->{full_name} = $fullname;
    return $digest_hashref;
}




# calc digest
# expects expected_ref, array_ref of filenames
# returns arrayref of hashes where each entry has
# $tmp->{calculated_digest} = $digest;
# $tmp->{expected_digest} = $expected_digest;
# $tmp->{filename} = $filename;
sub calc_digests {
    my ($self, $bagit, $filenames_ref) = @_;
    $self->check_pluggable_modules(); # handles Modules
    my @digest_hashes;
    my %digest_results;
    if ($self->bagit->use_parallel()) {
        # Parallel::Map does not work at the moment, potential bug in Parallel::Map or IO::Async
        # @digest_hashes = pmap_scalar {
        #     $self->_fill_digest_hashref($bagit, $_);
        # } foreach => $filenames_ref;
        # works as expected:

        my $anon_sub = sub {
            my $filename = shift;
            return $self->_fill_digest_hashref($bagit, $filename);
        };
        ## no critic (ProhibitStringyEval);
        @digest_hashes = eval 'Parallel::parallel_map::parallel_map (
            sub { my $filename = shift; &$anon_sub($filename);}  , @{ $filenames_ref}
        )';
    } else {
        # serial variant
        @digest_hashes = map {$self->_fill_digest_hashref($bagit, $_)} @{$filenames_ref}
    }
    return \@digest_hashes;
}

sub _verify_XXX_manifests {
    my ($self, $xxprefix, $xxmanifest_entries, $files_ref, $return_all_errors) = @_;
    # Read the manifest file
    my @files = @{ $files_ref };
    my @invalid_messages;
    my $bagit = $self->bagit->bag_path;
    my $algorithm = $self->algorithm()->name;
    my $subref_invalid_report_or_die = sub {
        my $message = shift;
        if (defined $return_all_errors) {
            push @invalid_messages, $message;
        } else {
            croak($message);
        }
        return;
    };
    # Test readability
    foreach my $local_name (@files) {
        # local_name is relative to bagit base
        my $filepath = File::Spec->catfile($bagit, $local_name);
        unless (-r $filepath) {
            &$subref_invalid_report_or_die(
                "cannot read $local_name (bag-path:$bagit)",
            );
        }
    }
    # Evaluate each file against the manifest

    my $local_xxfilename = "${xxprefix}-${algorithm}.txt";

    # first check if each file from payload exists in manifest_entries for given alg
    foreach my $local_name (@files) {
        my $normalized_local_name = normalize_payload_filepath($local_name);
        # local_name is relative to bagit base
        unless (exists $xxmanifest_entries->{$normalized_local_name}) { # localname as value should exist!
            &$subref_invalid_report_or_die(
                "file '$local_name' (normalized='$normalized_local_name') found, which is not in '$local_xxfilename' (bag-path:'$bagit')!"
                    #."DEBUG: \n".join("\n", keys %{$xxmanifest_entries->{$algorithm}})
            );
        }
    }
    # second check if each file from manifest_entries for given alg exists in payload
    my %normalised_files;
    foreach my $file (@files) {
        $normalised_files{ normalize_payload_filepath( $file )} = 1;
    }
    foreach my $local_mf_entry_path (keys %{$xxmanifest_entries}) {
        if ( # to avoid escapes via manifest-files
            check_if_payload_filepath_violates($local_mf_entry_path)
        ) {
            &$subref_invalid_report_or_die("file '$local_mf_entry_path' not allowed in '$local_xxfilename' (bag-path:'$bagit'")
        }
        else {
            unless (exists $normalised_files{$local_mf_entry_path}) {
                &$subref_invalid_report_or_die(
                    "file '$local_mf_entry_path' NOT found, but expected via '$local_xxfilename' (bag-path:'$bagit')!"
                );
            }
        }
    }
    # all preconditions full filled, now calc all digests
    my $digest_hashes_ref = $self->calc_digests($bagit, \@files);
    # compare digests
    if (defined $digest_hashes_ref && (ref $digest_hashes_ref eq 'ARRAY')) {
        foreach my $digest_entry (@{$digest_hashes_ref}) {
            my $normalized = normalize_payload_filepath($digest_entry->{local_name});
            $digest_entry->{expected_digest} = $xxmanifest_entries->{$normalized};
            if (! defined $digest_entry->{expected_digest} ) { next; } # undef expected digests only occur if all preconditions fullfilled but return_all_errors was set, we should ignore it!
            if ($digest_entry->{calculated_digest} ne $digest_entry->{expected_digest}) {
                my $xxfilename = File::Spec->catfile($bagit, $local_xxfilename);
                &$subref_invalid_report_or_die(
                    sprintf("file '%s' (normalized='%s') invalid, digest (%s) calculated=%s, but expected=%s in file '%s'",
                        $digest_entry->{local_name},
                        $normalized,
                        $algorithm,
                        $digest_entry->{calculated_digest},
                        $digest_entry->{expected_digest},
                        $xxfilename
                    )
                );
            }
        }
    }

    if ($return_all_errors && (scalar @invalid_messages > 0)) {
        push @{$self->bagit->{errors}},
            join("\n\t",
                sort @invalid_messages
            );
        return;
    }
    return 1;
}


sub verify_manifest {
    my ($self, $payload_files_ref, $return_all_errors) = @_;
    if ($self->manifest_file()) {
        return $self->_verify_XXX_manifests(
            "manifest",
            $self->manifest_entries(),
            $payload_files_ref,
            $return_all_errors
        );
    }
    return;
}


sub verify_tagmanifest {
    my ($self, $non_payload_files_ref, $return_all_errors) = @_;
    my @non_payload_files = grep {$_ !~ m#tagmanifest-[0-9a-zA-Z]+\.txt$#} @{ $non_payload_files_ref };
    if ($self->tagmanifest_file()) {
        return $self->_verify_XXX_manifests(
            "tagmanifest",
            $self->tagmanifest_entries(),
            \@non_payload_files,
            $return_all_errors
        );
    }
    return;
}

sub __create_xxmanifest {
    my ($self, $prefix, $files_ref) = @_;
    my $algo = $self->algorithm->name;
    my $bagit = $self->bagit->bag_path;
    my $manifest_file = File::Spec->catfile($self->bagit->metadata_path, "$prefix-${algo}.txt");
    # Generate digests for all of the files under ./data
    my $digest_hashes_ref = $self->calc_digests($bagit, $files_ref);
    if (defined $digest_hashes_ref && (ref $digest_hashes_ref eq 'ARRAY')) {
        open(my $fh, ">:encoding(UTF-8)",$manifest_file) or croak("Cannot create $prefix-${algo}.txt: $!\n");
        foreach my $digest_entry (@{$digest_hashes_ref}) {
            my $normalized_file = normalize_payload_filepath($digest_entry->{local_name});
            my $digest = $digest_entry->{calculated_digest};
            print($fh "$digest  $normalized_file\n");
        }
        close($fh);
    }
    return 1;
}


sub create_manifest {
    my ($self) = @_;
    $self->__create_xxmanifest('manifest', $self->bagit->payload_files);
    return 1;
}


sub create_tagmanifest {
    my ($self) = @_;
    my @non_payload_files = grep {$_ !~ m#^tagmanifest-.*\.txt$#} @{ $self->bagit->non_payload_files };
    $self->__create_xxmanifest('tagmanifest', \@non_payload_files);
    return 1;
}

no Moo;
1;

__END__

=pod

=encoding UTF-8

=head1 NAME

Archive::BagIt::Role::Manifest - A role that handles all manifest files for a specific Algorithm

=head1 VERSION

version 0.091

=head2 manifest_entries()

returns the manifest_entries() for the current digest algorithm, the result is hashref like:

   {
       data/hello.txt   "e7c22b994c59d9cf2b48e549b1e24666636045930d3da7c1acb299d1c3b7f931f94aae41edda2c2b207a36e10f8bcb8d45223e54878f5b316e7ce3b6bc019629"
   }

=head2 tagmanifest_entries()

returns the tagmanifest_entries() for the current digest algorithm, the result is hashref, see L<manifest_entries()>

=head2 calc_digests($bagit, $filenames_ref, $opts)

Method to calculate and return all digests for a a list of files. This method will be overwritten by L<Archive::BagIt::Fast>.

=head2 verify_manifest($payload_files, $return_all_errors)

check fixities of payload files in both directions

=head2 verify_tagmanifest($non_payload_files, $return_all_errors)

check fixities of non-payload files in both directions

=head2 create_manifest()

creates a new manifest file for payload files

=head2 create_tagmanifest()

creates a new tagmanifest file for non payload files

=head1 AVAILABILITY

The latest version of this module is available from the Comprehensive Perl
Archive Network (CPAN). Visit L<http://www.perl.com/CPAN/> to find a CPAN
site near you, or see L<https://metacpan.org/module/Archive::BagIt/>.

=head1 BUGS AND LIMITATIONS

You can make new bug reports, and view existing ones, through the
web interface at L<http://rt.cpan.org>.

=head1 AUTHOR

Andreas Romeyke <cpan@andreas.romeyke.de>

=head1 COPYRIGHT AND LICENSE

This software is copyright (c) 2022 by Rob Schmidt <rjeschmi@gmail.com>, William Wueppelmann and Andreas Romeyke.

This is free software; you can redistribute it and/or modify it under
the same terms as the Perl 5 programming language system itself.

=cut