package File::Sticker::Scribe::Exif;
$File::Sticker::Scribe::Exif::VERSION = '4.0101';
=head1 NAME

File::Sticker::Scribe::Exif - read, write and standardize meta-data from EXIF file

=head1 VERSION

version 4.0101

=head1 SYNOPSIS

    use File::Sticker::Scribe::Exif;

    my $obj = File::Sticker::Scribe::Exif->new(%args);

    my %meta = $obj->read_meta($filename);

    $obj->write_meta(%args);

=head1 DESCRIPTION

This will write meta-data from EXIF files, and standardize it to a common
nomenclature, such as "tags" for things called tags, or Keywords or Subject etc.

=cut

use v5.10;
use Carp;
use common::sense;
use File::LibMagic;
use Image::ExifTool qw(:Public);
use Image::ExifTool::XMP;
use YAML::Any;
use File::Spec;
use List::MoreUtils qw(uniq);

use parent qw(File::Sticker::Scribe);

BEGIN {
    # Set the user-defined fields for EXIF
    %Image::ExifTool::UserDefined::sticker = (
        GROUPS => { 0 => 'XMP', 1 => 'XMP-sticker', 2 => 'Image' },
        NAMESPACE => { 'sticker' => 'http://ns.katspace.org/sticker/1.0/' },
        WRITABLE => 'string',
        # To maximize flexibility, this is going to be a plain string
        # which we will populate with YAML data.
        FreeFields => { },
    );

    %Image::ExifTool::UserDefined = (
        # new XMP namespaces (ie. XMP-xxx) must be added to the Main XMP table:
        'Image::ExifTool::XMP::Main' => {
            sticker => {
                SubDirectory => {
                    TagTable => 'Image::ExifTool::UserDefined::sticker'
                },
            },
        }
    );
    Image::ExifTool::XMP::RegisterNamespace(\%Image::ExifTool::UserDefined::sticker);
}

# FOR DEBUGGING
=head1 DEBUGGING

=head2 whoami

Used for debugging info

=cut
sub whoami  { ( caller(1) )[3] }

=head1 METHODS

=head2 init

Initialize the object.

    $scribe->init(wanted_fields=>{title=>'TEXT',count=>'NUMBER',tags=>'MULTI'});

=cut

sub init {
    my $self = shift;
    my %parameters = @_;

    $self->SUPER::init(%parameters);

} # init

=head2 priority

The priority of this writer.  Scribes with higher priority get tried first.

=cut

sub priority {
    my $class = shift;
    return 1;
} # priority

=head2 allowed_file

If this scribe can be used for the given file, then this returns true.
File must be one of: PDF or an image which is not a GIF.
(GIF files need to be treated separately)
(ExifTool can't write to EPUB)

=cut

sub allowed_file {
    my $self = shift;
    my $file = shift;
    say STDERR whoami(), " file=$file" if $self->{verbose} > 2;

    $file = $self->_get_the_real_file(filename=>$file);
    my $ft = $self->{file_magic}->info_from_filename($file);
    if ($ft->{mime_type} =~ /(image|pdf)/
            and $ft->{mime_type} !~ /gif/)
    {
        return 1;
    }
    return 0;
} # allowed_file

=head2 known_fields

Returns the fields which this scribe knows about.

    my $known_fields = $scribe->known_fields();

=cut

sub known_fields {
    my $self = shift;

    return {
        title=>'TEXT',
        creator=>'TEXT',
        description=>'TEXT',
        location=>'TEXT',
        tags=>'MULTI',
        %{$self->{wanted_fields}},
    };
} # known_fields

=head2 readonly_fields

Returns the fields which this scribe knows about, which can't be overwritten,
but are allowed to be "wanted" fields. Things like file-size etc.

    my $readonly_fields = $scribe->readonly_fields();

=cut

sub readonly_fields {
    my $self = shift;

    return {
        date=>'TEXT',
        copyright=>'TEXT',
        flash=>'TEXT',
        filesize=>'NUMBER',
        imagesize=>'TEXT',
        imageheight=>'NUMBER',
        imagewidth=>'NUMBER',
        megapixels=>'NUMBER'};
} # readonly_fields

=head2 read_meta

Read the meta-data from the given file.

    my $meta = $obj->read_meta($filename);

=cut

sub read_meta {
    my $self = shift;
    my $filename = shift;
    say STDERR whoami(), " filename=$filename" if $self->{verbose} > 2;

    $filename = $self->_get_the_real_file(filename=>$filename);
    my $exif_options = {DateFormat => "%Y-%m-%d %H:%M:%S"};
    my $info = ImageInfo($filename,$exif_options);
    my %meta = ();

    # Check if this is a Gutenberg book; they have quirks.
    my $is_gutenberg_book = 0;
    if (exists $info->{Identifier}
            and $info->{'Identifier'} =~ m!http://www.gutenberg.org/ebooks/\d+!)
    {
        $is_gutenberg_book = 1;
        # If this is a Gutenberg book, the Identifier holds the correct URL
        $meta{'url'} = $info->{'Identifier'};
    }
    # There are multiple fields which could be used as a file "description".
    # Check through them until you find a non-empty one.
    my $description = '';
    foreach my $field (qw(Caption-Abstract Comment UserComment ImageDescription Description))
    {
        if (exists $info->{$field}
                and $info->{$field}
                and $info->{$field} !~ /^---/ # YAML - not a description!
                and !$description)
        {
            $description = $info->{$field};
            $description =~ s/\n$//; # remove trailing newlines
        }
    }
    $meta{description} = $description if $description;
    # There are multiple fields which could be used as a file content creator.
    # Check through them until you find a non-empty one.
    my $creator = '';
    foreach my $field (qw(Author Artist Creator))
    {
        if (exists $info->{$field} and $info->{$field} and !$creator)
        {
            $creator = $info->{$field};
        }
    }
    $meta{creator} = $creator if $creator;

    # There are multiple fields which could be used as a copyright notice.
    # Check through them until you find a non-empty one.
    my $copyright = '';
    foreach my $field (qw(License Rights))
    {
        if (exists $info->{$field} and $info->{$field} and !$copyright)
        {
            $copyright = $info->{$field};
        }
    }
    $meta{copyright} = $copyright if $copyright;

    # There are multiple fields which could be used as a file date.
    # Check through them until you find a non-empty one.
    my $date = '';
    foreach my $field (qw(CreateDate DateTimeOriginal Date PublishedDate PublicationDate))
    {
        if (exists $info->{$field} and $info->{$field} and !$date)
        {
            $date = $info->{$field};
        }
    }
    $meta{date} = $date if $date;

    # Use a consistent naming for tag fields.
    # Combine the tag-like fields together.
    # Preserve the order and check for dupicates later with uniq
    my @tags = ();
    foreach my $field (qw(Keywords Subject))
    {
        if (exists $info->{$field} and $info->{$field})
        {
            my $val = $info->{$field};
            my @these_tags;
            if ($is_gutenberg_book)
            {
                # gutenberg tags are multi-word, separated by comma-space or ' -- '
                # and can have parens in them
                $val =~ s/\(//g;
                $val =~ s/\)//g;
                $val =~ s/\s--\s/,/g;
                @these_tags = split(/,\s?/, $val);
            }
            else
            {
                @these_tags = split(/,\s*/, $val);
            }
            foreach my $t (@these_tags)
            {
                $t =~ s/ - / /g; # remove isolated dashes
                $t =~ s/[^\w\s,-]//g; # remove non-word characters
                push @tags, $t;
            }
        }
    }
    # Are there any tags?
    if (@tags)
    {
        # remove duplicates
        $meta{tags} = [uniq @tags];
    }
    else # remove empty tag-field
    {
        delete $meta{tags};
    }

    # There are SOOOOOO many fields in EXIF data, just remember a subset of them
    foreach my $field (qw(
Flash
ImageHeight
ImageSize
ImageWidth
Megapixels
PageCount
Location
Title
))
    {
        if (exists $info->{$field} and $info->{$field})
        {
            $meta{lc($field)} = $info->{$field};
        }
    }

    # -------------------------------------------------
    # Freeform Fields
    # These are stored as YAML data in the XMP-sticker:FreeFields field.
    # They used to be stored in the XMP:Description field,
    # before then the ImageDescription field, before then the UserComment field
    # so they need to be checked too.
    # -------------------------------------------------
    if (exists $info->{FreeFields}
            and $info->{FreeFields}
            and $info->{FreeFields} =~ /^---/)
    {
        say STDERR sprintf("FreeFields='%s'", $info->{FreeFields}) if $self->{verbose} > 2;
        my $data;
        eval {$data = Load($info->{FreeFields});};
        if ($@)
        {
            warn __PACKAGE__, " Load of YAML data failed: $@";
        }
        elsif (!$data)
        {
            warn __PACKAGE__, " no legal YAML" if $self->{verbose} > 2;
        }
        else # okay
        {
            foreach my $field (sort keys %{$data})
            {
                $meta{$field} = $data->{$field};
            }
        }
    }
    elsif (exists $info->{Description}
            and $info->{Description}
            and $info->{Description} =~ /^---/)
    {
        say STDERR sprintf("Description='%s'", $info->{Description}) if $self->{verbose} > 2;
        my $data;
        eval {$data = Load($info->{Description});};
        if ($@)
        {
            warn __PACKAGE__, " Load of YAML data failed: $@";
        }
        elsif (!$data)
        {
            warn __PACKAGE__, " no legal YAML" if $self->{verbose} > 2;
        }
        else # okay
        {
            foreach my $field (sort keys %{$data})
            {
                $meta{$field} = $data->{$field};
            }
        }
    }
    elsif (exists $info->{ImageDescription}
            and $info->{ImageDescription}
            and $info->{ImageDescription} =~ /^---/)
    {
        say STDERR sprintf("ImageDescription='%s'", $info->{ImageDescription}) if $self->{verbose} > 2;
        my $data;
        eval {$data = Load($info->{ImageDescription});};
        if ($@)
        {
            warn __PACKAGE__, " Load of YAML data failed: $@";
        }
        elsif (!$data)
        {
            warn __PACKAGE__, " no legal YAML" if $self->{verbose} > 2;
        }
        else # okay
        {
            foreach my $field (sort keys %{$data})
            {
                $meta{$field} = $data->{$field};
            }
        }
    }
    elsif (exists $info->{UserComment}
            and $info->{UserComment}
            and $info->{UserComment} =~ /^---/)
    {
        say STDERR sprintf("UserComment='%s'", $info->{UserComment}) if $self->{verbose} > 2;
        my $data;
        eval {$data = Load($info->{UserComment});};
        if ($@)
        {
            warn __PACKAGE__, " Load of YAML data failed: $@";
        }
        elsif (!$data)
        {
            warn __PACKAGE__, " no legal YAML" if $self->{verbose} > 2;
        }
        else # okay
        {
            foreach my $field (sort keys %{$data})
            {
                $meta{$field} = $data->{$field};
            }
        }
    }

    return \%meta;
} # read_meta

=head1 Helper Functions

Private interface.

=head2 replace_one_field

Overwrite the given field. This does no checking.

    $writer->replace_one_field(filename=>$filename,field=>$field,value=>$value);

=cut

sub replace_one_field {
    my $self = shift;
    my %args = @_;
    say STDERR whoami(), " field=$args{field},value=$args{value}" if $self->{verbose} > 2;

    my $filename = $self->_get_the_real_file(filename=>$args{filename});
    my $field = $args{field};
    my $value = $args{value};

    my $ft = $self->{file_magic}->info_from_filename($filename);
    my $et = new Image::ExifTool;
    $et->Options(ListSep=>',',ListSplit=>',');
    $et->ExtractInfo($filename);

    my $success;
    if ($field eq 'creator')
    {
        $success = $et->SetNewValue('Creator', $value);
    }
    elsif ($field eq 'copyright')
    {
        $success = $et->SetNewValue('License', $value);
    }
    elsif ($field eq 'title')
    {
        $success = $et->SetNewValue('Title', $value);
    }
    elsif ($field eq 'location')
    {
        $success = $et->SetNewValue('Location', $value);
    }
    elsif ($field eq 'description')
    {
        # Okay, here's the messy relationship between the description,
        # the freeform data, and GIMP.
        #
        # I originally wrote the freeform data in the UserComment field, and
        # the description into the Description and Comment fields, then I found
        # that GIMP uses the UserComment field as the description field at a
        # higher priority than the Comment field.
        #
        # So then I wrote the freeform data in the ImageDescription field,
        # then I found that GIMP ALSO uses THAT field as the description field
        # at a higher priority than the Comment field.
        #
        # And that GIMP overwrites ALL THREE fields (Comment, UserComment, 
        # ImageDescription) with what it considers the description
        # when saving a file.
        #
        # So then I used the XMP:Description (Description) field for the
        # freeform data, because GIMP neither reads nor overwrites that.
        # 
        # But then I discovered you could DEFINE YOUR OWN fields in EXIF,
        # so I defined XMP-sticker:FreeFields to put my freeform YAML data into.

        # Before the decription is written, the freeform data
        # needs to be converted to its new home.
        $self->_convert_freeform_data(exif=>$et);

        # GIMP reads and overwrites the Comment, UserComment
        # and ImageDescription fields, so we need to do that too.
        # Escpecially since GIMP does not look at the Comment field
        # if one of the other two is not empty.
        $success = $et->SetNewValue('UserComment', $value);
        $success = $et->SetNewValue('ImageDescription', $value);
        
        if ($ft->{mime_type} =~ /image\/jpeg/)
        {
            $success = $et->SetNewValue('Comment', $value);
        }
    }
    elsif ($field eq 'tags')
    {
        if (ref $value eq 'ARRAY')
        {
            $success = $et->SetNewValue('Keywords', $value);
            $success = $et->SetNewValue('Subject', $value);
        }
        else
        {
            my @tags = split(/,/,$value);
            $success = $et->SetNewValue('Keywords', \@tags);
            $success = $et->SetNewValue('Subject', \@tags);
        }
    }
    else # freeform field
    {
        # Need to read all the YAML, change this field, and write it again
        my $fdata = $self->_read_freeform_data(exif=>$et);
        $fdata->{$field} = $value;
        $success = $self->_write_freeform_data(newdata=>$fdata,exif=>$et);
    }

    if ($success)
    {
        $et->WriteInfo($filename);
    }
    return $success;
} # replace_one_field

=head2 delete_field_from_file

Completely remove the given field. This does no checking.

    $writer->delete_field_from_file(filename=>$filename,field=>$field);

=cut

sub delete_field_from_file {
    my $self = shift;
    my %args = @_;
    say STDERR whoami(), " field=$args{field}" if $self->{verbose} > 2;

    my $filename = $self->_get_the_real_file(filename=>$args{filename});
    my $field = $args{field};

    my $ft = $self->{file_magic}->info_from_filename($filename);
    my $et = new Image::ExifTool;
    $et->Options(ListSep=>',',ListSplit=>',');
    $et->ExtractInfo($filename);

    my $success;
    if ($field eq 'creator')
    {
        $success = $et->SetNewValue('Creator')
    }
    elsif ($field eq 'title')
    {
        $success = $et->SetNewValue('Title')
    }
    elsif ($field eq 'description')
    {
        # GIMP reads and overwrites the Comment, UserComment
        # and ImageDescription fields, so we need to do that too.
        if ($ft->{mime_type} =~ /image\/jpeg/)
        {
            $success = $et->SetNewValue('Comment');
        }
        $success = $et->SetNewValue('ImageDescription');
        $success = $et->SetNewValue('UserComment');
    }
    elsif ($field eq 'tags')
    {
        $success = $et->SetNewValue('Keywords');
        $success = $et->SetNewValue('Subject');
    }
    else # freeform field
    {
        # Need to read all the YAML, change this field, and write it again
        my $fdata = $self->_read_freeform_data(exif=>$et);
        if (exists $fdata->{$field})
        {
            delete $fdata->{$field};
            $success = $self->_write_freeform_data(newdata=>$fdata,exif=>$et);
        }
    }

    if ($success)
    {
        $et->WriteInfo($filename);
    }
    return $success;
} # delete_field_from_file

=head2 _get_the_real_file

If the file is a directory, look for a cover file.
If the file is a soft link, look for the file it is pointing to
(because ExifTool behaves badly with soft links).

    my $real_file = $scribe->_get_the_real_file(filename=>$filename);

=cut

sub _get_the_real_file {
    my $self = shift;
    my %args = @_;
    say STDERR whoami(), " filename=$args{filename}" if $self->{verbose} > 2;

    my $filename = $args{filename};
    if (-d $filename) # is a directory, look for a cover file
    {
        my $cover_file = ($self->{cover_file} ? $self->{cover_file} : 'cover.jpg');
        $cover_file = File::Spec->catfile($filename, $cover_file);
        if (-f $cover_file)
        {
            $filename = $cover_file;
        }
        else # give up and die
        {
            croak "$args{filename} is directory, cannot find $cover_file";
        }
    }

    # ExifTool has a wicked habit of replacing soft-linked files with the
    # contents of the file rather than honouring the link.  While using the
    # exiftool script offers -overwrite_original_in_place to deal with this,
    # the Perl module does not appear to have such an option available.

    # So the way to get around this is to check if the file is a soft link, and
    # if it is, find the real file, and write to that. And if *that* file is
    # a soft link... go down the rabbit-hole as deep as it goes.
    while (-l $filename)
    {
        my $realfile = readlink $filename;
        if (-f $realfile)
        {
            $filename = $realfile;
        }
        else # give up and die
        {
            croak "$args{filename} is soft link, cannot find $realfile";
        }
    }

    return $filename;
} # _get_the_real_file

=head2 _read_freeform_data

Read the freeform data as YAML data from the XMP-sticker:FreeFields field.
 
    my $ydata = $self->_read_freeform_data(exif=>$exif);

=cut

sub _read_freeform_data {
    my $self = shift;
    my %args = @_;
    say STDERR whoami() if $self->{verbose} > 2;

    # CONVERT FREEFORM DATA if needed BEFOREHAND
    $self->_convert_freeform_data(%args);

    my $ydata;
    my $et = $args{exif};
    my $ystring = $et->GetValue('FreeFields');
    $ystring = $et->GetNewValue('FreeFields') if !$ystring;
    say STDERR "ystring=$ystring" if $self->{verbose} > 2;
    if ($ystring and $ystring =~ /^---/) # YAML data needs prefix
    {
        eval {$ydata = Load($ystring);};
        if ($@)
        {
            warn __PACKAGE__, " Load of YAML data failed: $@ // '$ystring'";
        }
        elsif (!$ydata)
        {
            warn __PACKAGE__, " no legal YAML" if $self->{verbose} > 1;
        }
    }
    say STDERR Dump($ydata) if $self->{verbose} > 2;
    return $ydata;
} # _read_freeform_data

=head2 _write_freeform_data

Write the freeform data as YAML data into the XML-sticker:FreeFields field
This overwrites whatever is there, it does not check.
    
    $self->_write_freeform_data(newdata=>\%newdata,exif=>$exif);

=cut

sub _write_freeform_data {
    my $self = shift;
    my %args = @_;
    say STDERR whoami() if $self->{verbose} > 2;

    my $newdata = $args{newdata};
    my $et = $args{exif};
    # restore multi-value comma-separated fields to arrays
    foreach my $fn (keys %{$self->{wanted_fields}})
    {
        if ($self->{wanted_fields}->{$fn} eq 'MULTI'
                and exists $newdata->{$fn}
                and defined $newdata->{$fn}
                and $newdata->{$fn} =~ /,/)
        {
            my @vals = split(/,/, $newdata->{$fn});
            $newdata->{$fn} = \@vals;
        }
    }
    my $ystring = Dump($newdata);
    say STDERR "ystring=$ystring" if $self->{verbose} > 2;
    my $success = $et->SetNewValue('XMP-sticker:FreeFields', $ystring);
    return $success;
} # _write_freeform_data

=head2 _convert_freeform_data

Convert the freeform data so that it is placed into the XMP-sticker:FreeFields
field rather than the XMP:Description, UserComment or ImageDescription field.
 
    $self->_convert_freeform_data(exif=>$exif);

=cut

sub _convert_freeform_data {
    my $self = shift;
    my %args = @_;
    say STDERR whoami() if $self->{verbose} > 2;

    my $et = $args{exif};
    # Check if it needs conversion at all.
    # If the XMP-sticker:FreeFields field is not empty
    # and contains YAML data, then nothing needs to be done.
    my $ystring = $et->GetValue('XMP-sticker:FreeFields');
    $ystring = $et->GetNewValue('XMP-sticker:FreeFields') if !$ystring;
    if ($ystring and $ystring =~ /^---/) # Assume YAML data
    {
        # no conversion needed
        return 1;
    }

    # ------------------------------------
    # Conversion needed
    # Read from XMP:Description, write into XMP-sticker:FreeFields
    # Otherwise read from ImageDescription. 
    # The YAML data might be in UserComment instead if old.
    # ------------------------------------
    $ystring = $et->GetValue('Description');
    $ystring = $et->GetNewValue('Description') if !$ystring;
    if (!$ystring or $ystring !~ /^---/) # Try ImageDescription
    {
        $ystring = $et->GetValue('ImageDescription');
        $ystring = $et->GetNewValue('ImageDescription') if !$ystring;
    }
    if (!$ystring or $ystring !~ /^---/) # Try UserComment
    {
        $ystring = $et->GetValue('UserComment');
        $ystring = $et->GetNewValue('UserComment') if !$ystring;
    }

    my $ydata;
    my $success = 0;
    if ($ystring and $ystring =~ /^---/) # Probably YAML data
    {
        # Check if the YAML data is valid
        eval {$ydata = Load($ystring);};
        if ($@)
        {
            warn __PACKAGE__, " Load of YAML data failed: $@ ## '$ystring'";
        }
        elsif (!$ydata)
        {
            warn __PACKAGE__, " no legal YAML" if $self->{verbose} > 1;
        }
        else # data is okay
        {
            $success = $et->SetNewValue('XMP-sticker:FreeFields', $ystring);
            if ($success)
            {
                # Clear out the XMP:Description field
                $et->SetNewValue('XMP:Description');

                # Put the description, if there is one,
                # into the UserComment, ImageDescription
                my $desc = $et->GetValue('Comment');
                $desc = $et->GetNewValue('Comment') if !$desc;
                $desc = $et->GetValue('Caption-Abstract') if !$desc;
                if ($desc)
                {
                    $et->SetNewValue('UserComment', $desc);
                    $et->SetNewValue('ImageDescription', $desc);
                }
                else
                {
                    # Otherwise clear out no longer valid data
                    $et->SetNewValue('ImageDescription');
                    $et->SetNewValue('UserComment');
                }
            }
        }
    }
    else # No YAML data to convert
    {
        # Put some empty data in there.
        my %newdata = ();
        my $nystring = Dump(\%newdata);
        $success = $et->SetNewValue('XMP-sticker:FreeFields', $nystring);
    }

    return $success;
} # _convert_freeform_data

=head1 BUGS

Please report any bugs or feature requests to the author.

=cut

1; # End of File::Sticker::Scribe
__END__