—#------------------------------------------------------------------------------
# File: Import.pm
#
# Description: Import CSV and JSON database files
#
# Revisions: 2011-03-05 - P. Harvey Created
#------------------------------------------------------------------------------
package
Image::ExifTool::Import;
use
strict;
require
Exporter;
$VERSION
=
'1.14'
;
@ISA
=
qw(Exporter)
;
@EXPORT_OK
=
qw(ReadCSV ReadJSON)
;
sub
ReadJSONObject($;$);
my
%unescapeJSON
= (
't'
=>
"\t"
,
'n'
=>
"\n"
,
'r'
=>
"\r"
,
'b'
=>
"\b"
,
'f'
=>
"\f"
);
my
$charset
;
#------------------------------------------------------------------------------
# Read CSV file
# Inputs: 0) CSV file name, file ref or RAF ref, 1) database hash ref,
# 2) missing tag value, 3) delimiter if other than ','
# Returns: undef on success, or error string
# Notes: There are various flavours of CSV, but here we assume that only
# double quotes are escaped, and they are escaped by doubling them
sub
ReadCSV($$;$$)
{
local
(
$_
, $/);
my
(
$file
,
$database
,
$missingValue
,
$delim
) =
@_
;
my
(
$buff
,
@tags
,
$found
,
$err
,
$raf
,
$openedFile
);
if
(UNIVERSAL::isa(
$file
,
'File::RandomAccess'
)) {
$raf
=
$file
;
$file
=
'CSV file'
;
}
elsif
(
ref
$file
eq
'GLOB'
) {
$raf
= File::RandomAccess->new(
$file
);
$file
=
'CSV file'
;
}
else
{
open
CSVFILE,
$file
or
return
"Error opening CSV file '${file}'"
;
binmode
CSVFILE;
$openedFile
= 1;
$raf
= File::RandomAccess->new(\
*CSVFILE
);
}
$delim
=
','
unless
defined
$delim
;
# set input record separator by first newline found in the file
# (safe because first line should contain only tag names)
while
(
$raf
->Read(
$buff
, 65536)) {
$buff
=~ /(\x0d\x0a|\x0d|\x0a)/ and $/ = $1,
last
;
}
$raf
->Seek(0,0);
while
(
$raf
->ReadLine(
$buff
)) {
my
(
@vals
,
$v
,
$i
,
%fileInfo
);
my
@toks
=
split
/\Q
$delim
/,
$buff
;
while
(
@toks
) {
(
$v
=
shift
@toks
) =~ s/^ +//;
# remove leading spaces
if
(
$v
=~ s/^"//) {
# quoted value must end in an odd number of quotes
while
(
$v
!~ /("+)\s*$/ or not
length
($1) & 1) {
if
(
@toks
) {
$v
.=
$delim
.
shift
@toks
;
}
else
{
# read another line from the file
$raf
->ReadLine(
$buff
) or
last
;
@toks
=
split
/\Q
$delim
/,
$buff
;
last
unless
@toks
;
$v
.=
shift
@toks
;
}
}
$v
=~ s/"\s*$//;
# remove trailing quote and whitespace
$v
=~ s/
""
/"/g;
# un-escape quotes
}
else
{
$v
=~ s/[ \n\r]+$//;
# remove trailing spaces/newlines
}
push
@vals
,
$v
;
}
if
(
@tags
) {
# save values for each tag
$fileInfo
{_ordered_keys_} = [ ];
for
(
$i
=0;
$i
<
@vals
and
$i
<
@tags
; ++
$i
) {
# ignore empty entries unless missingValue is empty too
next
unless
length
$vals
[
$i
] or
defined
$missingValue
and
$missingValue
eq
''
;
# delete tag (set value to undef) if value is same as missing tag
$fileInfo
{
$tags
[
$i
]} =
(
defined
$missingValue
and
$vals
[
$i
] eq
$missingValue
) ?
undef
:
$vals
[
$i
];
push
@{
$fileInfo
{_ordered_keys_}},
$tags
[
$i
];
}
# figure out the file name to use
if
(
$fileInfo
{SourceFile}) {
$$database
{
$fileInfo
{SourceFile}} = \
%fileInfo
;
$found
= 1;
}
}
else
{
# the first row should be the tag names
foreach
(
@vals
) {
# terminate at first blank tag name (eg. extra comma at end of line)
last
unless
length
$_
;
@tags
or s/^\xef\xbb\xbf//;
# remove UTF-8 BOM if it exists
/^([-_0-9A-Z]+:)*[-_0-9A-Z]+
#?$/i or $err = "Invalid tag name '${_}'", last;
push
(
@tags
,
$_
);
}
last
if
$err
;
@tags
or
$err
=
'No tags found'
,
last
;
# fix "SourceFile" case if necessary
$tags
[0] =
'SourceFile'
if
lc
$tags
[0] eq
'sourcefile'
;
}
}
close
CSVFILE
if
$openedFile
;
undef
$raf
;
$err
=
'No SourceFile column'
unless
$found
or
$err
;
return
$err
?
"$err in $file"
:
undef
;
}
#------------------------------------------------------------------------------
# Convert unicode code point to UTF-8
# Inputs: 0) integer Unicode character
# Returns: UTF-8 bytes
sub
ToUTF8($)
{
return
Image::ExifTool::Charset::Recompose(
undef
, [
$_
[0]],
$charset
);
}
#------------------------------------------------------------------------------
# Read JSON object from file
# Inputs: 0) RAF reference or undef, 1) optional scalar reference for data
# to read before reading from file (ie. the file read buffer)
# Returns: JSON object (scalar, hash ref, or array ref), or undef on EOF or
# empty object or array (and sets $$buffPt to empty string on EOF)
# Notes: position in buffer is significant
sub
ReadJSONObject($;$)
{
my
(
$raf
,
$buffPt
) =
@_
;
# initialize buffer if necessary
my
(
$pos
,
$readMore
,
$rtnVal
,
$tok
,
$key
,
$didBOM
);
if
(
$buffPt
) {
$pos
=
pos
$$buffPt
;
$pos
=
pos
(
$$buffPt
) = 0
unless
defined
$pos
;
}
else
{
my
$buff
=
''
;
$buffPt
= \
$buff
;
$pos
= 0;
}
Tok:
for
(;;) {
# (didn't spend the time to understand how $pos could be undef, but
# put a test here to be safe because one user reported this problem)
last
unless
defined
$pos
;
if
(
$pos
>=
length
$$buffPt
or
$readMore
) {
last
unless
defined
$raf
;
# read another 64kB and add to unparsed data
my
$offset
=
length
(
$$buffPt
) -
$pos
;
if
(
$offset
) {
my
$buff
;
$raf
->Read(
$buff
, 65536) or
$$buffPt
=
''
,
last
;
$$buffPt
=
substr
(
$$buffPt
,
$pos
) .
$buff
;
}
else
{
$raf
->Read(
$$buffPt
, 65536) or
$$buffPt
=
''
,
last
;
}
unless
(
$didBOM
) {
$$buffPt
=~ s/^\xef\xbb\xbf//;
# remove UTF-8 BOM if it exists
$didBOM
= 1;
}
$pos
=
pos
(
$$buffPt
) = 0;
$readMore
= 0;
}
unless
(
$tok
) {
# skip white space and find next character
$$buffPt
=~ /(\S)/g or
$pos
=
length
(
$$buffPt
),
next
;
$tok
= $1;
$pos
=
pos
$$buffPt
;
}
# see what type of object this is
if
(
$tok
eq
'{'
) {
# object (hash)
$rtnVal
= {
_ordered_keys_
=> [ ] }
unless
defined
$rtnVal
;
for
(;;) {
# read "KEY":"VALUE" pairs
unless
(
defined
$key
) {
$key
= ReadJSONObject(
$raf
,
$buffPt
);
$pos
=
pos
$$buffPt
;
}
# ($key may be undef for empty JSON object)
if
(
defined
$key
) {
# scan to delimiting ':'
$$buffPt
=~ /(\S)/g or
$readMore
= 1,
next
Tok;
$1 eq
':'
or
return
undef
;
# error if not a colon
my
$val
= ReadJSONObject(
$raf
,
$buffPt
);
$pos
=
pos
$$buffPt
;
return
undef
unless
defined
$val
;
$$rtnVal
{
$key
} =
$val
;
push
@{
$$rtnVal
{_ordered_keys_}},
$key
;
undef
$key
;
}
# scan to delimiting ',' or bounding '}'
$$buffPt
=~ /(\S)/g or
$readMore
= 1,
next
Tok;
last
if
$1 eq
'}'
;
# check for end of object
$1 eq
','
or
return
undef
;
# error if not a comma
}
}
elsif
(
$tok
eq
'['
) {
# array
$rtnVal
= [ ]
unless
defined
$rtnVal
;
for
(;;) {
my
$item
= ReadJSONObject(
$raf
,
$buffPt
);
$pos
=
pos
$$buffPt
;
# ($item may be undef for empty array)
push
@$rtnVal
,
$item
if
defined
$item
;
# scan to delimiting ',' or bounding ']'
$$buffPt
=~ /(\S)/g or
$readMore
= 1,
next
Tok;
last
if
$1 eq
']'
;
# check for end of array
$1 eq
','
or
return
undef
;
# error if not a comma
}
}
elsif
(
$tok
eq
'"'
) {
# quoted string
for
(;;) {
$$buffPt
=~ /(\\*)"/g or
$readMore
= 1,
next
Tok;
last
unless
length
($1) & 1;
# check for escaped quote
}
$rtnVal
=
substr
(
$$buffPt
,
$pos
,
pos
(
$$buffPt
)-
$pos
-1);
# unescape characters
$rtnVal
=~ s/\\u([0-9a-f]{4})/ToUTF8(
hex
$1)/ige;
$rtnVal
=~ s/\\(.)/
$unescapeJSON
{$1}||$1/sge;
# decode base64 (binary data) values
if
(
$rtnVal
=~ /^base64:[A-Za-z0-9+\/]*={0,2}$/ and
length
(
$rtnVal
) % 4 == 3) {
$rtnVal
= ${Image::ExifTool::XMP::DecodeBase64(
substr
(
$rtnVal
,7))};
}
}
elsif
(
$tok
eq
']'
or
$tok
eq
'}'
or
$tok
eq
','
) {
# return undef for empty object, array, or list item
# (empty list item actually not valid JSON)
pos
(
$$buffPt
) =
pos
(
$$buffPt
) - 1;
}
else
{
# number, 'true', 'false', 'null'
$$buffPt
=~ /([\s:,\}\]])/g or
$readMore
= 1,
next
;
pos
(
$$buffPt
) =
pos
(
$$buffPt
) - 1;
$rtnVal
=
$tok
.
substr
(
$$buffPt
,
$pos
,
pos
(
$$buffPt
)-
$pos
);
}
last
;
}
return
$rtnVal
;
}
#------------------------------------------------------------------------------
# Read JSON file
# Inputs: 0) JSON file name, file ref, RAF ref or SCALAR ref, 1) database hash ref,
# 2) flag to delete "-" tags, 3) character set
# Returns: undef on success, or error string
sub
ReadJSON($$;$$)
{
local
$_
;
my
(
$file
,
$database
,
$missingValue
,
$chset
) =
@_
;
my
(
$raf
,
$openedFile
);
# initialize character set for converting "\uHHHH" chars
$charset
=
$chset
||
'UTF8'
;
if
(UNIVERSAL::isa(
$file
,
'File::RandomAccess'
)) {
$raf
=
$file
;
$file
=
'JSON file'
;
}
elsif
(
ref
$file
eq
'GLOB'
) {
$raf
= File::RandomAccess->new(
$file
);
$file
=
'JSON file'
;
}
elsif
(
ref
$file
eq
'SCALAR'
) {
$raf
= File::RandomAccess->new(
$file
);
$file
=
'in memory'
;
}
else
{
open
JSONFILE,
$file
or
return
"Error opening JSON file '${file}'"
;
binmode
JSONFILE;
$openedFile
= 1;
$raf
= File::RandomAccess->new(\
*JSONFILE
);
}
my
$obj
= ReadJSONObject(
$raf
);
close
JSONFILE
if
$openedFile
;
unless
(
ref
$obj
eq
'ARRAY'
) {
ref
$obj
eq
'HASH'
or
return
"Format error in JSON file '${file}'"
;
$obj
= [
$obj
];
}
my
(
$info
,
$found
);
foreach
$info
(
@$obj
) {
next
unless
ref
$info
eq
'HASH'
;
# fix "SourceFile" case, or assume '*' if SourceFile not specified
unless
(
defined
$$info
{SourceFile}) {
my
(
$key
) =
grep
/^SourceFile$/i,
keys
%$info
;
if
(
$key
) {
$$info
{SourceFile} =
$$info
{
$key
};
delete
$$info
{
$key
};
}
else
{
$$info
{SourceFile} =
'*'
;
}
}
if
(
defined
$missingValue
) {
$$info
{
$_
} eq
$missingValue
and
$$info
{
$_
} =
undef
foreach
keys
%$info
;
}
$$database
{
$$info
{SourceFile}} =
$info
;
$found
= 1;
}
return
$found
?
undef
:
"No valid JSON objects in '${file}'"
;
}
1;
# end
__END__
=head1 NAME
Image::ExifTool::Import - Import CSV and JSON database files
=head1 SYNOPSIS
use Image::ExifTool::Import qw(ReadCSV ReadJSON);
$err = ReadCSV($csvFile, \%database);
$err = ReadJSON($jsonfile, \%database);
=head1 DESCRIPTION
This module contains routines for importing tag information from CSV (Comma
Separated Value) and JSON (JavaScript Object Notation) database files.
=head1 EXPORTS
Exports nothing by default, but ReadCSV and ReadJSON may be exported.
=head1 METHODS
=head2 ReadCSV / ReadJSON
Read CSV or JSON file into a database hash.
=over 4
=item Inputs:
0) CSV file name or file reference.
1) Hash reference for database object.
2) Optional string used to represent an undefined (missing) tag value.
(Used for deleting tags.)
3) For ReadCSV this gives the delimiter for CSV entries, with a default of
",". For ReadJSON this is the character set for converting Unicode escape
sequences in strings, with a default of "UTF8". See the ExifTool Charset
option for a list of valid character sets.
=item Return Value:
These functions return an error string, or undef on success and populate the
database hash with entries from the CSV or JSON file. Entries are keyed
based on the SourceFile column of the CSV or JSON information, and are
stored as hash lookups of tag name/value for each SourceFile. The order
of the keys (CSV column order or order in a JSON object) is stored as an
ARRAY reference in a special "_ordered_keys_" element of this hash.
=back
=head1 AUTHOR
Copyright 2003-2025, Phil Harvey (philharvey66 at gmail.com)
This library is free software; you can redistribute it and/or modify it
under the same terms as Perl itself.
=head1 SEE ALSO
L<Image::ExifTool(3pm)|Image::ExifTool>
=cut