# Version string of this module.
$PDF::Parse::VERSION = '1.09';

# Oldest interpreter this file declares support for.
require 5.004;

# Parent classes: Exporter and PDF::Core.
@ISA = ('Exporter', 'PDF::Core');

# Names that may be imported on request.
@EXPORT_OK = (
    'GetInfo',
    'TargetFile',
    'Pages',
    'PageSize',
    'PageRotation',
);
# ReadCrossReference_pass1($fd, $offset, $self)
#
# First pass over a cross-reference section: fills the object tables and
# picks up the trailer keys needed to locate the rest of the document.
#
#   $fd     - file handle of the PDF file (switched to binmode here)
#   $offset - byte offset of the "xref" keyword of the section to read
#   $self   - object hash that receives the results
#
# Side effects on $self:
#   {Objects}[n]            offset of in-use object n, or the negated first
#                           field of the entry when object n is free
#   {Gen_Num}[n]            generation number taken from the entry
#   {Cross_Reference_Size}  value of /Size   (first one seen wins)
#   {Root_Object}           "num gen" of /Root    (first one seen wins)
#   {Crypt_Object}          "num gen" of /Encrypt (first one seen wins)
#   {Updated}               set to 1 when a /Prev key is present
#
# Returns the number of entries counted in this section plus those counted
# in every section reached through /Prev.
#
# Dies when the line at $offset does not contain "xref".
#
# Sections are visited newest first (the /Prev chain leads to older ones),
# so an entry is only recorded when the slot is still undefined; otherwise
# an older section would overwrite the current location of an object.
sub ReadCrossReference_pass1 {
    my $fd     = shift;
    my $offset = shift;
    my $self   = shift;

    my $initial_number;
    my $obj_counter        = 0;
    my $global_obj_counter = 0;

    binmode $fd;

    my $line = PDF::Core::PDFGetline($fd, \$offset);
    die "Can't read cross-reference section, according to trailer\n"
        if $line !~ /xref\r?\n?/;

    # --- cross-reference entries, up to the "trailer" keyword ---
    while (1) {
        $line = PDF::Core::PDFGetline($fd, \$offset);
        $line =~ s/^\n//;
        $line =~ s/^\r//;
        last if $line =~ /^trailer\r?\n?/;

        # "oooooooooo ggggg n" (in use) or "nnnnnnnnnn ggggg f" (free)
        if ($line =~ /^(\d+)\s+(\d+)\s+([nf])\r?\n?/) {
            my ($first, $generation, $type) = ($1, $2, $3);
            my $ind = $initial_number + ($obj_counter++);

            if (!defined $self->{Objects}[$ind]) {
                if ($type eq 'n') {
                    $self->{Objects}[$ind] = int $first;
                    $self->{Gen_Num}[$ind] = int $generation;
                }
                else {
                    # free entries are stored negated so they can be told
                    # apart from real offsets
                    $self->{Objects}[$ind] = -$first;
                    $self->{Gen_Num}[$ind] = $generation;
                }
            }
            next;
        }

        # subsection header: "first_object_number count"
        if ($line =~ /^(\d+)\s+\d+\r?\n?/) {
            $initial_number      = $1;
            $global_obj_counter += $obj_counter;
            $obj_counter         = 0;
            next;
        }
    }
    $global_obj_counter += $obj_counter;

    # --- trailer dictionary, up to "startxref" ---
    # $line still holds the "trailer" line, which may itself carry the
    # dictionary.  Every key is looked for on every line because the whole
    # dictionary is frequently written on a single line.
    while (1) {
        if ($line =~ /\/Size\s*(\d+)/) {
            $self->{Cross_Reference_Size} = $1
                if !$self->{Cross_Reference_Size};
        }
        if ($line =~ /\/Root\s+(\d+\s+\d+)\s+R/) {
            $self->{Root_Object} = $1 if !$self->{Root_Object};
        }
        if ($line =~ /\/Encrypt\s+(\d+\s+\d+)\s+R/) {
            $self->{Crypt_Object} = $1 if !$self->{Crypt_Object};
        }
        if ($line =~ /\/Prev\s*(\d+)/) {
            my $prev_offset = $1;
            $self->{Updated} = 1;

            # the recursive call moves the file position; put it back
            my $old_seek = tell $fd;
            $global_obj_counter +=
                ReadCrossReference_pass1($fd, $prev_offset, $self);
            seek $fd, $old_seek, 0;
        }

        $line = PDF::Core::PDFGetline($fd, \$offset);
        $line =~ s/^\n//;
        $line =~ s/^\r//;
        last if $line =~ /startxref\r?\n?/;
    }

    return $global_obj_counter;
}
# ReadCrossReference_pass2($fd, $offset, $self)
#
# Second pass over a cross-reference section.  It walks the same lines as
# the first pass (from the "xref" keyword to "startxref") but only acts on
# the trailer keys that need the object tables to be filled already:
#
#   /Info     the referenced object is read with ReadInfo()
#   /ID       not implemented; a warning is issued when $PDF::Verbose is set
#   /Encrypt  not implemented; a warning is issued when $PDF::Verbose is set
#   /Prev     the previous cross-reference section is processed recursively
#
#   $fd     - file handle of the PDF file
#   $offset - byte offset of the "xref" keyword of the section to read
#   $self   - object hash passed on to ReadInfo()
#
# Dies when the line at $offset does not contain "xref".
#
# Every key is looked for on every line, because a trailer dictionary is
# often written on one line (e.g. "<< /Size 9 /Root 1 0 R /Info 2 0 R >>").
sub ReadCrossReference_pass2 {
    my $fd     = shift;
    my $offset = shift;
    my $self   = shift;

    seek $fd, $offset, 0;

    my $line = PDF::Core::PDFGetline($fd, \$offset);
    die "Can't read cross-reference section, according to trailer\n"
        if $line !~ /xref\r?\n?/;

    while (1) {
        $line = PDF::Core::PDFGetline($fd, \$offset);
        $line =~ s/^\n//;
        $line =~ s/^\r//;
        last if $line =~ /startxref\r?\n?/;

        if ($line =~ /\/Info\s+(\d+\s+\d+\s+R)/) {
            my $info_ref = $1;

            # ReadInfo seeks elsewhere in the file; restore the position
            my $old_seek = tell $fd;
            ReadInfo($fd, $self, $info_ref);
            seek $fd, $old_seek, 0;
        }

        if ($line =~ /ID/) {
            $PDF::Verbose && warn "ID! Not yet implemented :-(\n";
        }

        if ($line =~ /Encrypt/) {
            $PDF::Verbose && warn "Encrypt! Not yet implemented :-(\n";
        }

        if ($line =~ /\/Prev\s*(\d+)/) {
            my $prev_offset = $1;

            my $old_seek = tell $fd;
            ReadCrossReference_pass2($fd, $prev_offset, $self);
            seek $fd, $old_seek, 0;
        }
    }

    return;
}
# ReadInfo($fd, $self, $info_obj)
#
# Reads the document information dictionary and copies its string entries
# into $self.
#
#   $fd       - file handle of the PDF file
#   $self     - object hash; $self->{Objects} must already hold the offsets
#   $info_obj - indirect reference such as "12 0 R"; only the object number
#               (first field) is used
#
# Keys handled: Author, CreationDate, ModDate, Creator, Producer, Title,
# Subject, Keywords.  A key is only stored when $self does not hold a true
# value for it yet.  What is stored is the line with the "/Key (value)"
# part replaced by the value.
#
# The dictionary is read line by line until a line containing ">>" is met.
# A line ending in a backslash is joined with the following line, and so is
# a value whose closing parenthesis is not on the same line (except for
# CreationDate and ModDate, which are always taken from a single line).
sub ReadInfo {
    my $fd       = shift;
    my $self     = shift;
    my $info_obj = shift;

    my ($ro) = split(" ", $info_obj);
    my $offset = $self->{Objects}[$ro];
    seek $fd, $offset, 0;

    # [ key, pattern between the key and "(", value may span several lines ]
    # The order matters: it is the order in which keys are tried on a line.
    my @fields = (
        [ 'Author',       '\s*', 1 ],
        [ 'CreationDate', '\s',  0 ],
        [ 'ModDate',      '\s',  0 ],
        [ 'Creator',      '\s',  1 ],
        [ 'Producer',     '\s',  1 ],
        [ 'Title',        '\s',  1 ],
        [ 'Subject',      '\s',  1 ],
        [ 'Keywords',     '\s',  1 ],
    );

    my $readinfo_buffer = '';

    LINE: while (1) {
        my $line = PDF::Core::PDFGetline($fd, \$offset);
        last LINE if $line =~ />>\r?\n?/;

        # Decode octal escapes.  An escape is a backslash followed by one
        # to three octal digits; digits after the third belong to the text
        # ("\0402000" is a space followed by "2000").
        $line =~ s/\\([0-7]{1,3})/chr(oct($1))/ge;

        # backslash at end of line: the string continues on the next line
        if ($line =~ /\\\r?\n?$/) {
            $line =~ s/\\\r?\n?//;
            $readinfo_buffer .= $line;
            next LINE;
        }

        # finish a value started on a previous line
        if ($readinfo_buffer) {
            $readinfo_buffer .= $line;
            $readinfo_buffer =~ s/\r?\n?$//;
            $line            = $readinfo_buffer;
            $readinfo_buffer = '';
        }

        FIELD: for my $field (@fields) {
            my ($key, $sep, $may_continue) = @$field;
            next FIELD if $line !~ /\/$key/;

            my $complete = ($line =~ s/\/$key$sep\(([^\)]*)\)\r?\n?/$1/);
            if ($complete || !$may_continue) {
                $self->{$key} = $line if !$self->{$key};
            }
            else {
                # no closing parenthesis yet: keep the text and append the
                # next line to it
                $line =~ s/\r?\n?$//;
                $readinfo_buffer = $line;
            }
            next LINE;
        }
    }

    return;
}
# TargetFile($self, $file)
#
# Links the object to a PDF file and reads its cross-reference data.
#
#   $self - object hash
#   $file - path of the PDF file
#
# On success the following keys of $self are set: {File_Name},
# {File_Handler} (an open, binmode'd handle), {Header} (the four bytes
# that follow "%PDF", minus the dash) and whatever the two
# ReadCrossReference passes store.
#
# Returns 1 on success, 0 when the file does not start with "%PDF" (a
# message is printed when $PDF::Verbose is set).
#
# Croaks when the object is already linked to a file, when no file name is
# given, when the file cannot be opened, or when no startxref/%%EOF pair
# is found at the end of the file.
sub TargetFile {
    my $self = shift;
    my $file = shift;

    croak "Already linked to the file ", $self->{File_Name}, "\n"
        if $self->{File_Name};

    croak "I need a file name (!)" if !$file;

    # One handle per object: a shared bareword handle would be re-opened
    # (and so hijacked) by the next object that targets a file.
    require IO::File;
    my $fh = IO::File->new($file, "r")
        or croak "can't open $file: $!";
    binmode $fh;

    $self->{File_Name}    = $file;
    $self->{File_Handler} = $fh;

    my $buf;
    read($fh, $buf, 4);
    if ($buf ne "%PDF") {
        print "File $file is not PDF compliant !\n" if $PDF::Verbose;
        return 0;
    }

    # "-1.x" follows the magic; keep the version without the dash
    read($fh, $buf, 4);
    $buf =~ s/-//;
    $self->{Header} = $buf;

    # The offset of the last cross-reference section is written between
    # "startxref" and "%%EOF" at the very end of the file.  Look at the
    # tail of the file (or the whole file when it is shorter than that).
    my $tail_length = 1024;
    seek($fh, -$tail_length, 2) or seek($fh, 0, 0);
    my $tail = '';
    read($fh, $tail, $tail_length);

    # the greedy .* selects the last startxref in the tail
    croak "Can't find startxref in $file"
        unless $tail =~ /.*startxref\s*(\d+)\s*%%EOF/s;
    my $offset = $1;

    ReadCrossReference_pass1($fh, $offset, $self);
    ReadCrossReference_pass2($fh, $offset, $self);

    return 1;
}
# GetInfo($self, $field)
#
# Returns one entry of the document information previously stored in $self.
#
#   $self  - object hash; must be linked to a file ({File_Name} set)
#   $field - name of the entry: Author, CreationDate, ModDate, Creator,
#            Producer, Title, Subject or Keywords.  The name is matched as
#            a pattern, in that order, so any string containing one of the
#            names selects it.
#
# Returns the stored value (undef when the document has none), or an empty
# string when $field names no known entry.
#
# Croaks when no file has been linked to the object.
#
# The field name is kept in a lexical: assigning it to the global $_ would
# overwrite the element the caller is currently iterating over in a
# for/map/grep.
sub GetInfo {
    my $self  = shift;
    my $field = shift;

    croak "PDF File not specified !\n" if !$self->{File_Name};

    for my $key (qw(Author CreationDate ModDate Creator
                    Producer Title Subject Keywords)) {
        return $self->{$key} if $field =~ /$key/;
    }

    return '';
}
# Pages($self)
#
# Returns $self->{PageTree}->{Count}.  When that count is not yet true,
# the page tree is first asked to read itself via ReadPageTree($self).
#
# Croaks when no file has been linked to the object.
sub Pages {
    my ($self) = @_;

    unless ($self->{File_Name}) {
        croak "PDF File not specified !\n";
    }

    unless ($self->{PageTree}->{Count}) {
        $self->{PageTree}->ReadPageTree($self);
    }

    return $self->{PageTree}->{Count};
}
# PageSize($self)
#
# Returns the elements of $self->{PageTree}->{MediaBox} as a list.  When
# the page tree's Count is not yet true, the tree is first asked to read
# itself via ReadPageTree($self).
#
# Croaks when no file has been linked to the object.
sub PageSize {
    my ($self) = @_;

    unless ($self->{File_Name}) {
        croak "PDF File not specified !\n";
    }

    unless ($self->{PageTree}->{Count}) {
        $self->{PageTree}->ReadPageTree($self);
    }

    return @{ $self->{PageTree}->{MediaBox} };
}
# PageRotation($self)
#
# Returns $self->{PageTree}->{Rotation}, or 0 when that value is false.
# When the page tree's Count is not yet true, the tree is first asked to
# read itself via ReadPageTree($self); the rotation is fetched only after
# that, so the first call already reports the value found in the file.
#
# When $PDF::Verbose is set, the rotation and the matching orientation
# (Portrait for 0/180, Landscape for 90/270) are printed.
#
# Croaks when no file has been linked to the object.
sub PageRotation {
    my $self = shift;

    croak "PDF File not specified !\n" if !$self->{File_Name};

    $self->{PageTree}->ReadPageTree($self)
        if !$self->{PageTree}->{Count};

    my $r = $self->{PageTree}->{Rotation};
    $r = 0 if !$r;

    $PDF::Verbose && do {
        print "Rotation ", $r, ": Portrait"  if $r == 0  || $r == 180;
        print "Rotation ", $r, ": Landscape" if $r == 90 || $r == 270;
    };

    return $r;
}
1;