#!/usr/bin/perl -w
use strict;
use lib './lib';
our $VERSION = sprintf "%d.%02d", q$Revision: 1.5 $ =~ /(\d+)/g;
# pdf2ocr main script body.
#
# For every PDF path given on the command line, print the text obtained from
# PDF::OCR::Thorough::Cached to STDOUT.  With -n or -C nothing is extracted;
# only the cache file location is reported (on STDERR).
#
# NOTE(review): config(), gopts(), argv_aspaths(), man() and debug() are not
# defined in this file; presumably they come from LEOCHARRE::CLI (listed under
# SEE ALSO) -- confirm that it and PDF::OCR::Thorough::Cached are loaded.

# Site-wide defaults: start empty and load the config file only if it exists.
my $conf     = {};
my $abs_conf = '/etc/pdf2ocr.conf';
if ( -f $abs_conf ) {
    # Use the same variable that was tested above so the path cannot drift.
    $conf = config($abs_conf);
}

# Command-line flags override whatever the config file provided.
my $o = gopts('sna:C');
$conf->{CACHE_BY_SUM} = 1       if $o->{s};
$conf->{abs_cache}    = $o->{a} if $o->{a};

# Show the manual when no usable paths were given.  The extra truth test keeps
# an undefined return value from dying under 'strict refs' before man() runs.
my $pdfs = argv_aspaths();
( $pdfs and @$pdfs ) or man();

# The cache settings are the same for every file, so push them into the
# module's package variables once, before any object is created.
$PDF::OCR::Thorough::Cached::ABS_CACHE_DIR = $conf->{abs_cache}
    if $conf->{abs_cache};
$PDF::OCR::Thorough::Cached::CACHE_BY_SUM = $conf->{CACHE_BY_SUM}
    if $conf->{CACHE_BY_SUM};

for my $abs_pdf ( @{ $pdfs || [] } ) {

    # Files for which no object can be built are skipped silently.
    my $p = PDF::OCR::Thorough::Cached->new($abs_pdf) or next;

    # Either setting may be undefined when neither the config file nor the
    # command line supplied it; substitute '' so building the debug message
    # does not emit "uninitialized value" warnings under -w.
    my $cache_by_sum = $PDF::OCR::Thorough::Cached::CACHE_BY_SUM;
    my $cache_dir    = $PDF::OCR::Thorough::Cached::ABS_CACHE_DIR;
    debug( "cache by sum? " . ( defined $cache_by_sum ? $cache_by_sum : '' ) );
    debug( "abs cache dir? " . ( defined $cache_dir ? $cache_dir : '' ) );

    my $abs_cache_file = $p->abs_cached;
    debug("cache file: $abs_cache_file");

    # -n: only report where the cache file would be.
    if ( $o->{n} ) {
        print STDERR "$abs_cache_file\n";
        next;
    }

    # -C: report the cache file if it is on disk, otherwise "0".
    if ( $o->{C} ) {
        print STDERR ( -f $abs_cache_file ? "$abs_cache_file\n" : "0\n" );
        next;
    }

    # Default action: emit the document text on STDOUT.
    my $text = $p->get_text;
    print $text;
}
__END__
=pod
=head1 NAME
pdf2ocr - get the text content of the images within a pdf document
=head1 DESCRIPTION
The argument is one or more pdf files.
This script assumes that each page in the pdf is ONE image of a single 8.5x11 page;
that is what the calculations are set up for.
=head1 USAGE EXAMPLES
=head1 OPTION FLAGS
-h help
-d debug
-v version
-s cache by sum on
-n don't do anything, just show where the cache file would be
-C don't do anything, only show where the cache file is if it exists (shows 0 otherwise);
basically checks whether the pdf is cached or not.
=head1 PARAMETERS
-a abs cache dir
=head1 /etc/pdf2ocr.conf
---
abs_cache: /tmp/cache
CACHE_BY_SUM: 1
=head1 SEE ALSO
PDF::OCR - parent package
PDF::OCR::Thorough::Cached
LEOCHARRE::CLI
=head1 AUTHOR
Leo Charre leocharre at cpan dot org
=cut