NAME
PDF::Make::Extract::Result - Structured text extraction result
SYNOPSIS
my $result = $builder->extract_structured('doc.pdf', page => 0);
# Walk the hierarchy
for my $block ($result->blocks) {
for my $line ($block->lines) {
for my $word ($line->words) {
printf "'%s' at (%.0f, %.0f) size=%.0f\n",
$word->text, $word->x0, $word->y0, $word->font_size;
}
}
}
# Flat list of positioned words
my @items = $result->text_positions;
# Plain text
my $text = $result->to_string;
METHODS
blocks()
Returns a list of PDF::Make::Extract::Block objects.
block_count()
Returns the number of blocks.
text_positions()
Returns a flat list of hashrefs with keys: text, x, y, w, h, font_size, baseline, plus mcid/tag when the source PDF is tagged.
words()
Convenience method that returns a flat list of all PDF::Make::Extract::Word objects.
to_string()
Returns the plain text, with blocks separated by blank lines.