NAME

PDF::Make::Extract::Result - Structured text extraction result

SYNOPSIS

my $result = $builder->extract_structured('doc.pdf', page => 0);

# Walk the hierarchy
for my $block ($result->blocks) {
    for my $line ($block->lines) {
        for my $word ($line->words) {
            printf "'%s' at (%.0f, %.0f) size=%.0f\n",
                $word->text, $word->x0, $word->y0, $word->font_size;
        }
    }
}

# Flat list of positioned words
my @items = $result->text_positions;

# Plain text
my $text = $result->to_string;

METHODS

blocks()

Returns a list of PDF::Make::Extract::Block objects.

block_count()

Returns the number of blocks.

text_positions()

Returns a flat list of hashrefs, each with the keys text, x, y, w, h, font_size, and baseline. When the source PDF is tagged, each hashref also carries the mcid and tag keys.

words()

Convenience method: returns a flat list of all PDF::Make::Extract::Word objects in the result, without having to walk the blocks and lines yourself.

to_string()

Returns the extracted content as plain text, with blocks separated by blank lines.

SEE ALSO

PDF::Make::Extract::Block, PDF::Make::Builder