NAME
Mojo::DOM - Minimalistic HTML/XML DOM parser with CSS selectors
SYNOPSIS
my $dom = Mojo::DOM->new( '<div><p id="a">Test</p><p id="b">123</p></div>' );
say $dom ->at( '#b' )->text;
say $dom ->find( 'p' )-> map ( 'text' )-> join ( "\n" );
say $dom ->find( '[id]' )-> map ( attr => 'id' )-> join ( "\n" );
$dom ->find( 'p[id]' )-> reverse -> each ( sub { say $_ ->{id} });
for my $e ( $dom ->find( 'p[id]' )-> each ) {
say $e ->{id}, ':' , $e ->text;
}
$dom ->find( 'div p' )-> last ->append( '<p id="c">456</p>' );
$dom ->find( ':not(p)' )-> map ( 'strip' );
say "$dom" ;
|
DESCRIPTION
Mojo::DOM is a minimalistic and relaxed HTML/XML DOM parser with CSS selector support. It will even try to interpret broken HTML and XML, so you should not use it for validation.
CASE-SENSITIVITY
Mojo::DOM defaults to HTML semantics, which means all tag and attribute names are lowercased and selectors need to be lowercase as well.
my $dom = Mojo::DOM->new( '<P ID="greeting">Hi!</P>' );
say $dom ->at( 'p[id]' )->text;
|
If XML processing instructions are found, the parser will automatically switch into XML mode and everything becomes case-sensitive.
my $dom = Mojo::DOM->new( '<?xml version="1.0"?><P ID="greeting">Hi!</P>' );
say $dom ->at( 'P[ID]' )->text;
|
XML detection can also be disabled with the "xml" method.
$dom ->xml(1);
$dom ->xml(0);
|
METHODS
Mojo::DOM implements the following methods.
all_text
my $trimmed = $dom ->all_text;
my $untrimmed = $dom ->all_text(0);
|
Extract text content from all descendant nodes of this element; smart whitespace trimming is enabled by default.
$dom ->parse( "<div>foo\n<p>bar</p>baz\n</div>" )->at( 'div' )->all_text;
$dom ->parse( "<div>foo\n<p>bar</p>baz\n</div>" )->at( 'div' )->all_text(0);
|
ancestors
my $collection = $dom ->ancestors;
my $collection = $dom ->ancestors( 'div > p' );
|
Find all ancestor elements of this node matching the CSS selector and return a Mojo::Collection object containing these elements as Mojo::DOM objects. All selectors from "SELECTORS" in Mojo::DOM::CSS are supported.
say $dom ->ancestors-> map ( 'tag' )-> join ( "\n" );
|
append
$dom = $dom ->append( '<p>I ♥ Mojolicious!</p>' );
|
Append HTML/XML fragment to this node.
$dom ->parse( '<div><h1>Test</h1></div>' )
->at( 'h1' )->append( '<h2>123</h2>' )->root;
$dom ->parse( '<p>Test</p>' )->at( 'p' )
->child_nodes->first->append( ' 123' )->root;
|
append_content
$dom = $dom ->append_content( '<p>I ♥ Mojolicious!</p>' );
|
Append HTML/XML fragment (for root and tag nodes) or raw content to this node's content.
$dom ->parse( '<div><h1>Test</h1></div>' )
->at( 'h1' )->append_content( '123' )->root;
$dom ->parse( '<!-- Test --><br>' )
->child_nodes->first->append_content( '123 ' )->root;
$dom ->parse( '<p>Test</p>' )->at( 'p' )->append_content( '<i>123</i>' )->root;
|
at
my $result = $dom ->at( 'div > p' );
|
Find first descendant element of this element matching the CSS selector and return it as a Mojo::DOM object or return undef if none could be found. All selectors from "SELECTORS" in Mojo::DOM::CSS are supported.
my $namespace = $dom ->at( '[xmlns\:svg]' )->{ 'xmlns:svg' };
|
attr
my $hash = $dom ->attr;
my $foo = $dom ->attr( 'foo' );
$dom = $dom ->attr({ foo => 'bar' });
$dom = $dom ->attr( foo => 'bar' );
|
This element's attributes.
say $dom ->find( '*' )-> map ( attr => 'id' )->compact-> join ( "\n" );
|
children
my $collection = $dom ->children;
my $collection = $dom ->children( 'div > p' );
|
Find all child elements of this element matching the CSS selector and return a Mojo::Collection object containing these elements as Mojo::DOM objects. All selectors from "SELECTORS" in Mojo::DOM::CSS are supported.
say $dom ->children->shuffle->first->tag;
|
child_nodes
my $collection = $dom ->child_nodes;
|
Return a Mojo::Collection object containing all child nodes of this element as Mojo::DOM objects.
$dom ->parse( '<p>Test<b>123</b></p>' )->at( 'p' )->child_nodes->first->remove;
$dom ->parse( '<!-- Test --><b>123</b>' )->child_nodes->first;
|
content
my $str = $dom ->content;
$dom = $dom ->content( '<p>I ♥ Mojolicious!</p>' );
|
Return this node's content or replace it with HTML/XML fragment (for root and tag nodes) or raw content.
$dom ->parse( '<div><b>Test</b></div>' )->at( 'div' )->content;
$dom ->parse( '<div><h1>Test</h1></div>' )->at( 'h1' )->content( '123' )->root;
$dom ->parse( '<p>Test</p>' )->at( 'p' )->content( '<i>123</i>' )->root;
$dom ->parse( '<div><h1>Test</h1></div>' )->at( 'h1' )->content( '' )->root;
$dom ->parse( '<!-- Test --><br>' )->child_nodes->first->content;
$dom ->parse( '<div><!-- Test -->456</div>' )
->at( 'div' )->child_nodes->first->content( ' 123 ' )->root;
|
descendant_nodes
my $collection = $dom ->descendant_nodes;
|
Return a Mojo::Collection object containing all descendant nodes of this element as Mojo::DOM objects.
$dom ->parse( '<p><!-- Test --><b>123<!-- 456 --></b></p>' )
->descendant_nodes-> grep ( sub { $_ ->type eq 'comment' })
-> map ( 'remove' )->first;
|
find
my $collection = $dom ->find( 'div > p' );
|
Find all descendant elements of this element matching the CSS selector and return a Mojo::Collection object containing these elements as Mojo::DOM objects. All selectors from "SELECTORS" in Mojo::DOM::CSS are supported.
my $id = $dom ->find( 'div' )->[23]{id};
my @headers = $dom ->find( 'h1, h2, h3' )-> map ( 'text' )-> each ;
my $hash = $dom ->find( '*' )->reduce( sub { $a ->{ $b ->tag}++; $a }, {});
my @divs = $dom ->find( 'div.foo\.bar' )-> each ;
|
following
my $collection = $dom ->following;
my $collection = $dom ->following( 'div > p' );
|
Find all sibling elements after this node matching the CSS selector and return a Mojo::Collection object containing these elements as Mojo::DOM objects. All selectors from "SELECTORS" in Mojo::DOM::CSS are supported.
say $dom ->following-> map ( 'tag' )-> join ( "\n" );
|
following_nodes
my $collection = $dom ->following_nodes;
|
Return a Mojo::Collection object containing all sibling nodes after this node as Mojo::DOM objects.
$dom ->parse( '<p>A</p><!-- B -->C' )->at( 'p' )->following_nodes-> last ->content;
|
matches
my $bool = $dom ->matches( 'div > p' );
|
Check if this element matches the CSS selector. All selectors from "SELECTORS" in Mojo::DOM::CSS are supported.
$dom ->parse( '<p class="a">A</p>' )->at( 'p' )->matches( '.a' );
$dom ->parse( '<p class="a">A</p>' )->at( 'p' )->matches( 'p[class]' );
$dom ->parse( '<p class="a">A</p>' )->at( 'p' )->matches( '.b' );
$dom ->parse( '<p class="a">A</p>' )->at( 'p' )->matches( 'p[id]' );
|
namespace
my $namespace = $dom ->namespace;
|
Find this element's namespace or return undef if none could be found.
my $namespace = $dom ->at( 'svg > svg\:circle' )->namespace;
my $namespace = $dom ->at( 'svg > circle' )->namespace;
|
new
my $dom = Mojo::DOM->new;
my $dom = Mojo::DOM->new( '<foo bar="baz">I ♥ Mojolicious!</foo>' );
|
Construct a new scalar-based Mojo::DOM object and "parse" HTML/XML fragment if necessary.
next
my $sibling = $dom -> next ;
|
Return Mojo::DOM object for next sibling element or undef if there are no more siblings.
$dom ->parse( '<div><h1>Test</h1><h2>123</h2></div>' )->at( 'h1' )-> next ;
|
next_node
my $sibling = $dom ->next_node;
|
Return Mojo::DOM object for next sibling node or undef if there are no more siblings.
$dom ->parse( '<p><b>123</b><!-- Test -->456</p>' )
->at( 'b' )->next_node->next_node;
$dom ->parse( '<p><b>123</b><!-- Test -->456</p>' )
->at( 'b' )->next_node->content;
|
parent
my $parent = $dom ->parent;
|
Return Mojo::DOM object for parent of this node or undef if this node has no parent.
parse
$dom = $dom ->parse( '<foo bar="baz">I ♥ Mojolicious!</foo>' );
|
Parse HTML/XML fragment with Mojo::DOM::HTML.
my $dom = Mojo::DOM->new->xml(1)->parse( $xml );
|
preceding
my $collection = $dom ->preceding;
my $collection = $dom ->preceding( 'div > p' );
|
Find all sibling elements before this node matching the CSS selector and return a Mojo::Collection object containing these elements as Mojo::DOM objects. All selectors from "SELECTORS" in Mojo::DOM::CSS are supported.
say $dom ->preceding-> map ( 'tag' )-> join ( "\n" );
|
preceding_nodes
my $collection = $dom ->preceding_nodes;
|
Return a Mojo::Collection object containing all sibling nodes before this node as Mojo::DOM objects.
$dom ->parse( 'A<!-- B --><p>C</p>' )->at( 'p' )->preceding_nodes->first->content;
|
prepend
$dom = $dom ->prepend( '<p>I ♥ Mojolicious!</p>' );
|
Prepend HTML/XML fragment to this node.
$dom ->parse( '<div><h2>123</h2></div>' )
->at( 'h2' )->prepend( '<h1>Test</h1>' )->root;
$dom ->parse( '<p>123</p>' )
->at( 'p' )->child_nodes->first->prepend( 'Test ' )->root;
|
prepend_content
$dom = $dom ->prepend_content( '<p>I ♥ Mojolicious!</p>' );
|
Prepend HTML/XML fragment (for root and tag nodes) or raw content to this node's content.
$dom ->parse( '<div><h2>123</h2></div>' )
->at( 'h2' )->prepend_content( 'Test' )->root;
$dom ->parse( '<!-- 123 --><br>' )
->child_nodes->first->prepend_content( ' Test' )->root;
$dom ->parse( '<p>Test</p>' )->at( 'p' )->prepend_content( '<i>123</i>' )->root;
|
previous
my $sibling = $dom ->previous;
|
Return Mojo::DOM object for previous sibling element or undef if there are no more siblings.
$dom ->parse( '<div><h1>Test</h1><h2>123</h2></div>' )->at( 'h2' )->previous;
|
previous_node
my $sibling = $dom ->previous_node;
|
Return Mojo::DOM object for previous sibling node or undef if there are no more siblings.
$dom ->parse( '<p>123<!-- Test --><b>456</b></p>' )
->at( 'b' )->previous_node->previous_node;
$dom ->parse( '<p>123<!-- Test --><b>456</b></p>' )
->at( 'b' )->previous_node->content;
|
remove
my $parent = $dom ->remove;
|
Remove this node and return "parent".
$dom ->parse( '<div><h1>Test</h1></div>' )->at( 'h1' )->remove;
$dom ->parse( '<p>123<b>456</b></p>' )
->at( 'p' )->child_nodes->first->remove->root;
|
replace
my $parent = $dom ->replace( '<div>I ♥ Mojolicious!</div>' );
|
Replace this node with HTML/XML fragment and return "parent".
$dom ->parse( '<div><h1>Test</h1></div>' )->at( 'h1' )->replace( '<h2>123</h2>' );
$dom ->parse( '<p>Test</p>' )
->at( 'p' )->child_nodes->[0]->replace( '<b>123</b>' )->root;
|
root
Return Mojo::DOM object for root node.
strip
my $parent = $dom ->strip;
|
Remove this element while preserving its content and return "parent".
$dom ->parse( '<div><h1>Test</h1></div>' )->at( 'h1' )->strip;
|
tag
my $tag = $dom ->tag;
$dom = $dom ->tag( 'div' );
|
This element's tag name.
say $dom ->children-> map ( 'tag' )-> join ( "\n" );
|
tap
$dom = $dom ->tap( sub {...});
|
Alias for "tap" in Mojo::Base.
text
my $trimmed = $dom ->text;
my $untrimmed = $dom ->text(0);
|
Extract text content from this element only (not including child elements); smart whitespace trimming is enabled by default.
$dom ->parse( "<div>foo\n<p>bar</p>baz\n</div>" )->at( 'div' )->text;
$dom ->parse( "<div>foo\n<p>bar</p>baz\n</div>" )->at( 'div' )->text(0);
|
to_string
my $str = $dom ->to_string;
|
Render this node and its content to HTML/XML.
$dom ->parse( '<div><b>Test</b></div>' )->at( 'div b' )->to_string;
|
tree
my $tree = $dom ->tree;
$dom = $dom ->tree([ 'root' ]);
|
Document Object Model. Note that this structure should only be used very carefully since it is very dynamic.
type
This node's type, usually cdata, comment, doctype, pi, raw, root, tag or text.
wrap
$dom = $dom ->wrap( '<div></div>' );
|
Wrap HTML/XML fragment around this node, placing it as the last child of the first innermost element.
$dom ->parse( '<b>Test</b>' )->at( 'b' )->wrap( '<p>123</p>' )->root;
$dom ->parse( '<b>Test</b>' )->at( 'b' )->wrap( '<div><p></p>123</div>' )->root;
$dom ->parse( '<b>Test</b>' )->at( 'b' )->wrap( '<p></p><p>123</p>' )->root;
$dom ->parse( '<p>Test</p>' )->at( 'p' )->child_nodes->first->wrap( '<b>' )->root;
|
wrap_content
$dom = $dom ->wrap_content( '<div></div>' );
|
Wrap HTML/XML fragment around this node's content, placing that content as the last children of the first innermost element.
$dom ->parse( '<p>Test<p>' )->at( 'p' )->wrap_content( '<b>123</b>' )->root;
$dom ->parse( '<b>Test</b>' )->wrap_content( '<p></p><p>123</p>' );
|
xml
my $bool = $dom ->xml;
$dom = $dom ->xml( $bool );
|
Disable HTML semantics in parser and activate case-sensitivity; defaults to auto detection based on processing instructions.
OPERATORS
Mojo::DOM overloads the following operators.
array
Alias for "child_nodes".
$dom ->parse( '<!-- Test --><b>123</b>' )->[0];
|
bool
Always true.
hash
Alias for "attr".
$dom ->parse( '<div id="test">Test</div>' )->at( 'div' )->{id};
|
stringify
Alias for "to_string".
SEE ALSO
Mojolicious, Mojolicious::Guides, http://mojolicio.us.