The Perl and Raku Conference 2025: Greenville, South Carolina - June 27-29 Learn more

# $Id: phyloxml.pm 11507 2007-06-23 01:37:45Z jason $
#
# BioPerl module for Bio::TreeIO::phyloxml
#
# Please direct questions and support issues to <bioperl-l@bioperl.org>
#
# Cared for by Mira Han <mirhan@indiana.edu>
#
# Copyright Mira Han
#
# You may distribute this module under the same terms as perl itself
# POD documentation - main docs before the code
=head1 NAME
Bio::TreeIO::phyloxml - TreeIO implementation for parsing PhyloXML format.
=head1 SYNOPSIS
# do not use this module directly
use Bio::TreeIO;
my $treeio = Bio::TreeIO->new(-format => 'phyloxml',
-file => 'tree.dnd');
my $tree = $treeio->next_tree;
=head1 DESCRIPTION
This module handles parsing and writing of phyloXML format.
=head1 FEEDBACK
=head2 Mailing Lists
User feedback is an integral part of the evolution of this and other
Bioperl modules. Send your comments and suggestions preferably to the
Bioperl mailing list. Your participation is much appreciated.
bioperl-l@bioperl.org - General discussion
http://bioperl.org/wiki/Mailing_lists - About the mailing lists
=head2 Support
Please direct usage questions or support issues to the mailing list:
I<bioperl-l@bioperl.org>
rather than to the module maintainer directly. Many experienced and
responsive experts will be able to look at the problem and quickly
address it. Please include a thorough description of the problem
with code and data examples if at all possible.
=head2 Reporting Bugs
Report bugs to the Bioperl bug tracking system to help us keep track
of the bugs and their resolution. Bug reports can be submitted via the
web:
https://github.com/bioperl/bioperl-live/issues
=head1 AUTHOR - Mira Han
Email mirhan@indiana.edu
=head1 APPENDIX
The rest of the documentation details each of the object methods.
Internal methods are usually preceded with a _
=cut
# Let the code begin...
$Bio::TreeIO::phyloxml::VERSION = '1.7.8';
use strict;
# Object preamble - inherits from Bio::Root::Root
use base qw(Bio::TreeIO);
# Prepare the stream for reading or writing.
#   Args   : named TreeIO arguments; -treetype defaults to Bio::Tree::Tree
#            and -nodetype to Bio::Tree::AnnotatableNode
#   Throws : in read mode, when no XML::LibXML::Reader could be created
sub _initialize
{
    my($self, %args) = @_;
    $args{-treetype} ||= 'Bio::Tree::Tree';
    $args{-nodetype} ||= 'Bio::Tree::AnnotatableNode';
    $self->SUPER::_initialize(%args);

    # phyloxml TreeIO does not use SAX, so no EventHandler is attached;
    # instead the document is pulled through a libXML reader.
    if ($self->mode eq 'r') {
        if ($self->_fh) {
            $self->{'_reader'} = XML::LibXML::Reader->new(
                IO        => $self->_fh,
                no_blanks => 1
            );
        }
        if (!$self->{'_reader'}) {
            $self->throw("XML::LibXML::Reader not initialized");
        }
    }
    elsif ($self->mode eq 'w') {
        # XML declaration followed by the document root.  DESTROY prints the
        # matching '</phyloxml>', so the root has to be opened here or the
        # output is not well-formed.
        $self->_print('<?xml version="1.0" encoding="UTF-8"?>',"\n");
        $self->_print('<phyloxml xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns="http://www.phyloxml.org" xsi:schemaLocation="http://www.phyloxml.org http://www.phyloxml.org/1.10/phyloxml.xsd">');
    }

    $self->treetype($args{-treetype});
    $self->nodetype($args{-nodetype});
    $self->{'_lastitem'} = {};    # holds open items and the attribute hash
    $self->_init_func();
}
# Install the dispatch tables that map element names to the handlers run
# on their opening and closing tags.  Elements not listed here are handled
# by element_default / end_element_default (see processXMLNode).
sub _init_func
{
    my ($self) = @_;

    # element name => handler for the opening tag
    $self->{'_start_elements'} = {
        'phylogeny'         => \&element_phylogeny,
        'clade'             => \&element_clade,
        'sequence_relation' => \&element_relation,
        'clade_relation'    => \&element_relation,
    };

    # element name => handler for the closing tag
    $self->{'_end_elements'} = {
        'phylogeny'         => \&end_element_phylogeny,
        'clade'             => \&end_element_clade,
        'sequence_relation' => \&end_element_relation,
        'clade_relation'    => \&end_element_relation,
    };
}
# Destructor: a stream opened for writing closes the phyloxml root element
# (and flushes when flush-on-write is enabled) before the parent class
# destructor runs.
sub DESTROY {
    my ($self) = @_;
    if ($self->mode eq 'w') {
        $self->_print('</phyloxml>');
        if ($self->_flush_on_write && defined $self->_fh) {
            $self->flush;
        }
    }
    $self->SUPER::DESTROY;
}
=head2 next_tree
Title : next_tree
Usage : my $tree = $treeio->next_tree
Function: Gets the next tree in the stream
Returns : Bio::Tree::TreeI
Args : none
=cut
sub next_tree
{
    my ($self) = @_;
    my $reader = $self->{'_reader'};
    my $tree;

    NODE: while ($reader->read) {
        my $closes_tree = $reader->nodeType == XML_READER_TYPE_END_ELEMENT
                       && $reader->name eq 'phylogeny';

        # every node except a closing </phylogeny> goes to the generic
        # node processor
        unless ($closes_tree) {
            $self->processXMLNode;
            next NODE;
        }

        # </phylogeny>: assemble the tree and stop reading
        $tree = $self->end_element_phylogeny();
        last NODE;
    }

    # still undef when the reader ran out of nodes before a tree was closed
    return $tree;
}
=head2 add_attribute
Title : add_attribute
Usage : my $node = $treeio->add_attribute(-obj=>$node, -attr=>"id_source = \"A\"")
Function: add attributes to an object
Returns : the node that we added annotations to
Args : -obj => object that will have the Annotation. (Bio::Tree::AnnotatableNode)
-attr => string in the form "A = B", where A is the attribute name and B is the attribute value
=cut
sub add_attribute
{
    my ($self, @args) = @_;
    my ($obj, $attr) = $self->_rearrange([qw(OBJ ATTR)], @args);
    if ($attr) {
        # wrap the attribute text in a throw-away element so that the XML
        # reader can parse it
        $attr = '<dummy '.$attr.'/>';
    }

    my $oldreader = $self->{'_reader'};    # saved so the main stream can resume
    $self->{'_reader'} = XML::LibXML::Reader->new(
        string    => $attr,
        no_blanks => 1
    );
    my $reader = $self->{'_reader'};
    $self->{'_currentannotation'} = [];    # holds annotationcollection
    $self->{'_currenttext'} = '';

    # pretend we saw a <dummy> element: count it and push it, with an empty
    # attribute hash, onto the open-element stack
    $self->{'_lastitem'}->{'dummy'}++;
    push @{$self->{'_lastitem'}->{'current'}}, { 'dummy' => {} };
    # object to annotate
    push @{$self->{'_currentitems'}}, $obj;

    # read the attributes of the dummy element into its attribute hash
    my $attrs = $self->current_attr;
    while ($reader->read) {
        $self->processAttribute($attrs);
    }

    # if there is id_source, register the object in _id_link
    if (exists $attrs->{'id_source'}) {
        $self->{'_id_link'}->{ $attrs->{'id_source'} } = $obj;
    }

    # an id_ref must point at a registered id_source, and vice versa
    my $idref      = exists $attrs->{'id_ref'} ? $attrs->{'id_ref'} : '';
    my $srcbyidref = $self->{'_id_link'}->{$idref};
    my $mismatch   = ($idref xor $srcbyidref);

    if (!$mismatch) {
        # if attributes exist, add an Annotation::Collection with tag '_attr'
        my $newac = $obj->annotation;
        if ( scalar keys %{$attrs} ) {
            my $newattr = Bio::Annotation::Collection->new();
            foreach my $tag (keys %{$attrs}) {
                my $sv = Bio::Annotation::SimpleValue->new(
                    -value => $attrs->{$tag}
                );
                $newattr->add_Annotation($tag, $sv);
            }
            $newac->add_Annotation('_attr', $newattr);
        }
    }

    # Undo the temporary state.  The counter that is decremented is the one
    # incremented above ('dummy'); it no longer depends on what the
    # exhausted reader reports as its current node name.
    pop @{$self->{'_currentitems'}};
    $self->{'_lastitem'}->{'dummy'}--;
    pop @{$self->{'_lastitem'}->{'current'}};
    $self->{'_reader'} = $oldreader;       # restore reader

    # report the mismatch only after the state has been restored, so a
    # caller that catches the exception can keep using this object
    if ($mismatch) {
        $self->throw("id_ref and id_src incompatible: $idref, "
                     . (defined $srcbyidref ? $srcbyidref : ''));
    }
    return $obj;
}
=head2 add_phyloXML_annotation
Title : add_phyloXML_annotation
Usage : my $node = $treeio->add_phyloXML_annotation(-obj=>$node, -xml=>$xmlstring)
my $tree = $treeio->add_phyloXML_annotation('-obj'=>$tree, '-xml'=>'<sequence_relation id_ref_0="A" id_ref_1="B" type="orthology"/>')
Function: add annotations to a node in the phyloXML format string
Returns : the node that we added annotations to
Args : -obj => object that will have the Annotation. (Bio::Tree::AnnotatableNode)
-xml => string in phyloXML format that describes the annotation for the node
=cut
sub add_phyloXML_annotation
{
# Parse an XML fragment with a temporary reader and run it through the normal
# node handlers, with $obj installed as the item currently being annotated.
my ($self, @args) = @_;
my ($obj, $xml_string) = $self->_rearrange([qw(OBJ XML)], @args);
# wrap the fragment in a single root element so it can be parsed as a document
$xml_string = '<phyloxml>'.$xml_string.'</phyloxml>';
$self->debug( $xml_string );
my $oldreader = $self->{'_reader'}; # save reader
$self->{'_reader'} = XML::LibXML::Reader->new(
string => $xml_string,
no_blanks => 1
);
my $reader = $self->{'_reader'};
#$self->{'_currentannotation'} = []; # holds annotationcollection
#$self->{'_currenttext'} = '';
#$self->{'_id_link'} = {};
# pretend we saw a <clade> element
$self->{'_lastitem'}->{'clade'}++;
push @{$self->{'_lastitem'}->{'current'}}, { 'clade'=>{}}; # current holds current element and empty hash for its attributes
# our object to annotate (nodeI)
# push into temporary list
push @{$self->{'_currentitems'}}, $obj;
$reader->read; #read away the first element 'phyloxml'
# every remaining node is handed to processXMLNode
# NOTE(review): this appears to include the closing </phyloxml> of the wrapper,
# for which processXMLNode itself pops one entry off the element stack - confirm
while ($reader->read)
{
$self->processXMLNode;
}
# pop from temporary list
pop @{$self->{'_currentitems'}};
# NOTE(review): the counter incremented above is 'clade', but the one decremented
# here is whatever name the exhausted reader reports, if any - confirm they match
$self->{'_lastitem'}->{ $reader->name }-- if $reader->name;
# NOTE(review): together with the pop done by processXMLNode for the wrapper's end
# tag this may remove an entry that was on the stack before this call - confirm
pop @{$self->{'_lastitem'}->{'current'}};
$self->{'_reader'} = $oldreader; # restore reader
return $obj;
}
=head2 write_tree
Title : write_tree
Usage : $treeio->write_tree($tree);
Function: Write a tree out to data stream in phyloxml format
Returns : none
Args : Bio::Tree::TreeI object
=cut
sub write_tree
{
    my ($self, @trees) = @_;
    foreach my $tree (@trees) {
        my $root = $tree->get_root_node;

        # Every tree tag becomes an attribute of <phylogeny>.  'rooted' is
        # skipped here because it is written exactly once below; emitting it
        # in both places produced a duplicate attribute, which is not
        # well-formed XML.
        my $attr_str = '';
        foreach my $tag ($tree->get_all_tags()) {
            next if $tag eq 'rooted';
            foreach my $value ($tree->get_tag_values($tag)) {
                $attr_str .= " ".$tag."=\"".$value."\"";
            }
        }

        # Rootedness: an explicit 'false' is honoured (the string is true in
        # boolean context and used to be reported as rooted), any other true
        # value means rooted, and without a usable tag a binary root counts
        # as rooted.
        my ($b_rooted) = $tree->get_tag_values('rooted');
        my $rooted;
        if (defined $b_rooted && lc($b_rooted) eq 'false') {
            $rooted = 'false';
        }
        elsif ($b_rooted) {
            $rooted = 'true';
        }
        else {
            $rooted = $tree->is_binary($root) ? 'true' : 'false';
        }
        $attr_str .= " rooted=\"".$rooted."\"";

        $self->_print("<phylogeny".$attr_str.">");

        # the clades, written recursively from the root
        if ($root->isa('Bio::Tree::AnnotatableNode')) {
            $self->_print($self->_write_tree_Helper_annotatableNode($root));
        }
        else {
            $self->_print($self->_write_tree_Helper_generic($root));
        }

        # relations collected while the clades were written follow them:
        # print clade relations
        while (my $str = pop (@{$self->{'_tree_attr'}->{'clade_relation'}})) {
            $self->_print($str);
        }
        # print sequence relations
        while (my $str = pop (@{$self->{'_tree_attr'}->{'sequence_relation'}})) {
            $self->_print($str);
        }
        $self->_print("</phylogeny>");
    }
    $self->flush if $self->_flush_on_write && defined $self->_fh;
    return;
}
=head2 _write_tree_Helper_annotatableNode
Title : _write_tree_Helper_annotatableNode
Usage : internal method used by write_tree, not to be used directly
Function: recursive helper function of write_tree for the annotatableNodes.
translates annotations into xml elements.
Returns : string describing the node
Args : Bio::Tree::AnnotatableNode object, string
=cut
sub _write_tree_Helper_annotatableNode
{
    my ($self, $node, $str) = @_;    # $self is the Bio::TreeIO::phyloxml writer
    my $ac = $node->annotation;

    # clade relations are not written inside the clade; they are stored on
    # the writer and printed by write_tree after all clades
    for my $relation ($ac->get_Annotations('clade_relation')) {
        my $clade_rel = $self->_relation_to_string($node, $relation, '');
        push @{$self->{'_tree_attr'}->{'clade_relation'}}, $clade_rel;
    }

    # opening tag, carrying id_source when the node has one
    my $open_tag = '<clade';
    my ($attr) = $ac->get_Annotations('_attr');
    if ($attr) {
        my ($id_source) = $attr->get_Annotations('id_source');
        $open_tag .= " id_source=\"".$id_source->value."\"" if $id_source;
    }
    $str .= $open_tag.">";

    # child clades first
    for my $child ($node->each_Descendent()) {
        $str = $self->_write_tree_Helper_annotatableNode($child, $str);
    }

    # then this node's own annotations
    $str = print_annotation( $node, $str, $ac );

    # then its sequences; sequence relations are stored on the writer in the
    # same way as clade relations
    if ($node->has_sequence) {
        for my $seq (@{$node->sequence}) {
            for my $relation ($seq->annotation->get_Annotations('sequence_relation')) {
                my $sequence_rel = $self->_relation_to_string($seq, $relation, '');
                push @{$self->{'_tree_attr'}->{'sequence_relation'}}, $sequence_rel;
            }
            $str = print_seq_annotation( $node, $str, $seq );
        }
    }

    $str .= "</clade>";
    return $str;
}
=head2 _write_tree_Helper_generic
Title : _write_tree_Helper_generic
Usage : internal method used by write_tree, not to be used directly
Function: recursive helper function of write_tree for generic NodesI.
all tags are translated into property elements.
Returns : string describing the node
Args : Bio::Node::NodeI object, string
=cut
sub _write_tree_Helper_generic
{
    my ($self, $node, $str) = @_;    # $self is the Bio::TreeIO::phyloxml writer
    $str = '' unless defined $str;

    # Values are inserted into XML text and attribute positions, so the
    # markup characters have to be escaped or the output is not well-formed.
    my $escape = sub {
        my ($text) = @_;
        $text =~ s/&/&amp;/g;
        $text =~ s/</&lt;/g;
        $text =~ s/>/&gt;/g;
        $text =~ s/"/&quot;/g;
        return $text;
    };
    # A value is written whenever it is defined and non-empty; a plain truth
    # test would silently drop legitimate values such as a branch length of 0.
    my $has_value = sub { return defined $_[0] && $_[0] ne '' };

    # start <clade>
    $str .= '<clade>';

    # print all descendent nodes
    foreach my $child ( $node->each_Descendent() ) {
        $str = $self->_write_tree_Helper_generic($child, $str);
    }

    # print all tags as <property> elements
    foreach my $tag ( $node->get_all_tags() ) {
        my $ref = $escape->("tag:$tag");
        foreach my $val ( $node->get_tag_values($tag) ) {
            $str .= "<property datatype=\"xsd:string\" ref=\"$ref\" applies_to=\"clade\">";
            $str .= $escape->(defined $val ? $val : '');
            $str .= "</property>";
        }
    }

    # print NodeI features
    my $id = $node->id;
    if ($has_value->($id)) {
        $str .= "<name>".$escape->($id)."</name>";
    }
    my $branch_length = $node->branch_length;
    if ($has_value->($branch_length)) {
        $str .= "<branch_length>".$escape->($branch_length)."</branch_length>";
    }
    my $bootstrap = $node->bootstrap;
    if ($has_value->($bootstrap)) {
        $str .= "<confidence type = \"bootstrap\">".$escape->($bootstrap)."</confidence>";
    }

    $str .= "</clade>";
    return $str;
}
=head2 _relation_to_string
Title : _relation_to_string
Usage : internal method used by write_tree, not to be used directly
Function: internal function used by write_tree to translate Annotation::Relation objects into xml elements.
Returns : string describing the node
Args : Bio::Node::AnnotatableNode (or Bio::SeqI) object that contains the Annotation::Relation,
the Annotation::Relation object,
the string
=cut
# It may be more appropriate to make Annotation::Relation have
# a to_string callback function,
# and have this subroutine set as the callback when we are in
# phyloXML context.
# I've put it here for now, since write_tree is the only place it is used.
sub _relation_to_string {
    my ($self, $obj, $rel, $str) = @_;
    $str = '' unless defined $str;

    # both ends of the relation are referenced by their id_source value
    my ($id_ref_0) = $obj->annotation->get_nested_Annotations(
        '-keys'      => ['id_source'],
        '-recursive' => 1);
    my ($id_ref_1) = $rel->to->annotation->get_nested_Annotations(
        '-keys'      => ['id_source'],
        '-recursive' => 1);

    # a relation cannot be written without both references; say so instead
    # of failing on a method call on an undefined value
    if (!$id_ref_0 || !$id_ref_1) {
        $self->throw("cannot write <".$rel->tagname.">: "
                     ."both related objects need an id_source");
    }

    my $confidence      = $rel->confidence();
    my $confidence_type = $rel->confidence_type();

    $str .= "<";
    $str .= $rel->tagname;
    $str .= " id_ref_0=\"".$id_ref_0->value."\"";
    $str .= " id_ref_1=\"".$id_ref_1->value."\"";
    $str .= " type=\"".$rel->type."\"";

    # a confidence of 0 is a value and must be written, so test for
    # presence rather than truth
    if (defined $confidence && $confidence ne '') {
        $str .= " ><confidence";
        if ($confidence_type) {
            $str .= " type=\"".$confidence_type."\"";
        }
        $str .= ">";
        $str .= $confidence;
        $str .= "</confidence>";
        $str .= "</";
        $str .= $rel->tagname;
        $str .= ">";
    }
    else {
        # no nested confidence: self-closing element
        $str .= "/>";
    }
    return $str;
}
=head2 read_annotation
Title : read_annotation
Usage : $treeio->read_annotation(-obj=>$node, -path=>$path, -attr=>1);
Function: read text value (or attribute value) of the annotations corresponding to the element path
Returns : list of text values of the annotations matching the path
Args : -obj => object that contains the Annotation. (Bio::Tree::AnnotatableNode or Bio::SeqI)
-path => path of the nested elements
-attr => Boolean value to indicate whether to get the attribute of the element or the text value.
(default is 0, meaning text value is returned)
=cut
# It may be more appropriate to make a separate Annotation::phyloXML object
# and have this subroutine within that object so it can handle the
# reading and writing of the values and attributes.
# but since tagTree is a temporary stub and I didn't want to make
# a redundant object I've put it here for now.
sub read_annotation
{
    my ($self, @args) = @_;
    my ($obj, $path, $attr) = $self->_rearrange([qw(OBJ PATH ATTR)], @args);
    my $ac = $obj->annotation;

    # text lookup: walk the path as given
    return $self->_read_annotation_text_Helper( [$ac], $path) unless $attr;

    # attribute lookup: attributes are stored in a nested '_attr' collection,
    # so '_attr' is inserted in front of the last path component
    my @elements = split ('/', $path);
    my $final = pop @elements;
    $path = join ('/', @elements, '_attr', $final);
    return $self->_read_annotation_attr_Helper( [$ac], $path);
}
# Follow a '/'-separated element path through nested annotation collections
# and return the '_text' values found at the end of it.
#   Args    : array ref of annotation collections to start from, path string
#   Returns : list of text values
sub _read_annotation_text_Helper
{
    my ($self, $acs, $path) = @_;
    my @steps = split ('/', $path);
    my @level = @$acs;

    # descend one path component at a time (at least once, even when the
    # path is empty), keeping only annotations that are collections
    do {
        my $key = shift @steps;
        my @below = ();
        foreach my $ac (@level) {
            push @below, grep { $_->isa('Bio::AnnotationCollectionI') }
                         $ac->get_Annotations($key);
        }
        @level = @below;
    } while (@steps);

    # end of the path: collect the text value of every collection reached
    my @values = ();
    foreach my $text (map { $_->get_Annotations('_text') } @level) {
        push (@values, $text->value) if $text;
    }
    return @values;
}
# Follow a '/'-separated path through nested annotation collections and
# return the values of the attribute named by the last path component.
#   Args    : array ref of annotation collections to start from,
#             path string of the form 'element/.../_attr/attribute'
#   Returns : list of attribute values; the empty list when the path has
#             fewer than two components
sub _read_annotation_attr_Helper
{
    my ($self, $acs, $path) = @_;
    my @steps = split ('/', defined $path ? $path : '');

    # A usable path names at least one collection plus the attribute.  The
    # recursive form only stopped when exactly one component was left after
    # taking a step, so a shorter path recursed without end.
    return () if @steps < 2;

    my $attrname = pop @steps;
    my @level = @$acs;

    # descend one component at a time, keeping only annotations that are
    # themselves collections
    foreach my $key (@steps) {
        my @below = ();
        foreach my $ac (@level) {
            push @below, grep { $_->isa('Bio::AnnotationCollectionI') }
                         $ac->get_Annotations($key);
        }
        @level = @below;
    }

    my @sv = map { $_->get_Annotations($attrname) } @level;
    return map { $_->value } @sv;
}
=head1 Methods for parsing the XML document
=cut
=head2 processXMLNode
Title : processXMLNode
Usage : $treeio->processXMLNode
Function: read the XML node and process according to the node type
Returns : none
Args : none
=cut
sub processXMLNode
{
# Handle the node the reader is currently positioned on. The three type checks
# below are separate 'if' statements on purpose: an empty element is handled as
# a start tag and then, by overwriting $nodetype, as an end tag in the same call.
my ($self) = @_;
my $reader = $self->{'_reader'};
my $nodetype = $reader->nodeType;
if ( $nodetype == XML_READER_TYPE_ELEMENT)
{
# opening tag: count the element and push it onto the open-element stack
$self->{'_lastitem'}->{$reader->name}++;
push @{$self->{'_lastitem'}->{'current'}}, { $reader->name=>{}}; # current holds current element and empty hash for its attributes
# run the start handler registered for this element name, or the generic one
if (exists $self->{'_start_elements'}->{$reader->name}) {
my $method = $self->{'_start_elements'}->{$reader->name};
$self->$method();
}
else {
$self->element_default();
}
if ($reader->isEmptyElement) {
# element is complete
# set nodetype so it can jump and
# do procedures for XML_READER_TYPE_END_ELEMENT
$nodetype = XML_READER_TYPE_END_ELEMENT;
}
}
if ($nodetype == XML_READER_TYPE_TEXT)
{
# text node: remember the text until the enclosing element is closed
$self->{'_currenttext'} = $reader->value;
}
if ($nodetype == XML_READER_TYPE_END_ELEMENT)
{
# closing tag (or empty element): run the end handler registered for this
# element name, or the generic one
if (exists $self->{'_end_elements'}->{$reader->name}) {
my $method = $self->{'_end_elements'}->{$reader->name};
$self->$method();
}
else {
$self->end_element_default();
}
# the element is finished: undo the bookkeeping done for its opening tag
# and forget the remembered text
$self->{'_lastitem'}->{ $reader->name }--;
pop @{$self->{'_lastitem'}->{'current'}};
$self->{'_currenttext'} = '';
}
}
=head2 processAttribute
Title : processAttribute
Usage : $treeio->processAttribute(\%hash_for_attribute);
Function: reads the attributes of the current element into a hash
Returns : none
Args : hash reference where the attributes will be stored.
=cut
sub processAttribute
{
    my ($self, $data) = @_;
    my $reader = $self->{'_reader'};

    # nothing to do when the current element has no attributes
    my $on_attribute = $reader->moveToFirstAttribute;
    return $on_attribute unless $on_attribute;

    # copy name => value for every attribute of the element
    while ($on_attribute) {
        $data->{$reader->name()} = $reader->value;
        $on_attribute = $reader->moveToNextAttribute;
    }

    # put the reader back on the element that owns the attributes
    return $reader->moveToElement;
}
=head2 element_phylogeny
Title : element_phylogeny
Usage : $treeio->element_phylogeny
Function: initialize the parsing of a tree
Returns : none
Args : none
=cut
sub element_phylogeny
{
    my ($self) = @_;

    # fresh per-tree state:
    #   _currentitems      nodes while parsing the current level
    #   _currentnodes      nodes while constructing the tree
    #   _currentannotation stack of annotation collections
    #   _levelcnt          per-level node counters
    $self->{$_} = [] for qw(_currentitems _currentnodes _currentannotation _levelcnt);
    $self->{'_currenttext'} = '';
    $self->{'_id_link'} = {};

    # the attribute hash of <phylogeny> doubles as the tree attribute store
    my $attrs = $self->current_attr;
    $self->{'_tree_attr'} = $attrs;
    $self->processAttribute($attrs);
    return;
}
=head2 end_element_phylogeny
Title : end_element_phylogeny
Usage : $treeio->end_element_phylogeny
Function: ends the parsing of a tree building a Tree::TreeI object.
Returns : Tree::TreeI
Args : none
=cut
sub end_element_phylogeny
{
    my ($self) = @_;
    my $pending = $self->{'_currentnodes'};
    my $root;

    if (@$pending > 1) {
        # several top-level nodes: hang them all under a new, unnamed root
        $root = $self->nodetype->new(
            -id      => '',
            tostring => \&node_to_string,
        );
        $root->add_Descendent($_) for splice(@$pending);
    }
    elsif (@$pending == 1) {
        # a single top-level node is the root itself
        $root = shift @$pending;
    }

    # the attributes collected for <phylogeny> are passed to the tree
    # constructor and also stored on the tree as tag values
    my $attrs = $self->current_attr;
    my $tree = $self->treetype->new(
        -root => $root,
        -id   => $attrs->{'name'},
        %{$attrs}
    );
    $tree->add_tag_value( $_, $attrs->{$_} ) for keys %{$attrs};
    return $tree;
}
=head2 element_clade
Title : element_clade
Usage : $treeio->element_clade
Function: initialize the parsing of a node
creates a new AnnotatableNode with annotations
Returns : none
Args : none
=cut
sub element_clade
{
    my ($self) = @_;

    # the clade's attributes go into a private hash rather than the element
    # stack's attribute hash, in order to save memory
    my %clade_attr;
    $self->processAttribute(\%clade_attr);

    # create the node (Annotatable Node)
    my $tnode = $self->nodetype->new(
        -id      => '',
        tostring => \&node_to_string,
        %clade_attr,
    );

    # keep every attribute as a SimpleValue in a collection tagged '_attr'
    my $attr_collection = Bio::Annotation::Collection->new();
    for my $tag (keys %clade_attr) {
        my $value = Bio::Annotation::SimpleValue->new(
            -value => $clade_attr{$tag}
        );
        $attr_collection->add_Annotation($tag, $value);
    }
    $tnode->annotation->add_Annotation('_attr', $attr_collection);

    # a clade with id_source can be referenced later through _id_link
    $self->{'_id_link'}->{$clade_attr{'id_source'}} = $tnode
        if exists $clade_attr{'id_source'};

    # the node stays on the temporary list until its closing tag
    push @{$self->{'_currentitems'}}, $tnode;
}
=head2 end_element_clade
Title : end_element_clade
Usage : $treeio->end_element_clade
Function: ends the parsing of a node
Returns : none
Args : none
=cut
sub end_element_clade
{
    my ($self) = @_;
    my $reader   = $self->{'_reader'};
    my $finished = $self->{'_currentnodes'};
    my $curcount = scalar @$finished;
    my $level    = $reader->depth() - 2;
    my $below    = $level + 1;
    my $childcnt = $self->{'_levelcnt'}->[$below] || 0;

    # the clade being closed
    my $tnode = pop @{$self->{'_currentitems'}};

    if ($childcnt > 0) {
        # the counter for the level below says how many of the most recently
        # finished nodes are children of this clade
        $self->throw("something wrong with event construction treelevel ".
                     "$level is recorded as having $childcnt nodes ".
                     "but current nodes at this level is $curcount\n")
            if $childcnt > $curcount;
        $tnode->add_Descendent($_) for splice( @$finished, - $childcnt);
        $self->{'_levelcnt'}->[$below] = 0;
    }

    # this clade is now a finished node on its own level
    push @$finished, $tnode;
    $self->{'_levelcnt'}->[$level]++;
}
=head2 element_relation
Title : element_relation
Usage : $treeio->element_relation
Function: starts the parsing of clade relation & sequence relation
Returns : none
Args : none
=cut
sub element_relation
{
    my ($self) = @_;
    my $attrs = $self->current_attr;
    $self->processAttribute($attrs);

    my $relationtype = $attrs->{'type'};
    my $tagname      = $self->current_element;

    # resolve both references to the objects registered under id_source
    my @id_refs    = map { defined $attrs->{$_} ? $attrs->{$_} : '' } qw(id_ref_0 id_ref_1);
    my @srcbyidref = map { $self->{'_id_link'}->{$_} } @id_refs;

    # Both ends must resolve.  The message is one string: additional
    # positional arguments to throw() are not part of the message text, and
    # an unresolved end would otherwise only surface below as a method call
    # on an undefined value.
    foreach my $i (0, 1) {
        next if $id_refs[$i] ne '' && $srcbyidref[$i];
        $self->throw("id_ref and id_src incompatible: "
                     ."id_ref_0='$id_refs[0]', id_ref_1='$id_refs[1]'; "
                     ."id_ref_$i does not match any id_source");
    }

    # relation stored on the object behind id_ref_0, pointing at id_ref_1
    my $forward = Bio::Annotation::Relation->new(
        '-type'    => $relationtype,
        '-to'      => $srcbyidref[1],
        '-tagname' => $tagname
    );
    $srcbyidref[0]->annotation->add_Annotation($tagname, $forward);

    # and the mirror image stored on the object behind id_ref_1
    my $reverse = Bio::Annotation::Relation->new(
        '-type'    => $relationtype,
        '-to'      => $srcbyidref[0],
        '-tagname' => $tagname
    );
    $srcbyidref[1]->annotation->add_Annotation($tagname, $reverse);

    # the mirror relation is the one left on the annotation stack, so it is
    # the one end_element_default attaches a nested <confidence> to
    push (@{$self->{'_currentannotation'}}, $reverse);
}
=head2 end_element_relation
Title : end_element_relation
Usage : $treeio->end_element_relation
Function: ends the parsing of clade relation & sequence relation
Returns : none
Args : none
=cut
sub end_element_relation
{
    my ($self) = @_;
    # drop the relation that element_relation left on the annotation stack
    my $finished = pop @{ $self->{'_currentannotation'} };
    return $finished;
}
=head2 element_default
Title : element_default
Usage : $treeio->element_default
Function: starts the parsing of all other elements
Returns : none
Args : none
=cut
sub element_default
{
    my ($self) = @_;
    my $current = $self->current_element();
    my $prev    = $self->prev_element();

    # read attributes of element
    my $attrs = $self->current_attr;
    $self->processAttribute($attrs);

    # an id_ref must point at a registered id_source, and vice versa
    my $idref      = exists $attrs->{'id_ref'} ? $attrs->{'id_ref'} : '';
    my $srcbyidref = $self->{'_id_link'}->{$idref};
    $self->throw("id_ref and id_src incompatible: $idref, $srcbyidref")
        if ($idref xor $srcbyidref);

    # open a new collection for this element under the given parent and
    # make it the current annotation
    my $open_under = sub {
        my ($parent) = @_;
        my $newann = Bio::Annotation::Collection->new();
        $parent->add_Annotation($current, $newann);
        push (@{$self->{'_currentannotation'}}, $newann);
    };

    if ($srcbyidref ? $srcbyidref->isa($self->nodetype) : $prev eq 'clade') {
        # we are annotating a Node: the referenced one, or else the clade
        # currently being parsed
        my $tnode = $srcbyidref ? $srcbyidref : $self->{'_currentitems'}->[-1];
        $open_under->($tnode->annotation());
    }
    elsif ($prev eq 'clade_relation' || $prev eq 'sequence_relation') {
        # inside a relation: nothing is opened here
    }
    else {
        # we are already within an annotation: nest under it, if there is one
        my $enclosing = $self->{'_currentannotation'}->[-1];
        $open_under->($enclosing) if $enclosing;
    }
}
=head2 end_element_default
Title : end_element_default
Usage : $treeio->end_element_default
Function: ends the parsing of all other elements
Returns : none
Args : none
=cut
sub end_element_default
{
# Finish an element that has no dedicated end handler. What happens depends on
# the enclosing element ($prev) and on whether the element carries an id_ref:
# the text becomes a tree attribute, the confidence of a relation, or an
# annotation of a node or of the enclosing annotation.
my ($self) = @_;
my $reader = $self->{'_reader'};
my $current = $self->current_element();
my $prev = $self->prev_element();
# check idsrc
my $idsrc = $self->current_attr->{'id_source'};
# check idref
my $idref = '';
if (exists $self->current_attr->{'id_ref'}) {
$idref = $self->current_attr->{'id_ref'};
# removed from the attribute hash, so it is not stored with the other attributes
delete $self->current_attr->{'id_ref'};
}
my $srcbyidref = '';
$srcbyidref = $self->{'_id_link'}->{$idref};
# exception when id_ref is defined but id_src is not, or vice versa.
if ($idref xor $srcbyidref) {
$self->throw("id_ref and id_src incompatible: $idref, $srcbyidref");
}
# we are annotating a Tree
if ((!$srcbyidref) && $prev eq 'phylogeny') {
# annotate Tree via tree attribute
# (the element text is stored in the attribute hash of the enclosing <phylogeny>)
$self->prev_attr->{$current} = $self->{'_currenttext'};
}
# we are within sequence_relation or clade_relation
elsif ($prev eq 'clade_relation' || $prev eq 'sequence_relation') {
# the relation on top of the annotation stack (pushed by element_relation)
my $ann_relation = $self->{'_currentannotation'}->[-1];
# we are here only with <confidence>
if ($current eq 'confidence') {
if (exists $self->current_attr->{'type'}) {
$ann_relation->confidence_type($self->current_attr->{'type'});
}
$ann_relation->confidence($self->{'_currenttext'});
}
else {
$self->throw($current, " is not allowed within <*_relation>");
}
}
# we are annotating a Node
elsif (( $srcbyidref && $srcbyidref->isa($self->nodetype)) || ((!$srcbyidref) && $prev eq 'clade'))
{
# pop from current annotation
my $ac = pop (@{$self->{'_currentannotation'}});
# store the element's attributes and text in that collection
$self->annotateNode( $current, $ac);
# additional setups for compatibility with NodeI
my $tnode;
if ($srcbyidref) {
$tnode = $srcbyidref;
}
else {
$tnode = $self->{'_currentitems'}->[-1];
}
# name, branch_length and bootstrap confidence are also set on the node itself
if ($current eq 'name') {
$tnode->id($self->{'_currenttext'});
}
elsif ($current eq 'branch_length') {
$tnode->branch_length($self->{'_currenttext'});
}
elsif ($current eq 'confidence') {
if ((exists $self->current_attr->{'type'}) && ($self->current_attr->{'type'} eq 'bootstrap')) {
$tnode->bootstrap($self->{'_currenttext'}); # this needs to change (adds 'B' annotation)
}
}
elsif ($current eq 'sequence') {
# if annotation is <sequence>
# transform the Bio::Annotation object into a Bio::Seq object
my $str = '';
# retrieve the sequence
if (my ($molseq) = $ac->get_Annotations('mol_seq')) {
my ($strac) = $molseq->get_Annotations('_text');
$str = $strac->value();
}
# create Seq object with sequence
my $newseq = Bio::Seq->new( -seq => $str,
-annotation=>$ac,
-nowarnonempty=>1);
$tnode->sequence($newseq);
# the residues now live in the Seq object, and the sequence itself is attached
# to the node, so both annotation entries are removed
$ac->remove_Annotations('mol_seq');
$tnode->annotation->remove_Annotations($current);
# if there is id_source add sequence to _id_link
if ($idsrc) {
$self->{'_id_link'}->{$idsrc} = $newseq;
}
}
elsif ($idsrc && $current eq 'taxonomy') {
# if there is id_source add the taxonomy annotation collection to _id_link
$self->{'_id_link'}->{$idsrc} = $ac;
}
}
# we are within a default Annotation
else {
my $ac = pop (@{$self->{'_currentannotation'}});
if ($ac) {
$self->annotateNode( $current, $ac);
}
}
}
=head2 annotateNode
Title : annotateNode
Usage : $treeio->annotateNode($element, $ac)
Function: adds text value and attributes to the AnnotationCollection
that has element name as key. If there are nested elements,
optional AnnotationCollections are added recursively,
with the nested element name as key.
The structure of each AnnotationCollection is
'element' => AnnotationCollection {
'_text' => SimpleValue (text value)
'_attr' => AnnotationCollection {
attribute1 => SimpleValue (attribute value 1)
attribute2 => SimpleValue (attribute value 2)
...
}
['nested element' => AnnotationCollection ]
}
Returns : none
Args : name of the element, AnnotationCollection to which the text and attributes are added
=cut
sub annotateNode
{
    my ($self, $element, $newac) = @_;

    # if attributes exist, add an Annotation::Collection with tag '_attr'
    # holding one SimpleValue per attribute
    my $attrs = $self->current_attr;
    if ( scalar keys %{$attrs} ) {
        my $newattr = Bio::Annotation::Collection->new();
        foreach my $tag (keys %{$attrs}) {
            my $sv = Bio::Annotation::SimpleValue->new(
                -value => $attrs->{$tag}
            );
            $newattr->add_Annotation($tag, $sv);
        }
        $newac->add_Annotation('_attr', $newattr);
    }

    # if text exists, add it as a SimpleValue with tag '_text'.  The test is
    # for presence, not truth: an element whose text is "0" has a value and
    # must not lose it.
    my $text = $self->{'_currenttext'};
    if ( defined $text && $text ne '' ) {
        my $newvalue = Bio::Annotation::SimpleValue->new( -value => $text );
        $newac->add_Annotation('_text', $newvalue);
    }
}
=head1 Methods for exploring the document
=cut
=head2 current_attr
Title : current_attr
Usage : $attr_hash = $treeio->current_attr;
Function: returns the attribute hash for current item
Returns : reference of the attribute hash
Args : none
=cut
sub current_attr {
    my ($self) = @_;
    my $lastitem = $self->{'_lastitem'};
    # 0 when no element is open
    return 0 unless defined $lastitem && defined $lastitem->{'current'}->[-1];

    # each stack entry is a one-key hash: element name => attribute hash
    my $top = $lastitem->{'current'}->[-1];
    my ($name, @extra) = keys %$top;
    die "there should be only one key for each hash" if @extra || !defined $name;
    return $top->{$name};
}
=head2 prev_attr
Title : prev_attr
Usage : $hash_ref = $treeio->prev_attr
Function: returns the attribute hash for previous item
Returns : reference of the attribute hash
Args : none
=cut
sub prev_attr {
    my ($self) = @_;
    my $lastitem = $self->{'_lastitem'};
    # 0 when there is no enclosing element
    return 0 unless defined $lastitem && defined $lastitem->{'current'}->[-2];

    # the entry below the top of the stack: element name => attribute hash
    my $below = $lastitem->{'current'}->[-2];
    my ($name, @extra) = keys %$below;
    die "there should be only one key for each hash" if @extra || !defined $name;
    return $below->{$name};
}
=head2 current_element
Title : current_element
Usage : $element = $treeio->current_element
Function: returns the name of the current element
Returns : string (element name)
Args : none
=cut
sub current_element {
    my ($self) = @_;
    my $lastitem = $self->{'_lastitem'};
    # 0 when no element is open
    return 0 unless defined $lastitem && defined $lastitem->{'current'}->[-1];

    # the single key of the top stack entry is the element name
    my ($name, @extra) = keys %{ $lastitem->{'current'}->[-1] };
    die "there should be only one key for each hash" if @extra || !defined $name;
    return $name;
}
=head2 prev_element
Title : prev_element
Usage : $element = $treeio->prev_element
Function: returns the name of the previous element
Returns : string (element name)
Args : none
=cut
sub prev_element {
    my ($self) = @_;
    my $lastitem = $self->{'_lastitem'};
    # 0 when there is no enclosing element
    return 0 unless defined $lastitem && defined $lastitem->{'current'}->[-2];

    # the single key of the entry below the top of the stack is its name
    my ($name, @extra) = keys %{ $lastitem->{'current'}->[-2] };
    die "there should be only one key for each hash" if @extra || !defined $name;
    return $name;
}
=head2 treetype
Title : treetype
Usage : $obj->treetype($newval)
Function: returns the tree type (default is Bio::Tree::Tree)
Returns : value of treetype
Args : newvalue (optional)
=cut
sub treetype{
    my $self = shift;
    # a defined argument replaces the stored class name; undef leaves it alone
    $self->{'treetype'} = $_[0] if defined $_[0];
    return $self->{'treetype'};
}
=head2 nodetype
Title : nodetype
Usage : $obj->nodetype($newval)
Function: returns the node type (default is Bio::Tree::AnnotatableNode)
Returns : value of nodetype
Args : newvalue (optional)
=cut
sub nodetype{
    my $self = shift;
    # a defined argument replaces the stored class name; undef leaves it alone
    $self->{'nodetype'} = $_[0] if defined $_[0];
    return $self->{'nodetype'};
}
=head1 Methods for implementing to_string callback for AnnotatableNode
=cut
=head2 node_to_string
Title : node_to_string
Usage : $annotatablenode->to_string_callback(\&node_to_string)
Function: set as callback in AnnotatableNode, prints the node information in string
Returns : string of node information
Args : none
=cut
# this function is similar to _write_tree_Helper_annotatableNode,
# but it is not recursive
sub node_to_string {
    # NOTE: the invocant is the node being serialised
    # (a Bio::Tree::AnnotatableNode), not a Bio::TreeIO::phyloxml object.
    my $self       = shift;
    my $collection = $self->annotation;

    # Opening <clade> tag; id_source is the only attribute written here.
    my $xml       = '<clade';
    my @attr_sets = $collection->get_Annotations('_attr');
    if (@attr_sets) {
        my @sources = $attr_sets[0]->get_Annotations('id_source');
        $xml .= ' id_source="' . $sources[0]->value . '"' if @sources;
    }
    $xml .= '>';

    # Element body: the node's annotations first ...
    $xml = print_annotation( $self, $xml, $collection );

    # ... followed by each sequence attached to the node, if any.
    if ( $self->has_sequence ) {
        for my $sequence ( @{ $self->sequence } ) {
            $xml = print_seq_annotation( $self, $xml, $sequence );
        }
    }

    return $xml . '</clade>';
}
=head2 print_annotation
Title : print_annotation
Usage : $str = $annotatablenode->print_annotation($str, $annotationcollection)
Function: prints the annotationCollection in a phyloXML format.
Returns : string of annotation information
Args : string to which the Annotation should be concatenated to,
annotationCollection that holds the Annotations
=cut
# Again, it may be more appropriate to make a separate Annotation::phyloXML object
# and have this subroutine within that object so it can handle the
# reading and writing of the values and attributes.
# especially since this function is used both by
# Bio::TreeIO::phyloxml (through write_tree) and
# Bio::Tree::AnnotatableNode (through node_to_string).
# but since tagTree is a temporary stub and I didn't want to make
# a redundant object I've put it here for now.
sub print_annotation
{
    # Serialises every annotation held in $ac as phyloXML and appends the
    # result to $str.
    #   $self : invocant; only forwarded to print_attr() and to the
    #           recursive call
    #   $str  : the string built so far
    #   $ac   : annotation collection to serialise
    # Returns $str with the serialised annotations appended.
    my ($self, $str, $ac) = @_;

    foreach my $ann ( $ac->get_Annotations() ) {
        my $key = $ann->tagname;

        # attributes are already printed in the previous level
        next if $key eq '_attr';

        if ( $ann->isa('Bio::Annotation::SimpleValue') ) {
            # Values are stored as plain text, so markup characters must
            # be escaped or the emitted document is not well-formed.
            my $text = _escape_xml_text( $ann->value );

            # '_text' is the element's own character data; any other key
            # becomes a child element wrapping the value.
            $str .= $key eq '_text' ? $text : "<$key>$text</$key>";
        }
        elsif ( $ann->isa('Bio::Annotation::Collection') ) {
            # Nested collection: open the tag (with its attributes when an
            # '_attr' collection is present), recurse, then close it.
            $str .= "<$key";
            my @attrs = $ann->get_Annotations('_attr');
            $str = print_attr( $self, $str, $attrs[0] ) if @attrs;
            $str .= ">";
            $str = print_annotation( $self, $str, $ann );
            $str .= "</$key>";
        }
        # annotations of any other class are not serialised
    }
    return $str;
}

# Escapes the characters that may not appear literally in XML character
# data (&, <, >). An undefined value is treated as the empty string.
sub _escape_xml_text
{
    my ($text) = @_;
    return '' unless defined $text;
    $text =~ s/&/&amp;/g;    # must run first so later entities stay intact
    $text =~ s/</&lt;/g;
    $text =~ s/>/&gt;/g;
    return $text;
}
=head2 print_attr
Title : print_attr
Usage : $str = $annotatablenode->print_attr($str, $annotationcollection)
Function: prints the attributes held in the annotationCollection as XML attributes (name="value" pairs) for a phyloXML tag.
Returns : string of attributes
Args : string to which the Annotation should be concatenated to,
AnnotationCollection that holds the attributes
=cut
# Again, it may be more appropriate to make a separate Annotation::phyloXML object
# and have this subroutine within that object so it can handle the
# reading and writing of the values and attributes.
# especially since this function is used both by
# Bio::TreeIO::phyloxml and Bio::Tree::AnnotatableNode
# (through print_annotation).
# but since tagTree is a temporary stub and I didn't want to make
# a redundant object I've put it here for now.
sub print_attr
{
    # Appends every attribute held in $ac to $str as ' name="value"'.
    #   $self : invocant; used only to raise the error below via throw()
    #   $str  : the string built so far (normally an unfinished start tag)
    #   $ac   : annotation collection whose members are the attributes
    # Returns $str with the attributes appended.
    # Calls $self->throw() if a member is not a Bio::Annotation::SimpleValue.
    my ($self, $str, $ac) = @_;

    foreach my $attr ( $ac->get_Annotations() ) {
        $self->throw("attribute should be a SimpleValue")
            unless $attr->isa('Bio::Annotation::SimpleValue');

        # The value sits inside double quotes, so quotes and markup
        # characters are escaped to keep the tag well-formed.
        $str .= ' ' . $attr->tagname . '="' . _escape_xml_attr( $attr->value ) . '"';
    }
    return $str;
}

# Escapes the characters that may not appear literally inside a
# double-quoted XML attribute value (&, <, >, "). An undefined value is
# treated as the empty string.
sub _escape_xml_attr
{
    my ($value) = @_;
    return '' unless defined $value;
    $value =~ s/&/&amp;/g;    # must run first so later entities stay intact
    $value =~ s/</&lt;/g;
    $value =~ s/>/&gt;/g;
    $value =~ s/"/&quot;/g;
    return $value;
}
=head2 print_seq_annotation
Title : print_seq_annotation
Usage : $str = $node->print_seq_annotation( $str, $seq );
Function: prints the Bio::Seq object associated with the node
in a phyloXML format.
Returns : string that describes the sequence
Args : string to which the Annotation should be concatenated to,
Seq object to print in phyloXML
=cut
# Again, it may be more appropriate to make a separate Annotation::phyloXML object
# and have this subroutine within that object so it can handle the
# reading and writing of the values and attributes.
# especially since this function is used both by
# Bio::TreeIO::phyloxml (through write_tree) and
# Bio::Tree::AnnotatableNode (through node_to_string).
# but since tagTree is a temporary stub and I didn't want to make
# a redundant object I've put it here for now.
sub print_seq_annotation
{
    # Serialises one sequence object as a phyloXML <sequence> element and
    # appends it to $str.
    #   $self : invocant; only forwarded to print_annotation()
    #   $str  : the string built so far
    #   $seq  : sequence object; its annotation() collection and seq()
    #           string are read
    # Returns $str with the <sequence>...</sequence> element appended.
    my ($self, $str, $seq) = @_;
    my $ac = $seq->annotation;

    # Opening tag; id_source is the only attribute written here.
    $str .= "<sequence";
    my ($attr) = $ac->get_Annotations('_attr'); # check id_source
    if ($attr) {
        my ($id_source) = $attr->get_Annotations('id_source');
        if ($id_source) {
            $str .= " id_source=\"".$id_source->value."\"";
        }
    }
    $str .= ">";

    # The sequence's annotations are serialised exactly like a node's, so
    # the shared routine is used instead of a private copy of its loop.
    $str = print_annotation( $self, $str, $ac );

    # print mol_seq (omitted when the sequence string is empty)
    my $residues = $seq->seq();
    if ($residues) {
        $str .= "<mol_seq>";
        $str .= $residues;
        $str .= "</mol_seq>";
    }

    $str .= "</sequence>";
    return $str;
}
1;