my
$ns_cdao
= _NS_CDAO_;
my
$ns_rdf
= _NS_RDF_;
my
$ns_rdfs
= _NS_RDFS_;
my
%prefix_for_ns
= (
$ns_cdao
=>
'cdao'
,
$ns_rdf
=>
'rdf'
,
$ns_rdfs
=>
'rdfs'
,
);
my
%objects
;
my
$prefixes
=
<<"PREFIXES";
PREFIX rdf: <${ns_rdf}>
PREFIX cdao: <${ns_cdao}>
PREFIX rdfs: <${ns_rdfs}>
PREFIXES
my
$query
=
<<"QUERY";
${prefixes}
SELECT
?subject
WHERE {
?subject rdf:type cdao:%s
}
QUERY
my
$subclass
=
<<"SUBCLASS";
${prefixes}
SELECT
?subject
WHERE {
?subject rdfs:subClassOf cdao:%s
}
SUBCLASS
my
$states
=
<<"STATES";
${prefixes}
SELECT
?subject ?stateset ?state ?label
WHERE {
?subject cdao:belongs_to_TU <%s> .
?subject cdao:belongs_to_Character <%s> .
?subject cdao:has_%sState ?state .
?state rdfs:label ?label .
?state rdf:type ?stateset
}
STATES
sub
_parse {
my
$self
=
shift
;
%objects
= ();
$self
->_args->{
'-opts'
} = {
'lang'
=>
'sparql'
,
'base'
=>
$self
->_args->{
'-base'
},
'update'
=> 0,
'load_data'
=> 0,
};
$self
->_project->set_base_uri(
$self
->_args->{
'-base'
});
$self
->_process_tus;
$self
->_process_trees;
$self
->_process_nodes;
$self
->_process_edges;
$self
->_process_matrices;
my
$proj
=
$self
->_project;
my
@objects
= ( @{
$proj
->get_taxa }, @{
$proj
->get_forests }, @{
$proj
->get_matrices } );
$proj
->clear;
return
@objects
;
}
sub
_object_from_resource {
my
(
$self
,
$resource
,
$creator
) =
@_
;
my
$fac
=
$self
->_factory;
my
$base
=
$self
->_args->{
'-base'
};
my
$uri
=
$resource
->value;
my
$id
=
$uri
;
$id
=~ s/^\Q
$base
\E
my
$object
=
$fac
->
$creator
(
'-guid'
=>
$id
,
'-xml_id'
=>
$id
);
my
$iterator
=
$self
->_args->{
'-model'
}->get_statements(
$resource
,
undef
,
undef
);
while
(
my
$inner
=
$iterator
->
next
) {
my
(
$predicate
,
$value
) = (
$inner
->predicate,
$inner
->object );
$self
->_process_annotation(
$predicate
->value,
$value
->value,
$object
);
}
$objects
{
$uri
} =
$object
;
}
sub
_parse_predicate {
my
(
$self
,
$predicate
) =
@_
;
my
(
$ns
,
$term
);
if
(
$predicate
=~ m/^(.+
(
$ns
,
$term
) = ( $1, $2 );
}
elsif
(
$predicate
=~ m/^(.+\/)([^\/]+?)$/ ) {
(
$ns
,
$term
) = ( $1, $2 );
}
elsif
(
$predicate
=~ m/^(.+:)([^:]+?)$/ ) {
(
$ns
,
$term
) = ( $1, $2 );
}
else
{
$self
->_logger->
warn
(
"Can't parse URI $predicate"
);
}
return
$ns
,
$term
;
}
sub
_process_annotation {
my
(
$self
,
$predicate
,
$value
,
$object
) =
@_
;
my
$fac
=
$self
->_factory;
$predicate
=~ s/^<(.+)>$/$1/;
return
if
$predicate
eq _NS_RDF_ .
'type'
;
my
(
$ns
,
$term
) =
$self
->_parse_predicate(
$predicate
);
my
$prefix
=
$prefix_for_ns
{
$ns
} ||
'ns'
.
scalar
(
keys
%prefix_for_ns
);
$prefix_for_ns
{
$prefix
} =
$ns
;
if
(
"${prefix}:${term}"
eq
'rdfs:label'
) {
$object
->set_name(
$value
);
return
;
}
if
(
"${prefix}:${term}"
eq
'cdao:represents_TU'
) {
$object
->set_taxon(
$objects
{
$value
} );
return
;
}
if
(
"${prefix}:${term}"
eq
'cdao:has_Ancestor'
) {
return
;
}
if
(
"${prefix}:${term}"
eq
'cdao:has_Root'
) {
return
;
}
$object
->set_namespaces(
$prefix
=>
$ns
);
$object
->add_meta(
$fac
->create_meta(
'-triple'
=> {
"${prefix}:${term}"
=>
$value
}
)
);
}
sub
_do_query {
my
(
$self
,
$type
,
$type_query
) =
@_
;
$type_query
=
$query
unless
$type_query
;
my
$sth
= RDF::Query->new(
sprintf
(
$type_query
,
$type
),
$self
->_args->{
'-opts'
} );
return
$sth
->execute(
$self
->_args->{
'-model'
} );
}
sub
_process_matrices {
my
$self
=
shift
;
my
$fac
=
$self
->_factory;
my
$model
=
$self
->_args->{
'-model'
};
my
$iter
=
$self
->_do_query(
'CharacterStateDataMatrix'
);
while
(
my
$row
=
$iter
->
next
) {
my
$subject
=
$row
->{
'subject'
};
my
$matrix
=
$self
->_object_from_resource(
$subject
,
'create_matrix'
);
my
(
$taxa
) = @{
$self
->_project->get_taxa };
$matrix
->set_taxa(
$taxa
);
my
(
$rowlist
,
%row
) =
$self
->_create_rows(
$matrix
);
my
(
$charlist
,
%char
) =
$self
->_create_characters(
$matrix
);
my
%types
= (
'Nucleotide_'
=>
'dna'
,
'Continuous_'
=>
'continuous'
,
'Standard_'
=>
'standard'
,
''
=>
'standard'
,
);
my
$datatype
;
for
my
$row_uri
( @{
$rowlist
} ) {
for
my
$col_uri
( @{
$charlist
} ) {
if
( not
$datatype
) {
TYPE_SEARCH :
for
my
$predicate
(
keys
%types
) {
my
$state_query
=
sprintf
(
$states
,
$row_uri
,
$col_uri
,
$predicate
);
my
$sth
= RDF::Query->new(
$state_query
,
$self
->_args->{
'-opts'
} );
my
$state_iterator
=
$sth
->execute(
$self
->_args->{
'-model'
} );
if
(
my
$state
=
$state_iterator
->
next
) {
$datatype
=
$predicate
;
$matrix
->set_type(
$types
{
$predicate
});
last
TYPE_SEARCH;
}
}
}
else
{
my
$state_query
=
sprintf
(
$states
,
$row_uri
,
$col_uri
,
$datatype
);
my
$sth
= RDF::Query->new(
$state_query
,
$self
->_args->{
'-opts'
} );
my
$state_iterator
=
$sth
->execute(
$self
->_args->{
'-model'
} );
while
(
my
$state
=
$state_iterator
->
next
) {
if
(
my
$val
=
$state
->{label}->value ) {
$row
{
$row_uri
}->insert(
$val
);
}
}
}
}
}
$self
->_logger->debug(
$matrix
->to_nexus);
}
}
sub
_create_rows {
my
(
$self
,
$matrix
) =
@_
;
my
$fac
=
$self
->_factory;
my
(
%row
,
@rowlist
);
my
$tu_metas
=
$matrix
->get_meta(
'cdao:has_TU'
);
for
my
$tu_meta
( @{
$tu_metas
} ) {
my
$tu_uri
=
$tu_meta
->get_object;
my
$row
=
$fac
->create_datum(
'-taxon'
=>
$objects
{
$tu_uri
},
'-name'
=>
$objects
{
$tu_uri
}->get_name,
);
$row
{
$tu_uri
} =
$row
;
$matrix
->insert(
$row
);
push
@rowlist
,
$tu_uri
;
}
return
\
@rowlist
,
%row
;
}
sub
_create_characters {
my
(
$self
,
$matrix
) =
@_
;
my
(
%char
,
@charlist
);
my
$characters
=
$matrix
->get_characters;
my
$char_metas
=
$matrix
->get_meta(
'cdao:has_Character'
);
for
my
$char_meta
( @{
$char_metas
} ) {
my
$char_uri
=
$char_meta
->get_object;
my
$char_resource
= RDF::Trine::Node::Resource->new(
$char_uri
);
my
$char
=
$self
->_object_from_resource(
$char_resource
,
'create_character'
);
$char
{
$char_uri
} =
$char
;
$characters
->insert(
$char
);
push
@charlist
,
$char_uri
;
}
return
\
@charlist
,
%char
;
}
sub
_process_tus {
my
$self
=
shift
;
my
$fac
=
$self
->_factory;
my
$taxa
=
$fac
->create_taxa;
my
$model
=
$self
->_args->{
'-model'
};
my
$iter
=
$self
->_do_query(
'TU'
);
while
(
my
$row
=
$iter
->
next
) {
my
$subject
=
$row
->{
'subject'
};
my
$taxon
=
$self
->_object_from_resource(
$subject
,
'create_taxon'
);
$taxa
->insert(
$taxon
);
}
$self
->_project->insert(
$taxa
);
}
sub
_process_trees {
my
$self
=
shift
;
my
$fac
=
$self
->_factory;
my
(
$taxa
) = @{
$self
->_project->get_items(_TAXA_) };
my
$forest
=
$fac
->create_forest(
'-taxa'
=>
$taxa
);
my
$model
=
$self
->_args->{
'-model'
};
my
$rooted_iter
=
$self
->_do_query(
'RootedTree'
);
while
(
my
$row
=
$rooted_iter
->
next
) {
my
$subject
=
$row
->{
'subject'
};
my
$tree
=
$self
->_object_from_resource(
$subject
,
'create_tree'
);
$forest
->insert(
$tree
);
}
my
$unrooted_iter
=
$self
->_do_query(
'UnrootedTree'
);
while
(
my
$row
=
$unrooted_iter
->
next
) {
my
$subject
=
$row
->{
'subject'
};
my
$tree
=
$self
->_object_from_resource(
$subject
,
'create_tree'
);
$tree
->set_as_unrooted;
$forest
->insert(
$tree
);
}
$self
->_project->insert(
$forest
);
}
sub
_process_nodes {
my
$self
=
shift
;
my
$model
=
$self
->_args->{
'-model'
};
my
$logger
=
$self
->_logger;
my
$node_iter
=
$self
->_do_query(
'Node'
);
while
(
my
$row
=
$node_iter
->
next
) {
my
$subject
=
$row
->{
'subject'
};
my
$node
=
$self
->_object_from_resource(
$subject
,
'create_node'
);
my
(
$value
) = @{
$node
->get_meta(
'cdao:belongs_to_Tree'
) };
$objects
{
$value
->get_object}->insert(
$node
)
if
$objects
{
$value
->get_object};
$node
->remove_meta(
$value
);
}
}
sub
_process_edges {
my
$self
=
shift
;
my
$model
=
$self
->_args->{
'-model'
};
my
$logger
=
$self
->_logger;
my
$edge_iter
=
$self
->_do_query(
'DirectedEdge'
);
while
(
my
$row
=
$edge_iter
->
next
) {
my
$subject
=
$row
->{
'subject'
};
my
$edge_statements
=
$model
->get_statements(
$subject
);
my
(
$parent_uri
,
$child_uri
,
$branch_length
);
LINK:
while
(
my
$st
=
$edge_statements
->
next
) {
my
$predicate
=
$st
->predicate->value;
$logger
->debug(
$predicate
);
if
(
$predicate
eq
"${ns_cdao}has_Parent_Node"
) {
$parent_uri
=
$st
->object->value;
}
elsif
(
$predicate
eq
"${ns_cdao}has_Child_Node"
) {
$child_uri
=
$st
->object->value;
}
elsif
(
$predicate
eq
"${ns_cdao}has_Annotation"
) {
my
$annotation_statements
=
$model
->get_statements(
$st
->object);
ANNO:
while
(
my
$anno
=
$annotation_statements
->
next
) {
my
$anno_pre
=
$anno
->predicate->value;
if
(
$anno_pre
=~ /^\Q${ns_cdao}\Ehas_(?:Int|Float)_Value/ ) {
$branch_length
=
$anno
->object->value;
last
ANNO;
}
}
}
last
LINK
if
$parent_uri
&&
$child_uri
;
}
$logger
->debug(
"Parent: $parent_uri Child: $child_uri"
);
$objects
{
$parent_uri
}->set_child(
$objects
{
$child_uri
});
$objects
{
$child_uri
}->set_branch_length(
$branch_length
)
if
defined
$branch_length
;
}
}
1;