# Release number of this module.
$Bio::SearchIO::infernal::VERSION = '1.7.8';

# Event element name => handler type. start_element() and end_element()
# look the element name up here; a hit selects the start_<type>/end_<type>
# method that is invoked on the event handler.
our %MODEMAP = (
    'Result' => 'result',
    'Hit'    => 'hit',
    'Hsp'    => 'hsp',
);
# Parser element name => key under which the value is stored in
# $self->{'_values'} (see end_element() and element_hash()).
our %MAPPING = (

    # HSP-level values
    'Hsp_bit-score'    => 'HSP-bits',
    'Hsp_score'        => 'HSP-score',
    'Hsp_evalue'       => 'HSP-evalue',
    'Hsp_pvalue'       => 'HSP-pvalue',
    'Hsp_query-from'   => 'HSP-query_start',
    'Hsp_query-to'     => 'HSP-query_end',
    'Hsp_query-strand' => 'HSP-query_strand',
    'Hsp_hit-from'     => 'HSP-hit_start',
    'Hsp_hit-to'       => 'HSP-hit_end',
    'Hsp_hit-strand'   => 'HSP-hit_strand',
    'Hsp_gaps'         => 'HSP-hsp_gaps',
    'Hsp_hitgaps'      => 'HSP-hit_gaps',
    'Hsp_querygaps'    => 'HSP-query_gaps',
    'Hsp_qseq'         => 'HSP-query_seq',
    'Hsp_ncline'       => 'HSP-nc_seq',
    'Hsp_hseq'         => 'HSP-hit_seq',
    'Hsp_midline'      => 'HSP-homology_seq',
    'Hsp_pline'        => 'HSP-pp_seq',
    'Hsp_structure'    => 'HSP-meta',
    'Hsp_align-len'    => 'HSP-hsp_length',
    'Hsp_stranded'     => 'HSP-stranded',

    # Hit-level values (Hit_desc and Hit_def share one target key)
    'Hit_id'        => 'HIT-name',
    'Hit_len'       => 'HIT-length',
    'Hit_gi'        => 'HIT-ncbi_gi',
    'Hit_accession' => 'HIT-accession',
    'Hit_desc'      => 'HIT-description',
    'Hit_def'       => 'HIT-description',
    'Hit_signif'    => 'HIT-significance',
    'Hit_p'         => 'HIT-p',
    'Hit_score'     => 'HIT-score',
    'Hit_bits'      => 'HIT-bits',

    # Result-level values
    'Infernal_program'   => 'RESULT-algorithm_name',
    'Infernal_version'   => 'RESULT-algorithm_version',
    'Infernal_query-def' => 'RESULT-query_name',
    'Infernal_query-len' => 'RESULT-query_length',
    'Infernal_query-acc' => 'RESULT-query_accession',
    'Infernal_querydesc' => 'RESULT-query_description',
    'Infernal_cm'        => 'RESULT-cm_name',
    'Infernal_db'        => 'RESULT-database_name',
    'Infernal_db-len'    => 'RESULT-database_entries',
    'Infernal_db-let'    => 'RESULT-database_letters',
);
# Fallback values applied by _initialize() when the caller gives none.
my $MINSCORE          = 0;
my $DEFAULT_ALGORITHM = 'cmsearch';
my $DEFAULT_VERSION   = '1.1';

# Symbol names that structure_symbols() accepts from a caller-supplied hash.
my @VALID_SYMBOLS = qw(5-prime 3-prime single-strand unknown gap);

# Default output character for each structure symbol name.
my %STRUCTURE_SYMBOLS = (
    '5-prime'       => '<',
    '3-prime'       => '>',
    'single-strand' => ':',
    'unknown'       => '?',
    'gap'           => '.',
);
# _initialize
#
# Constructor hook: passes @args to the parent _initialize, registers the
# result/hit/hsp object factories on the event handler and stores the
# named options on the object.
#
# Recognised named arguments (as listed in the _rearrange call): MODEL,
# DATABASE, CONVERT_META, SYMBOLS, HSP_MINSCORE, QUERY_DESC, QUERY_ACC,
# ALGORITHM, VERSION.
sub _initialize {
    my ( $self, @args ) = @_;
    $self->SUPER::_initialize(@args);

    my (
        $model, $database,  $convert,   $symbols, $cutoff,
        $desc,  $accession, $algorithm, $version
      )
      = $self->_rearrange(
        [
            qw(MODEL DATABASE CONVERT_META SYMBOLS HSP_MINSCORE
              QUERY_DESC QUERY_ACC ALGORITHM VERSION)
        ],
        @args
      );

    # One row per factory: event type, concrete class, required interface.
    my @factory_specs = (
        [ 'result', 'Bio::Search::Result::INFERNALResult', 'Bio::Search::Result::ResultI' ],
        [ 'hit',    'Bio::Search::Hit::ModelHit',          'Bio::Search::Hit::HitI' ],
        [ 'hsp',    'Bio::Search::HSP::ModelHSP',          'Bio::Search::HSP::HSPI' ],
    );
    my $handler = $self->_eventHandler;
    for my $spec (@factory_specs) {
        my ( $event, $type, $interface ) = @{$spec};
        $handler->register_factory(
            $event,
            Bio::Factory::ObjectFactory->new(
                -type      => $type,
                -interface => $interface,
                -verbose   => $self->verbose
            )
        );
    }

    # Optional settings are only stored when the caller supplied them.
    $self->model($model)                if defined $model;
    $self->database($database)          if defined $database;
    $self->query_accession($accession)  if defined $accession;
    $self->convert_meta($convert)       if defined $convert;
    $self->query_description($desc)     if defined $desc;

    # Remaining settings fall back to the file-level defaults when false.
    $self->version( $version || $DEFAULT_VERSION );
    $self->structure_symbols( $symbols || \%STRUCTURE_SYMBOLS );
    $self->hsp_minscore( $cutoff || $MINSCORE );
    $self->algorithm( $algorithm || $DEFAULT_ALGORITHM );
}
# next_result
#
# Returns the next parsed result by dispatching to one of the four
# format-specific parsers: _parse_v1_1, _parse_latest, _parse_pre or
# _parse_old.
#
# When the object has no _handlerset key yet, the input is probed first:
# blank lines are skipped and the first non-blank line decides the value
# stored in _handlerset.
#   1.1      header pattern matched and the following line matches
#            INFERNAL 1\.1
#   latest   header pattern matched but the following line does not
#   pre-1.0  line starts with CM, one whitespace, digits and a colon
#   old      anything else
# The lines read while probing are pushed back so the selected parser
# sees them again.
#
# NOTE(review): the header pattern (the first match against $line inside
# the loop, after the blank-line check) is cut off after its leading
# anchor and backslash in this copy of the file. It needs to be restored
# from the upstream source before this sub can compile - TODO confirm.
sub
next_result {
my
(
$self
) =
@_
;
unless
(
exists
$self
->{
'_handlerset'
}) {
my
$line
;
while
(
$line
=
$self
->_readline) {
next
if
$line
=~ m{^\s*$};
if
(
$line
=~ m{^\
my
$secondline
=
$self
->_readline;
if
(
$secondline
=~ m{INFERNAL 1\.1}) {
$self
->{
'_handlerset'
} =
'1.1'
;
}
else
{
$self
->{
'_handlerset'
} =
'latest'
;
}
$self
->_pushback(
$secondline
);
}
elsif
(
$line
=~ m{^CM\s\d+:}) {
$self
->{
'_handlerset'
} =
'pre-1.0'
;
}
else
{
$self
->{
'_handlerset'
} =
'old'
;
}
last
;
}
$self
->_pushback(
$line
);
}
return
(
$self
->{
'_handlerset'
} eq
'1.1'
) ?
$self
->_parse_v1_1 :
(
$self
->{
'_handlerset'
} eq
'latest'
) ?
$self
->_parse_latest :
(
$self
->{
'_handlerset'
} eq
'pre-1.0'
) ?
$self
->_parse_pre :
$self
->_parse_old;
}
# _parse_v1_1
#
# Parser used when next_result has set _handlerset to 1.1. Reads one
# report from the input, collects hits and HSPs into local lists, then
# replays them as Result/Hit/Hsp events and returns the value of
# end_document().
#
# Returns undef straight away when no line is left or the next line
# starts with [ok .
#
# Reading phase, by line type:
#   header lines       start the Result element and record program name,
#                      version, CM file and target database
#   Query:             query name and CLEN, plus any accession and
#                      description supplied through the accessors
#   Accession: and     used only when the caller did not supply a query
#   Description:       accession or description
#   Hit scores:        each table row is split on whitespace and stored
#                      in @hit_list as [hitid, desc, evalue, score],
#                      indexed by hit id followed by a running counter
#   Hit alignments:    a line starting with two > characters opens a hit
#                      and sets its description; the table rows below it
#                      are stored in @hsp_list as [hitid, cm_start,
#                      cm_stop, seq_start, seq_stop, score, evalue,
#                      hitlength]. hitlength is seq_stop when the
#                      sequence coverage column ends in ] and 0 otherwise.
#                      Alignment text starting at a line ending in NC is
#                      read in groups of six lines (nc, meta, query,
#                      midline, hit, pp) and appended to the HSP record.
#   Internal CM pipeline statistics summary:
#                      reads the Target sequences line for the database
#                      entry and letter counts, stops at a line starting
#                      with two slashes, then leaves the main loop
#   anything else      passed to debug()
#
# Emitting phase: for every entry of @hit_list a Hit element is emitted
# (significance is the E-value; score and bits are both the score),
# followed by its HSPs in order. Hit_len is emitted only for a non-zero
# hitlength.
#
# NOTE(review): the four header patterns at the top of the main loop
# (the branches that set seentop, Infernal_version, Infernal_cm and
# Infernal_db) are cut off after their leading anchor and backslash in
# this copy of the file. They need to be restored from the upstream
# source before this sub can compile - TODO confirm.
# NOTE(review): $strlen and $last are assigned but do not appear to be
# read anywhere in this sub - confirm before removing.
sub
_parse_v1_1 {
my
(
$self
) =
@_
;
my
$seentop
= 0;
local
$/ =
"\n"
;
my
(
$accession
,
$description
) = (
$self
->query_accession,
$self
->query_description);
my
(
$buffer
,
$last
,
%modelcounter
,
@hit_list
,
%hitindex
,
@hsp_list
,
%hspindex
);
$self
->start_document();
$buffer
=
$self
->_readline;
if
( !
defined
$buffer
||
$buffer
=~ m/^\[ok/ ) {
return
undef
;
}
else
{
$self
->_pushback(
$buffer
);
}
PARSER:
while
(
defined
(
$buffer
=
$self
->_readline ) ) {
my
$hit_counter
= 0;
my
$lineorig
=
$buffer
;
chomp
$buffer
;
if
(
$buffer
=~ m/^\
$seentop
= 1;
my
$prog
= $1;
$self
->start_element( {
'Name'
=>
'Result'
} );
$self
->element_hash( {
'Infernal_program'
=>
uc
(
$prog
) } );
}
elsif
(
$buffer
=~ m/^\
my
$version
= $1;
my
$versiondate
= $2;
$self
->{
'_cmidline'
} =
$buffer
;
$self
->element_hash( {
'Infernal_version'
=>
$version
} );
}
elsif
(
$buffer
=~ /^\
$self
->{
'_cmfileline'
} =
$lineorig
;
$self
->element_hash( {
'Infernal_cm'
=> $1 } );
}
elsif
(
$buffer
=~ m/^\
$self
->{
'_cmseqline'
} =
$lineorig
;
$self
->element_hash( {
'Infernal_db'
=> $1 } );
}
elsif
(
$buffer
=~ m/^Query:\s+(\S+)\s+\[CLEN=(\d+)\]$/ ) {
$self
->element_hash( {
'Infernal_query-def'
=> $1,
'Infernal_query-len'
=> $2,
'Infernal_query-acc'
=>
$accession
,
'Infernal_querydesc'
=>
$description
,
} );
}
elsif
(
$buffer
=~ s/^Accession:\s+// && !
$accession
) {
$buffer
=~ s/\s+$//;
$self
->element_hash( {
'Infernal_query-acc'
=>
$buffer
} );
}
elsif
(
$buffer
=~ s/^Description:\s+// && !
$description
) {
$buffer
=~ s/\s+$//;
$self
->element_hash( {
'Infernal_querydesc'
=>
$buffer
} );
}
elsif
(
$buffer
=~ m/^Hit scores:/) {
@hit_list
= ();
while
(
defined
(
$buffer
=
$self
->_readline ) ) {
if
(
$buffer
=~ m/^Hit alignments:/ ) {
$self
->_pushback(
$buffer
);
last
;
}
elsif
(
$buffer
=~ m/^\s+rank\s+E-value/
||
$buffer
=~ m/\-\-\-/
||
$buffer
=~ m/^$/
||
$buffer
=~ m/No hits detected/ ) {
next
;
}
$hit_counter
++;
my
(
$rank
,
$threshold
,
$eval
,
$score
,
$bias
,
$hitid
,
$start
,
$end
,
$strand
,
$mdl
,
$truc
,
$gc
,
@desc
) =
split
(
" "
,
$buffer
);
my
$desc
=
join
" "
,
@desc
;
$desc
=
''
if
( !
defined
(
$desc
) );
push
@hit_list
, [
$hitid
,
$desc
,
$eval
,
$score
];
$hitindex
{
$hitid
.
$hit_counter
} =
$#hit_list
;
}
}
elsif
(
$buffer
=~ /^Hit alignments:/ ) {
my
$hitid
;
my
$align_counter
= 0;
while
(
defined
(
$buffer
=
$self
->_readline ) ) {
if
(
$buffer
=~ /^Internal CM pipeline statistics summary/ ) {
$self
->_pushback(
$buffer
);
last
;
}
if
(
$buffer
=~ m/^\>\>\s(\S*)\s+(.*)/ ) {
$hitid
= $1;
my
$desc
= $2;
$align_counter
++;
my
$hitid_alignctr
=
$hitid
.
$align_counter
;
$modelcounter
{
$hitid_alignctr
} = 0;
$hit_list
[
$hitindex
{
$hitid_alignctr
} ][1] =
$desc
;
while
(
defined
(
$buffer
=
$self
->_readline ) ) {
if
(
$buffer
=~ m/^Internal CM pipeline statistics/
||
$buffer
=~ m/NC$/
||
$buffer
=~ m/^\>\>/ ) {
$self
->_pushback(
$buffer
);
last
;
}
elsif
(
$buffer
=~ m/^\s+rank\s+E-value/
||
$buffer
=~ m/^\s----/
||
$buffer
=~ m/^$/
||
$buffer
=~ m/No hits detected/ ) {
next
;
}
my
(
$rank
,
$threshold
,
$eval
,
$score
,
$bias
,
$model
,
$cm_start
,
$cm_stop
,
$cm_cov
,
$seq_start
,
$seq_stop
,
$seq_strand
,
$seq_cov
,
$acc
,
$trunc
,
$gc
,
) =
split
(
" "
,
$buffer
);
my
$hitlength
= (
$seq_cov
=~ m/\]$/ ) ?
$seq_stop
: 0;
my
$tmphit
=
$hit_list
[
$hitindex
{
$hitid_alignctr
} ];
if
( !
defined
$tmphit
) {
$self
->
warn
(
"Incomplete information: can't find HSP $hitid in list of hits\n"
);
next
;
}
push
@hsp_list
, [
$hitid
,
$cm_start
,
$cm_stop
,
$seq_start
,
$seq_stop
,
$score
,
$eval
,
$hitlength
];
$modelcounter
{
$hitid_alignctr
}++;
my
$hsp_key
=
$hitid_alignctr
.
"_"
.
$modelcounter
{
$hitid_alignctr
};
$hspindex
{
$hsp_key
} =
$#hsp_list
;
}
}
elsif
(
$buffer
=~ m/NC$/ ) {
my
$csline
=
$self
->_readline;
$csline
=~ m/^(\s+)\S+ CS$/;
my
$offset
=
length
($1);
$self
->_pushback(
$csline
);
$self
->_pushback(
$buffer
);
my
(
$ct
,
$strln
) = 0;
my
$hspdata
;
HSP:
my
%hspline
= (
'0'
=>
'nc'
,
'1'
=>
'meta'
,
'2'
=>
'query'
,
'3'
=>
'midline'
,
'4'
=>
'hit'
,
'5'
=>
'pp'
);
HSP:
while
(
defined
(
$buffer
=
$self
->_readline)) {
chomp
$buffer
;
if
(
$buffer
=~ /^>>\s/
||
$buffer
=~ /^Internal CM pipeline statistics/) {
$self
->_pushback(
$buffer
);
last
HSP;
}
elsif
(
$ct
% 6 == 0 &&
$buffer
=~ /^$/ ) {
next
;
}
my
$iterator
=
$ct
% 6;
$strln
= (
length
(
$buffer
) - 3 )
if
$iterator
== 0;
my
$data
=
substr
(
$buffer
,
$offset
,
$strln
-
$offset
);
$hspdata
->{
$hspline
{
$iterator
} } .=
$data
;
$ct
++;
}
my
$strlen
= 0;
while
(
$hspdata
->{
'query'
} =~ m{\*\[\s*(\d+)\s*\]\*}g) {
$strlen
+= $1;
}
$strlen
+=
$hspdata
->{
'query'
} =~
tr
{A-Za-z}{A-Za-z};
my
$metastr
= (
$self
->convert_meta) ? (
$self
->simple_meta(
$hspdata
->{
'meta'
})) :
$hspdata
->{
'meta'
};
my
$hitid_alignctr
=
$hitid
.
$align_counter
;
my
$hsp_key
=
$hitid_alignctr
.
"_"
.
$modelcounter
{
$hitid_alignctr
};
my
$hsp
=
$hsp_list
[
$hspindex
{
$hsp_key
} ];
push
(
@$hsp
,
$hspdata
->{
'nc'
},
$metastr
,
$hspdata
->{
'query'
},
$hspdata
->{
'midline'
},
$hspdata
->{
'hit'
},
$hspdata
->{
'pp'
});
}
}
}
elsif
(
$buffer
=~ m/Internal CM pipeline statistics summary:/ ) {
while
(
defined
(
$buffer
=
$self
->_readline ) ) {
last
if
(
$buffer
=~ m!^//! );
if
(
$buffer
=~ /^Target sequences:\s+(\d+)\s+\((\d+) residues/ ) {
$self
->element_hash( {
'Infernal_db-len'
=> $1,
'Infernal_db-let'
=> $2, } );
}
}
last
;
}
else
{
$self
->debug(
$buffer
);
}
$last
=
$buffer
;
}
my
$hit_counter
= 0;
foreach
my
$hit
(
@hit_list
) {
$hit_counter
++;
my
(
$hit_name
,
$hit_desc
,
$hit_signif
,
$hit_score
) =
@$hit
;
my
$num_hsp
=
$modelcounter
{
$hit_name
.
$hit_counter
} || 0;
$self
->start_element( {
'Name'
=>
'Hit'
} );
$self
->element_hash( {
'Hit_id'
=>
$hit_name
,
'Hit_desc'
=>
$hit_desc
,
'Hit_signif'
=>
$hit_signif
,
'Hit_score'
=>
$hit_score
,
'Hit_bits'
=>
$hit_score
, } );
for
my
$i
( 1 ..
$num_hsp
) {
my
$hsp_key
=
$hit_name
.
$hit_counter
.
"_"
.
$i
;
my
$hsp
=
$hsp_list
[
$hspindex
{
$hsp_key
} ];
if
(
defined
$hsp
) {
my
$hspid
=
shift
@$hsp
;
my
(
$cm_start
,
$cm_stop
,
$seq_start
,
$seq_stop
,
$score
,
$eval
,
$hitlength
,
$ncline
,
$csline
,
$qseq
,
$midline
,
$hseq
,
$pline
) =
@$hsp
;
if
(
$hitlength
!= 0 ) {
$self
->element(
{
'Name'
=>
'Hit_len'
,
'Data'
=>
$hitlength
}
);
}
$self
->start_element( {
'Name'
=>
'Hsp'
} );
$self
->element_hash( {
'Hsp_stranded'
=>
'HIT'
,
'Hsp_query-from'
=>
$cm_start
,
'Hsp_query-to'
=>
$cm_stop
,
'Hsp_hit-from'
=>
$seq_start
,
'Hsp_hit-to'
=>
$seq_stop
,
'Hsp_score'
=>
$score
,
'Hsp_bit-score'
=>
$score
,
'Hsp_evalue'
=>
$eval
,
'Hsp_ncline'
=>
$ncline
,
'Hsp_structure'
=>
$csline
,
'Hsp_qseq'
=>
$qseq
,
'Hsp_midline'
=>
$midline
,
'Hsp_hseq'
=>
$hseq
,
'Hsp_pline'
=>
$pline
,
} );
$self
->end_element( {
'Name'
=>
'Hsp'
} );
}
}
$self
->end_element( {
'Name'
=>
'Hit'
} );
}
$self
->end_element( {
'Name'
=>
'Result'
} );
my
$result
=
$self
->end_document();
return
$result
;
}
# start_element
#
# Opens an element. $data is a hash ref whose Name entry is looked up in
# %MODEMAP. For a known element type the matching start_<type> method is
# called on the event handler (when the handler reports it will handle
# that type) with $data->{'Attributes'}, and the type is pushed on the
# front of the _elements stack. Opening a result also clears the
# collected values and the stored result.
sub start_element {
    my ( $self, $data ) = @_;

    my $element_name = $data->{'Name'};
    my $type         = $MODEMAP{$element_name};

    if ($type) {
        if ( $self->_eventHandler->will_handle($type) ) {
            my $method = 'start_' . lc $type;
            $self->_eventHandler->$method( $data->{'Attributes'} );
        }
        unshift @{ $self->{'_elements'} }, $type;
    }

    # A new result starts from a clean slate.
    if ( defined $type && $type eq 'result' ) {
        $self->{'_values'} = {};
        $self->{'_result'} = undef;
    }
}
# end_element
#
# Closes an element. $data is a hash ref with a Name entry.
#
# - Name is a structural element (found in %MODEMAP): the matching
#   end_<type> method is called on the event handler (when it will handle
#   that type) with _reporttype and the collected _values, the element is
#   popped off the _elements stack, and closing a hit drops the
#   HIT-length and HSP-hit_length values.
# - Name is a data element (found in %MAPPING): the text last stored by
#   characters() is filed in _values under the mapped key.
# - Otherwise a debug message is emitted.
#
# Returns the value returned by the handler, or undef when no handler
# was called. For a result element that value is also stored in _result.
sub end_element {
    my ( $self, $data ) = @_;

    my $nm   = $data->{'Name'};
    my $type = $MODEMAP{$nm};
    my $rc;

    if ($type) {
        if ( $self->_eventHandler->will_handle($type) ) {
            my $method = 'end_' . lc $type;
            $rc = $self->_eventHandler->$method( $self->{'_reporttype'},
                $self->{'_values'} );
        }

        # Pop the element that was just closed.
        shift @{ $self->{'_elements'} };

        if ( $type eq 'hit' ) {
            delete $self->{_values}{'HIT-length'};
            delete $self->{_values}{'HSP-hit_length'};
        }
    }
    elsif ( $MAPPING{$nm} ) {
        my $target = $MAPPING{$nm};
        if ( ref($target) =~ /hash/i ) {

            # Hash-valued mapping: store one level deeper.
            my $key = ( keys %{$target} )[0];
            $self->{'_values'}->{$key}->{ $target->{$key} } =
              $self->{'_last_data'};
        }
        else {
            $self->{'_values'}->{$target} = $self->{'_last_data'};
        }
    }
    else {
        $self->debug("unknown nm $nm, ignoring\n");
    }

    $self->{'_last_data'} = '';
    if ( defined $type && $type eq 'result' ) {
        $self->{'_result'} = $rc;
    }
    return $rc;
}
# element
#
# Convenience wrapper for a complete data element: hands $data to
# characters() and then to end_element(), returning whatever
# end_element() returns.
sub element {
    my $self = shift;
    my ($data) = @_;

    $self->characters($data);
    return $self->end_element($data);
}
# element_hash
#
# Stores several data elements at once. $data is a hash ref of element
# name => value; names are processed in sorted order. A value that is
# true and consists only of whitespace is skipped, as is any name with
# no %MAPPING entry. Every other value is filed in _values under the
# mapped key.
#
# Throws when $data is false or not a reference.
sub element_hash {
    my ( $self, $data ) = @_;

    unless ( $data && ref($data) ) {
        $self->throw("Must provide data hash ref");
    }

  NAME:
    foreach my $name ( sort keys %{$data} ) {
        my $value = $data->{$name};
        next NAME if $value && $value =~ m{^\s*$}o;

        my $target = $MAPPING{$name};
        next NAME unless $target;

        if ( ref($target) =~ /hash/i ) {

            # Hash-valued mapping: store one level deeper.
            my $key = ( keys %{$target} )[0];
            $self->{'_values'}->{$key}->{ $target->{$key} } = $value;
        }
        else {
            $self->{'_values'}->{$target} = $value;
        }
    }
}
# characters
#
# Remembers the Data entry of $data in _last_data so that a following
# end_element() can file it. Undefined data, or data made up of one or
# more whitespace characters only, leaves _last_data untouched.
sub characters {
    my ( $self, $data ) = @_;

    return
      if !defined $data->{'Data'}
      || $data->{'Data'} =~ /^\s+$/o;

    $self->{'_last_data'} = $data->{'Data'};
}
# within_element
#
# Returns 1 when an element of type $name is open anywhere on the
# _elements stack, 0 otherwise (including when $name is undefined or the
# stack is missing or empty).
sub within_element {
    my ( $self, $name ) = @_;

    my $stack = $self->{'_elements'};
    return 0
      unless defined $name && defined $stack && scalar @{$stack} != 0;

    for my $open_type ( @{$stack} ) {
        return 1 if $open_type eq $name;
    }
    return 0;
}
# in_element
#
# Tells whether the innermost open element (the front of the _elements
# stack) is of type $name. Returns 0 when nothing is open, otherwise the
# result of the string comparison.
sub in_element {
    my ( $self, $name ) = @_;

    my $innermost = $self->{'_elements'}->[0];
    return 0 unless defined $innermost;
    return ( $innermost eq $name );
}
# start_document
#
# Resets the per-document parser state: last type, collected values,
# stored result and the stack of open elements.
sub start_document {
    my $self = shift;

    $self->{'_lasttype'} = '';
    $self->{'_values'}   = {};
    $self->{'_result'}   = undef;
    $self->{'_elements'} = [];
}
# end_document
#
# Returns the result stored in _result (set by end_element() when a
# result element is closed).
sub end_document {
    my $self = shift;
    return $self->{'_result'};
}
# result_count
#
# Returns the value held in _result_count.
sub result_count {
    my ($self) = @_;
    return $self->{'_result_count'};
}
# model
#
# Get/set accessor for _model. With an argument the value is stored
# first; the current value is returned either way.
sub model {
    my ( $self, @new_value ) = @_;
    $self->{'_model'} = $new_value[0] if @new_value;
    return $self->{'_model'};
}
# database
#
# Get/set accessor for _database. With an argument the value is stored
# first; the current value is returned either way.
sub database {
    my ( $self, @new_value ) = @_;
    $self->{'_database'} = $new_value[0] if @new_value;
    return $self->{'_database'};
}
# algorithm
#
# Get/set accessor for _algorithm. With an argument the value is stored
# first; the current value is returned either way.
sub algorithm {
    my ( $self, @new_value ) = @_;
    $self->{'_algorithm'} = $new_value[0] if @new_value;
    return $self->{'_algorithm'};
}
# query_accession
#
# Get/set accessor for _query_accession. With an argument the value is
# stored first; the current value is returned either way.
sub query_accession {
    my ( $self, @new_value ) = @_;
    $self->{'_query_accession'} = $new_value[0] if @new_value;
    return $self->{'_query_accession'};
}
# query_description
#
# Get/set accessor for _query_description. With an argument the value is
# stored first; the current value is returned either way.
sub query_description {
    my ( $self, @new_value ) = @_;
    $self->{'_query_description'} = $new_value[0] if @new_value;
    return $self->{'_query_description'};
}
# hsp_minscore
#
# Get/set accessor for _hsp_minscore, the score cutoff read by
# _parse_old. With an argument the value is stored first; the current
# value is returned either way.
sub hsp_minscore {
    my ( $self, @new_value ) = @_;
    $self->{'_hsp_minscore'} = $new_value[0] if @new_value;
    return $self->{'_hsp_minscore'};
}
# convert_meta
#
# Get/set accessor for _convert_meta. The parsers test this flag to
# decide whether the structure line is passed through simple_meta().
# With an argument the value is stored first; the current value is
# returned either way.
sub convert_meta {
    my ( $self, @new_value ) = @_;
    $self->{'_convert_meta'} = $new_value[0] if @new_value;
    return $self->{'_convert_meta'};
}
# version
#
# Get/set accessor for _version. With an argument the value is stored
# first; the current value is returned either way.
sub version {
    my ( $self, @new_value ) = @_;
    $self->{'_version'} = $new_value[0] if @new_value;
    return $self->{'_version'};
}
# structure_symbols
#
# Get/set the characters used for the structure symbols when a structure
# line is simplified by simple_meta().
#
# Args    : optional hash ref of symbol name => character. Only the
#           names listed in @VALID_SYMBOLS are copied; other keys are
#           ignored and symbols not mentioned keep their current value.
# Returns : hash ref of the symbols currently in effect (_delimiter).
# Throws  : when a true argument is given that is not a hash reference.
sub structure_symbols {
    my ( $self, $delim ) = @_;

    if ($delim) {
        if ( ref($delim) =~ m{HASH} ) {
            for my $symbol (@VALID_SYMBOLS) {
                next unless exists $delim->{$symbol};
                $self->{'_delimiter'}->{$symbol} = $delim->{$symbol};
            }
        }
        else {
            # The message used to name helix_delimiters(), which is not
            # the name of this method.
            $self->throw(
                "Args to structure_symbols() should be in a hash reference"
            );
        }
    }
    return $self->{'_delimiter'};
}
# simple_meta
#
# Rewrites a structure (meta) string using the symbols returned by
# structure_symbols():
#   ( < [ {   become the 5-prime symbol
#   ) > ] }   become the 3-prime symbol
#   : , _ -   become the single-strand symbol
#   .         becomes the gap symbol
# Any other character is left as it is. No input character maps to the
# unknown symbol.
#
# Args    : the structure string
# Returns : the converted string
# Throws  : when the string is false (undefined, empty or 0)
#
# The conversion is done in one pass over the string. Running one
# substitution per symbol class in sequence would translate a character
# twice whenever a configured symbol is itself an input character of a
# later class (for example single-strand set to . while gap is set to
# something else).
sub simple_meta {
    my ( $self, $str ) = @_;
    $self->throw("No string arg sent!") if !$str;

    my $structs = $self->structure_symbols();

    # Input character => configured output symbol.
    my %symbol_for;
    $symbol_for{$_} = $structs->{'5-prime'}       for '(', '<', '[', '{';
    $symbol_for{$_} = $structs->{'3-prime'}       for ')', '>', ']', '}';
    $symbol_for{$_} = $structs->{'single-strand'} for ':', ',', '_', '-';
    $symbol_for{'.'} = $structs->{'gap'};

    $str =~ s/([()<>\[\]{}:,_.\-])/$symbol_for{$1}/g;
    return $str;
}
# _parse_latest
#
# Parser used when next_result has set _handlerset to latest. Reads one
# report line by line, emits Result/Hit/Hsp events as it goes and returns
# the value of end_document().
#
# Line types handled in the main loop (whitespace-only lines are skipped):
#   header lines     start the Result element with program CMSEARCH and
#                    record version, database name and database letters
#                    (the captured number times 1e6)
#   CM line          query name, plus any accession and description
#                    supplied through the accessors
#   > name           closes the hit that is currently innermost,
#                    reporting the best score as Hit_score and Hit_bits
#                    and resetting the running best score, E-value and
#                    P-value; remembers the new hit name
#   Query = / Target = line
#                    remembers the target start and end
#   Score = line     remembers score and the optional E and P values,
#                    updates the running maximum score and minimum E and
#                    P values, opens a Hit when none is open (using
#                    _get_seq_identifiers for gi, accession and version)
#                    and opens an Hsp when none is innermost
#   structure line   pushed back, then the alignment is read in groups of
#                    four lines (meta, query, midline, hit) until a line
#                    with at most two leading whitespace characters
#                    followed by text. When an Hsp is innermost its data
#                    are emitted and it is closed. The query length is
#                    the number of letters in the query line plus the
#                    numbers found inside *[ n ]* markers.
#   line starting with two slashes
#                    when a result is open, closes the innermost hit
#                    with score, significance and P-value and leaves the
#                    loop
#
# After the loop any hit still open is closed, and the Result element is
# closed when one was started.
#
# NOTE(review): the four header patterns at the top of the main loop
# (the branches that set seentop, Infernal_version, Infernal_db and
# Infernal_db-let) are cut off after their leading anchor and backslash
# in this copy of the file. They need to be restored from the upstream
# source before this sub can compile - TODO confirm.
# NOTE(review): a hit closed by a following > line gets no Hit_signif or
# Hit_p, whereas the hit closed at the terminating slashes does - confirm
# whether that difference is intended.
sub
_parse_latest {
my
(
$self
) =
@_
;
my
$seentop
= 0;
local
$/ =
"\n"
;
my
(
$accession
,
$description
) = (
$self
->query_accession,
$self
->query_description);
my
(
$maxscore
,
$mineval
,
$minpval
);
$self
->start_document();
my
(
$lasthit
,
$lastscore
,
$lasteval
,
$lastpval
,
$laststart
,
$lastend
);
PARSER:
while
(
my
$line
=
$self
->_readline) {
next
if
$line
=~ m{^\s+$};
if
(
$line
=~ m{^\
$seentop
= 1;
$self
->start_element({
'Name'
=>
'Result'
});
$self
->element_hash({
'Infernal_program'
=>
'CMSEARCH'
});
}
elsif
(
$line
=~ m{^\
$self
->element_hash({
'Infernal_version'
=> $1,
});
}
elsif
(
$line
=~ m{^\
$self
->element_hash({
'Infernal_db'
=> $1,
});
}
elsif
(
$line
=~ m{^\
$self
->element_hash({
'Infernal_db-let'
=> $1 * 1e6
});
}
elsif
(
$line
=~ m{^CM(?:\s(\d+))?:\s*(\S+)}xms) {
$self
->element_hash({
'Infernal_query-def'
=> $2,
'Infernal_query-acc'
=>
$accession
,
'Infernal_querydesc'
=>
$description
});
}
elsif
(
$line
=~ m{^>\s*(\S+)} ){
if
(
$self
->in_element(
'hit'
)) {
$self
->element_hash({
'Hit_score'
=>
$maxscore
,
'Hit_bits'
=>
$maxscore
});
(
$maxscore
,
$minpval
,
$mineval
) =
undef
;
$self
->end_element({
'Name'
=>
'Hit'
});
}
$lasthit
= $1;
}
elsif
(
$line
=~ m{
^\sQuery\s=\s\d+\s-\s\d+,\s
Target\s=\s(\d+)\s-\s(\d+)
}xmso) {
(
$laststart
,
$lastend
) = ($1, $2);
}
elsif
(
$line
=~ m{
^\sScore\s=\s([\d\.]+),\s
(?:E\s=\s([\d\.e-]+),\s
P\s=\s([\d\.e-]+),\s)?
GC\s=
}xmso
) {
(
$lastscore
,
$lasteval
,
$lastpval
) = ($1, $2, $3);
$maxscore
||=
$lastscore
;
if
(
$lasteval
&&
$lastpval
) {
$mineval
||=
$lasteval
;
$minpval
||=
$lastpval
;
$mineval
= (
$mineval
>
$lasteval
) ?
$lasteval
:
$mineval
;
$minpval
= (
$minpval
>
$lastpval
) ?
$lastpval
:
$minpval
;
}
$maxscore
= (
$maxscore
<
$lastscore
) ?
$lastscore
:
$maxscore
;
if
(!
$self
->within_element(
'hit'
)) {
my
(
$gi
,
$acc
,
$ver
) =
$self
->_get_seq_identifiers(
$lasthit
);
$self
->start_element({
'Name'
=>
'Hit'
});
$self
->element_hash({
'Hit_id'
=>
$lasthit
,
'Hit_accession'
=>
$ver
?
"$acc.$ver"
:
$acc
?
$acc
:
$lasthit
,
'Hit_gi'
=>
$gi
});
}
if
(!
$self
->in_element(
'hsp'
)) {
$self
->start_element({
'Name'
=>
'Hsp'
});
}
}
elsif
(
$line
=~ m{^(\s+)[<>\{\}\(\)\[\]:_,-\.]+}xms) {
$self
->_pushback(
$line
);
my
$offset
=
length
($1);
my
(
$ct
,
$strln
) = 0;
my
$hsp
;
HSP:
my
%hsp_key
= (
'0'
=>
'meta'
,
'1'
=>
'query'
,
'2'
=>
'midline'
,
'3'
=>
'hit'
);
HSP:
while
(
defined
(
$line
=
$self
->_readline)) {
chomp
$line
;
next
if
(!
$line
);
if
(
$line
=~ m{^\s{0,2}\S+}) {
$self
->_pushback(
$line
);
last
HSP;
}
my
$iterator
=
$ct
% 4;
$strln
=
length
(
$line
)
if
$iterator
== 0;
my
$data
=
substr
(
$line
,
$offset
,
$strln
-
$offset
);
$hsp
->{
$hsp_key
{
$iterator
} } .=
$data
;
$ct
++;
}
if
(
$self
->in_element(
'hsp'
)) {
my
$strlen
= 0;
while
(
$hsp
->{
'query'
} =~ m{\*\[\s*(\d+)\s*\]\*}g) {
$strlen
+= $1;
}
$strlen
+=
$hsp
->{
'query'
} =~
tr
{A-Za-z}{A-Za-z};
my
$metastr
= (
$self
->convert_meta) ? (
$self
->simple_meta(
$hsp
->{
'meta'
})) :
$hsp
->{
'meta'
};
$self
->element_hash(
{
'Hsp_stranded'
=>
'HIT'
,
'Hsp_qseq'
=>
$hsp
->{
'query'
},
'Hsp_hseq'
=>
$hsp
->{
'hit'
},
'Hsp_midline'
=>
$hsp
->{
'midline'
},
'Hsp_structure'
=>
$metastr
,
'Hsp_query-from'
=> 1,
'Infernal_query-len'
=>
$strlen
,
'Hsp_query-to'
=>
$strlen
,
'Hsp_hit-from'
=>
$laststart
,
'Hsp_hit-to'
=>
$lastend
,
'Hsp_score'
=>
$lastscore
,
'Hsp_bit-score'
=>
$lastscore
,
});
$self
->element_hash(
{
'Hsp_evalue'
=>
$lasteval
,
'Hsp_pvalue'
=>
$lastpval
,
})
if
(
$lasteval
&&
$lastpval
);
$self
->end_element({
'Name'
=>
'Hsp'
});
}
}
elsif
(
$line
=~ m{^//}xms ) {
if
(
$self
->within_element(
'result'
) &&
$seentop
) {
if
(
$self
->in_element(
'hit'
)) {
$self
->element_hash({
'Hit_score'
=>
$maxscore
,
'Hit_bits'
=>
$maxscore
});
$self
->element_hash({
'Hit_signif'
=>
$mineval
})
if
$mineval
;
$self
->element_hash({
'Hit_p'
=>
$minpval
})
if
$minpval
;
$self
->end_element({
'Name'
=>
'Hit'
});
}
last
PARSER;
}
}
}
$self
->within_element(
'hit'
) &&
$self
->end_element( {
'Name'
=>
'Hit'
} );
$self
->end_element( {
'Name'
=>
'Result'
} )
if
$seentop
;
return
$self
->end_document();
}
# _parse_pre
#
# Parser used when next_result has set _handlerset to pre-1.0. Reads one
# report line by line, emits Result/Hit/Hsp events as it goes and returns
# the value of end_document(). The reported algorithm version is fixed
# at 0.81.
#
# Whitespace-only lines are skipped. A CM line opens the result, a > line
# names the next hit, Query/Target and Score lines describe an HSP, the
# structure line introduces the alignment text and a line starting with
# two slashes ends the report.
#
# NOTE(review): a hit closed by a following > line gets no Hit_signif,
# whereas the hit closed at the terminating slashes does - confirm
# whether that difference is intended.
sub _parse_pre {
    my ($self) = @_;
    my $saw_result = 0;
    local $/ = "\n";

    my ( $accession, $db, $algorithm, $description, $version ) = (
        $self->query_accession,   $self->database, $self->algorithm,
        $self->query_description, '0.81'
    );

    # Running best values for the hit that is currently open.
    my ( $best_score, $best_eval, $best_pval );
    $self->start_document();

    # Values remembered from the most recent >, Query/Target and Score lines.
    my ( $hit_name, $score, $evalue, $pvalue, $hit_start, $hit_end );

  PARSER:
    while ( my $line = $self->_readline ) {
        next PARSER if $line =~ m{^\s+$};

        if ( $line =~ m{CM\s\d+:\s*(\S+)}xms ) {
            my $query_name = $1;
            unless ( $self->within_element('result') ) {
                $saw_result = 1;
                $self->start_element( { 'Name' => 'Result' } );
                $self->element_hash(
                    {
                        'Infernal_program'   => $algorithm,
                        'Infernal_query-def' => $query_name,
                        'Infernal_query-acc' => $accession,
                        'Infernal_querydesc' => $description,
                        'Infernal_db'        => $db
                    }
                );
            }
        }
        elsif ( $line =~ m{^>\s*(\S+)} ) {
            my $next_hit = $1;

            # A new target sequence closes the hit that was open.
            if ( $self->in_element('hit') ) {
                $self->element_hash(
                    {
                        'Hit_score' => $best_score,
                        'Hit_bits'  => $best_score
                    }
                );
                ( $best_score, $best_pval, $best_eval ) = ();
                $self->end_element( { 'Name' => 'Hit' } );
            }
            $hit_name = $next_hit;
        }
        elsif (
            $line =~ m{
                ^\sQuery\s=\s\d+\s-\s\d+,\s
                Target\s=\s(\d+)\s-\s(\d+)
            }xmso
          )
        {
            ( $hit_start, $hit_end ) = ( $1, $2 );
        }
        elsif (
            $line =~ m{
                ^\sScore\s=\s([\d\.]+),\s
                (?:E\s=\s([\d\.e-]+),\s
                P\s=\s([\d\.e-]+),\s)?
                GC\s=
            }xmso
          )
        {
            ( $score, $evalue, $pvalue ) = ( $1, $2, $3 );

            $best_score ||= $score;
            if ( $evalue && $pvalue ) {
                $best_eval ||= $evalue;
                $best_pval ||= $pvalue;
                $best_eval = $evalue if $best_eval > $evalue;
                $best_pval = $pvalue if $best_pval > $pvalue;
            }
            $best_score = $score if $best_score < $score;

            unless ( $self->within_element('hit') ) {
                my ( $gi, $acc, $ver ) =
                  $self->_get_seq_identifiers($hit_name);
                $self->start_element( { 'Name' => 'Hit' } );
                $self->element_hash(
                    {
                        'Hit_id'        => $hit_name,
                        'Hit_accession' => $ver ? "$acc.$ver"
                        : $acc ? $acc
                        :        $hit_name,
                        'Hit_gi' => $gi
                    }
                );
            }
            unless ( $self->in_element('hsp') ) {
                $self->start_element( { 'Name' => 'Hsp' } );
            }
        }
        elsif ( $line =~ m{^(\s+)[<>\{\}\(\)\[\]:_,-\.]+}xms ) {
            my $offset = length($1);

            # Re-read the structure line as the first alignment line.
            $self->_pushback($line);

            my $ct = 0;
            my $strln;
            my $hsp;
            my %hsp_key = (
                '0' => 'meta',
                '1' => 'query',
                '2' => 'midline',
                '3' => 'hit'
            );

          HSP:
            while ( defined( $line = $self->_readline ) ) {
                chomp $line;
                next HSP if !$line;

                # Text within the first three columns ends the alignment.
                if ( $line =~ m{^\s{0,2}\S+} ) {
                    $self->_pushback($line);
                    last HSP;
                }

                # Lines cycle meta, query, midline, hit; the width of the
                # meta line fixes how much is cut from the whole group.
                my $iterator = $ct % 4;
                $strln = length($line) if $iterator == 0;
                my $data = substr( $line, $offset, $strln - $offset );
                $hsp->{ $hsp_key{$iterator} } .= $data;
                $ct++;
            }

            if ( $self->in_element('hsp') ) {
                my $strlen = $hsp->{'query'} =~ tr{A-Za-z}{A-Za-z};
                my $metastr =
                    $self->convert_meta
                  ? $self->simple_meta( $hsp->{'meta'} )
                  : $hsp->{'meta'};

                $self->element_hash(
                    {
                        'Hsp_stranded'       => 'HIT',
                        'Hsp_qseq'           => $hsp->{'query'},
                        'Hsp_hseq'           => $hsp->{'hit'},
                        'Hsp_midline'        => $hsp->{'midline'},
                        'Hsp_structure'      => $metastr,
                        'Hsp_query-from'     => 1,
                        'Infernal_query-len' => $strlen,
                        'Hsp_query-to'       => $strlen,
                        'Hsp_hit-from'       => $hit_start,
                        'Hsp_hit-to'         => $hit_end,
                        'Hsp_score'          => $score,
                        'Hsp_bit-score'      => $score,
                    }
                );
                if ( $evalue && $pvalue ) {
                    $self->element_hash(
                        {
                            'Hsp_evalue' => $evalue,
                            'Hsp_pvalue' => $pvalue,
                        }
                    );
                }
                $self->end_element( { 'Name' => 'Hsp' } );
            }
        }
        elsif ( $line =~ m{^//}xms ) {
            if ( $self->within_element('result') && $saw_result ) {
                $self->element(
                    {
                        'Name' => 'Infernal_version',
                        'Data' => $version
                    }
                );
                if ( $self->in_element('hit') ) {
                    $self->element_hash(
                        {
                            'Hit_score' => $best_score,
                            'Hit_bits'  => $best_score
                        }
                    );
                    $self->element_hash( { 'Hit_signif' => $best_eval } )
                      if $best_eval;
                    $self->end_element( { 'Name' => 'Hit' } );
                }
                last PARSER;
            }
        }
    }

    $self->within_element('hit')
      && $self->end_element( { 'Name' => 'Hit' } );
    $self->end_element( { 'Name' => 'Result' } ) if $saw_result;
    return $self->end_document();
}
# _parse_old
#
# Parser used when next_result has set _handlerset to old. Reads one
# report line by line, emits Result/Hit/Hsp events as it goes and returns
# the value of end_document(). Program name, model (used as the query
# name), database and version come from the accessors of the object.
#
# Whitespace-only lines and lines starting with HMM hit are skipped. A
# sequence: line opens the result and names the next hit, a hit line
# carries start, end and bit score, the structure line introduces the
# alignment text and a memory / CYK memory line ends the report. A Hit
# and an Hsp are only opened for a hit line whose score is greater than
# hsp_minscore.
sub _parse_old {
    my ($self) = @_;
    my $saw_result = 0;
    local $/ = "\n";

    my ( $accession, $db, $algorithm, $model, $description, $version ) = (
        $self->query_accession, $self->database,
        $self->algorithm,       $self->model,
        $self->query_description, $self->version
    );

    my $best_score;
    my $cutoff = $self->hsp_minscore;
    $self->start_document();
    local ($_);

    my $line;

    # Values remembered from the most recent sequence: and hit lines.
    my ( $hit_name, $score, $hit_start, $hit_end );

  PARSER:
    while ( defined( $line = $self->_readline ) ) {
        next PARSER if $line =~ m{^\s+$};
        next PARSER if $line =~ m{^HMM\shit};

        if ( $line =~ m{^sequence:\s+(\S+)} ) {
            my $sequence_name = $1;

            unless ( $self->within_element('result') ) {
                $saw_result = 1;
                $self->start_element( { 'Name' => 'Result' } );
                $self->element_hash(
                    {
                        'Infernal_program'   => $algorithm,
                        'Infernal_query-def' => $model,
                        'Infernal_query-acc' => $accession,
                        'Infernal_querydesc' => $description,
                        'Infernal_db'        => $db
                    }
                );
            }

            # A new target sequence closes the hit that was open.
            if ( $self->in_element('hit') ) {
                $self->element_hash(
                    {
                        'Hit_score' => $best_score,
                        'Hit_bits'  => $best_score
                    }
                );
                $best_score = undef;
                $self->end_element( { 'Name' => 'Hit' } );
            }
            $hit_name = $sequence_name;
        }
        elsif ( $line =~
            m{^hit\s+\d+\s+:\s+(\d+)\s+(\d+)\s+(\d+\.\d+)\s+bits}xms )
        {
            ( $hit_start, $hit_end, $score ) = ( $1, $2, $3 );
            $best_score = $score unless $best_score;

            if ( $score > $cutoff ) {
                unless ( $self->within_element('hit') ) {
                    my ( $gi, $acc, $ver ) =
                      $self->_get_seq_identifiers($hit_name);
                    $self->start_element( { 'Name' => 'Hit' } );
                    $self->element_hash(
                        {
                            'Hit_id'        => $hit_name,
                            'Hit_accession' => $ver ? "$acc.$ver"
                            : $acc ? $acc
                            :        $hit_name,
                            'Hit_gi' => $gi
                        }
                    );
                }
                unless ( $self->in_element('hsp') ) {
                    $self->start_element( { 'Name' => 'Hsp' } );
                }
                $best_score = $score if $best_score < $score;
            }
        }
        elsif ( $line =~ m{^(\s+)[<>\{\}\(\)\[\]:_,-\.]+}xms ) {
            my $offset = length($1);

            # Re-read the structure line as the first alignment line.
            $self->_pushback($line);

            my $ct = 0;
            my $strln;
            my $hsp;
            my %hsp_key = (
                '0' => 'meta',
                '1' => 'query',
                '2' => 'midline',
                '3' => 'hit'
            );

          HSP:
            while ( $line = $self->_readline ) {
                next HSP if $line =~ m{^\s*$};
                chomp $line;

                # Text starting in the first column ends the alignment.
                if ( !defined($line) || $line =~ m{^\S+} ) {
                    $self->_pushback($line);
                    last HSP;
                }

                # Lines cycle meta, query, midline, hit; the width of the
                # meta line fixes how much is cut from the whole group.
                my $iterator = $ct % 4;
                $strln = length($line) if $iterator == 0;
                my $data = substr( $line, $offset, $strln - $offset );
                $hsp->{ $hsp_key{$iterator} } .= $data;
                $ct++;
            }

            if ( $self->in_element('hsp') ) {
                my $strlen = $hsp->{'query'} =~ tr{A-Za-z}{A-Za-z};
                my $metastr =
                    $self->convert_meta
                  ? $self->simple_meta( $hsp->{'meta'} )
                  : $hsp->{'meta'};

                $self->element_hash(
                    {
                        'Hsp_stranded'       => 'HIT',
                        'Hsp_qseq'           => $hsp->{'query'},
                        'Hsp_hseq'           => $hsp->{'hit'},
                        'Hsp_midline'        => $hsp->{'midline'},
                        'Hsp_structure'      => $metastr,
                        'Hsp_query-from'     => 1,
                        'Infernal_query-len' => $strlen,
                        'Hsp_query-to'       => $strlen,
                        'Hsp_hit-from'       => $hit_start,
                        'Hsp_hit-to'         => $hit_end,
                        'Hsp_score'          => $score,
                        'Hsp_bit-score'      => $score
                    }
                );
                $self->end_element( { 'Name' => 'Hsp' } );
            }
        }
        elsif ( $line =~ m{^memory}xms || $line =~ m{^CYK\smemory}xms ) {
            if ( $self->within_element('result') && $saw_result ) {
                $self->element(
                    {
                        'Name' => 'Infernal_version',
                        'Data' => $version
                    }
                );
                if ( $self->in_element('hit') ) {
                    $self->element_hash(
                        {
                            'Hit_score' => $best_score,
                            'Hit_bits'  => $best_score
                        }
                    );
                    $self->end_element( { 'Name' => 'Hit' } );
                }
                last PARSER;
            }
        }
    }

    $self->within_element('hit')
      && $self->end_element( { 'Name' => 'Hit' } );
    $self->end_element( { 'Name' => 'Result' } ) if $saw_result;
    return $self->end_document();
}
1;