# Module version, assigned through the fully qualified package variable.
$Bio::SearchIO::sim4::VERSION = '1.7.8';

# Package globals used by the subs in this file.
use vars qw(
    $DEFAULTFORMAT
    %ALIGN_TYPES
    %MAPPING
    %MODEMAP
    $DEFAULT_WRITER_CLASS
);

# Report type recorded by start_document() and used as the prefix of the
# 'Sim4Output_program' element.
$DEFAULTFORMAT = 'SIM4';

# Writer class instantiated by write_result() when no writer has been set.
$DEFAULT_WRITER_CLASS = 'Bio::SearchIO::Writer::HitTableWriter';

# Row index inside a four-line alignment block => key under which
# next_result() stores that row in its %alignment hash.
%ALIGN_TYPES = (
    0 => 'Ruler',
    1 => 'Query',
    2 => 'Mid',
    3 => 'Sbjct',
);

# Element name => element type.  Names listed here open/close an entry on
# the '_elements' stack and trigger start_<type>/end_<type> handler calls.
%MODEMAP = (
    'Sim4Output' => 'result',
    'Hit'        => 'hit',
    'Hsp'        => 'hsp',
);

# Data element name => key under which end_element() stores the element's
# character data in $self->{'_values'}.
%MAPPING = (
    # HSP-level fields
    'Hsp_query-from' => 'HSP-query_start',
    'Hsp_query-to'   => 'HSP-query_end',
    'Hsp_qseq'       => 'HSP-query_seq',
    'Hsp_qlength'    => 'HSP-query_length',
    'Hsp_querygaps'  => 'HSP-query_gaps',
    'Hsp_hit-from'   => 'HSP-hit_start',
    'Hsp_hit-to'     => 'HSP-hit_end',
    'Hsp_hseq'       => 'HSP-hit_seq',
    'Hsp_hlength'    => 'HSP-hit_length',
    'Hsp_hitgaps'    => 'HSP-hit_gaps',
    'Hsp_midline'    => 'HSP-homology_seq',
    'Hsp_score'      => 'HSP-score',
    'Hsp_align-len'  => 'HSP-hsp_length',
    'Hsp_identity'   => 'HSP-identical',

    # Hit-level fields
    'Hit_id'   => 'HIT-name',
    'Hit_desc' => 'HIT-description',
    'Hit_len'  => 'HIT-length',

    # Result-level fields
    'Sim4Output_program'    => 'RESULT-algorithm_name',
    'Sim4Output_query-def'  => 'RESULT-query_name',
    'Sim4Output_query-desc' => 'RESULT-query_description',
    'Sim4Output_query-len'  => 'RESULT-query_length',
);
# Parse the next sim4 report from the input stream.
#
# Lines are pulled with $self->_readline and turned into start_element /
# element / end_element events.  A report starts at a "seq1 = ..." line and
# ends at end of input or at a "seq1 = ..." line naming a different query
# (that line is pushed back for the next call).
#
# Returns the value of $self->end_document() once a report header has been
# seen, or an empty return when no "seq1 = ..." line was found.
# Throws (via $self->throw) when the leading lines indicate an output type
# this parser does not handle.
sub next_result {
    my ($self) = @_;
    local $/ = "\n";
    local $_;

    $self->{'_last_data'} = '';

    # $seentop   - true once the first "seq1 = ..." line has been consumed
    # $qfull     - true once the query's ">" description line has been seen
    # @hsps      - exon summary lines not yet matched to alignment text
    # %alignment - the four rows (Ruler/Query/Mid/Sbjct) of the last block
    # $format    - output-type code reported in 'Sim4Output_program'
    my ( $seentop, $qfull, @hsps, %alignment, $format );
    my $hit_direction = 1;

    $self->start_document();
    $self->start_element( { 'Name' => 'Sim4Output' } );
    my $lastquery = '';

    while ( defined( $_ = $self->_readline ) ) {
        next if (/^\s+$/);
        chomp;

        # Before the report header: bail out on output types we cannot parse.
        if ( !$seentop ) {

            # NOTE(review): in the reviewed source this first test was
            # truncated to "/^\" with its body missing.  It is restored here
            # as the "#:lav" header check setting type 2 -- confirm against
            # the upstream module.
            if    (/^\#:lav/) { $format = 2; }
            elsif (/^<|>/)    { $format = 5; }
            $self->throw(
                "Bio::SearchIO::sim4 module cannot parse 'type $format' outputs."
            ) if $format;
        }

        if (/^seq1\s*=\s*(\S+),\s+(\d+)/) {
            my ( $nm, $desc ) = ( $1, $2 );
            if ( !$seentop ) {
                $self->element(
                    { 'Name' => 'Sim4Output_query-def', 'Data' => $nm } );
                $self->element(
                    { 'Name' => 'Sim4Output_query-len', 'Data' => $desc } );
                $seentop = 1;
            }
            elsif ( $nm ne $lastquery ) {

                # A different query starts a new result: leave the line for
                # the next call.
                $self->_pushback($_);
                last;
            }
            $lastquery = $nm;

            # Same query, new hit: close whatever is still open.
            $self->end_element( { 'Name' => 'Hsp' } )
                if ( $self->in_element('hsp') );
            if ( $self->in_element('hit') ) {

                # Emit exon lines that never received alignment text.
                foreach (@hsps) {
                    $self->start_element( { 'Name' => 'Hsp' } );
                    while ( my ( $name, $data ) = each %$_ ) {
                        $self->{'_currentHSP'}{$name} = $data;
                    }
                    $self->end_element( { 'Name' => 'Hsp' } );
                    $self->{'_currentHSP'} = {};
                }
                $format        = 0 if @hsps;
                @hsps          = ();
                %alignment     = ();
                $qfull         = 0;
                $hit_direction = 1;
                $self->end_element( { 'Name' => 'Hit' } );
            }
        }
        elsif (/^seq2\s*=\s*(\S+)\s+\(>?(\S+)\s*\),\s*(\d+)/) {
            $self->start_element( { 'Name' => 'Hit' } );
            $self->element( { 'Name' => 'Hit_id',   'Data' => $2 } );
            $self->element( { 'Name' => 'Hit_desc', 'Data' => $1 } );
            $self->element( { 'Name' => 'Hit_len',  'Data' => $3 } );
        }
        elsif (/^>(\S+)\s*(.*)?/) {

            # First ">" line describes the query, the second one the hit.
            if ($qfull) {
                $format = 4 if !$format;
                $self->element( { 'Name' => 'Hit_desc', 'Data' => $2 } );
            }
            else {
                $self->element(
                    { 'Name' => 'Sim4Output_query-desc', 'Data' => $2 } );
                $qfull = 1;
            }
        }
        elsif (/^\(complement\)/) {
            $hit_direction = -1;
        }
        elsif (/^\(?(\d+)\-(\d+)\)?\s+\(?(\d+)\-(\d+)\)?\s+(\d+)/) {

            # Exon summary line "qs-qe (hs-he) pct".  The trailing number
            # must be present for the line to match but is not stored.
            my ( $qs, $qe, $hs, $he ) = ( $1, $2, $3, $4 );
            push @hsps, {
                'Hsp_query-from' => $qs,
                'Hsp_query-to'   => $qe,

                # Hit coordinates are swapped after a "(complement)" line.
                'Hsp_hit-from'  => $hit_direction >= 0 ? $hs : $he,
                'Hsp_hit-to'    => $hit_direction >= 0 ? $he : $hs,
                'Hsp_identity'  => 0,
                'Hsp_qlength'   => abs( $qe - $qs ) + 1,
                'Hsp_hlength'   => abs( $he - $hs ) + 1,
                'Hsp_align-len' => abs( $qe - $qs ) + 1,
            };
        }
        elsif (/^\s+(\d+)\s/) {

            # Alignment block: up to four consecutive rows, stored under the
            # names given by %ALIGN_TYPES.  The Mid row (index 2) is left-
            # padded so it has the same length as the Query row before it.
            for ( my $i = 0 ; defined($_) && $i < 4 ; $i++ ) {
                my ( $start, $string ) = /^\s+(\d*)\s(.*)/;
                $alignment{ $ALIGN_TYPES{$i} } = {
                    start  => $start,
                    string => $i != 2
                    ? $string
                    : (
                        ' ' x (
                            length(
                                $alignment{ $ALIGN_TYPES{ $i - 1 } }{string}
                            ) - length($string)
                        )
                      )
                      . $string
                };
                $_ = $self->_readline();
            }

            # A ruler starting at 0 marks the beginning of a new alignment.
            if ( $alignment{Ruler}{start} == 0 ) {
                $format = @hsps ? 3 : 1 if !$format;
                $self->end_element( { 'Name' => 'Hsp' } )
                    if ( $self->in_element('hsp') );
                $self->start_element( { 'Name' => 'Hsp' } );
                $self->{'_currentHSP'} = @hsps ? shift @hsps : {
                    'Hsp_query-from' => $alignment{Query}{start},
                    'Hsp_hit-from'   => $alignment{Sbjct}{start},
                };
            }

            if ( $alignment{Mid}{string} =~ /<|>/g ) {

                # The Mid row contains a '<' or '>' marker: the current HSP
                # ends there and another may begin at the next '|'.
                my ( $hsp_start, $hsp_end );
                if ( $self->in_element('hsp') ) {
                    $hsp_end = ( pos $alignment{Mid}{string} ) - 1;

                    # Append the text left of the marker; blanks in the
                    # sequence rows become '-' and are counted as gaps.
                    $self->{'_currentHSP'}{'Hsp_querygaps'} +=
                        ( $self->{'_currentHSP'}{'Hsp_qseq'} .=
                            substr( $alignment{Query}{string}, 0, $hsp_end ) )
                        =~ s/ /-/g;
                    $self->{'_currentHSP'}{'Hsp_hitgaps'} +=
                        ( $self->{'_currentHSP'}{'Hsp_hseq'} .=
                            substr( $alignment{Sbjct}{string}, 0, $hsp_end ) )
                        =~ s/ /-/g;
                    ( $self->{'_currentHSP'}{'Hsp_midline'} .=
                            substr( $alignment{Mid}{string}, 0, $hsp_end ) )
                        =~ s/-/ /g;
                    $self->end_element( { 'Name' => 'Hsp' } );

                    # The /g match continues after the marker.
                    if ( $alignment{Mid}{string} =~ /\|/g ) {
                        $hsp_start = ( pos $alignment{Mid}{string} ) - 1;
                        $self->start_element( { 'Name' => 'Hsp' } );
                        $self->{'_currentHSP'} = @hsps ? shift @hsps : {};
                        $self->{'_currentHSP'}{'Hsp_querygaps'} +=
                            ( $self->{'_currentHSP'}{'Hsp_qseq'} =
                                substr( $alignment{Query}{string}, $hsp_start )
                            ) =~ s/ /-/g;
                        $self->{'_currentHSP'}{'Hsp_hitgaps'} +=
                            ( $self->{'_currentHSP'}{'Hsp_hseq'} =
                                substr( $alignment{Sbjct}{string}, $hsp_start )
                            ) =~ s/ /-/g;
                        ( $self->{'_currentHSP'}{'Hsp_midline'} =
                                substr( $alignment{Mid}{string}, $hsp_start ) )
                            =~ s/-/ /g;
                    }
                }
                else {

                    # No HSP open: start one at the first '|' of the row.
                    $hsp_start = index( $alignment{Mid}{string}, '|' );
                    $self->start_element( { 'Name' => 'Hsp' } );
                    $self->{'_currentHSP'} = @hsps ? shift @hsps : {
                        'Hsp_query-from' => $alignment{Query}{start},
                    };
                    $self->{'_currentHSP'}{'Hsp_querygaps'} +=
                        ( $self->{'_currentHSP'}{'Hsp_qseq'} =
                            substr( $alignment{Query}{string}, $hsp_start ) )
                        =~ s/ /-/g;
                    $self->{'_currentHSP'}{'Hsp_hitgaps'} +=
                        ( $self->{'_currentHSP'}{'Hsp_hseq'} =
                            substr( $alignment{Sbjct}{string}, $hsp_start ) )
                        =~ s/ /-/g;
                    ( $self->{'_currentHSP'}{'Hsp_midline'} =
                            substr( $alignment{Mid}{string}, $hsp_start ) )
                        =~ s/-/ /g;
                    next;
                }
            }
            else {

                # Plain alignment rows: extend the open HSP, opening one
                # first if necessary.
                if ( !$self->in_element('hsp') ) {
                    $self->start_element( { 'Name' => 'Hsp' } );
                    $self->{'_currentHSP'} = @hsps ? shift @hsps : {
                        'Hsp_query-from' => $alignment{Query}{start},
                        'Hsp_hit-from'   => $alignment{Sbjct}{start},
                    };
                }

                # Fill in missing start coordinates from this block's start
                # minus the sequence text collected so far.
                $self->{'_currentHSP'}{'Hsp_query-from'} ||=
                    $alignment{Query}{start} -
                    length( $self->{'_currentHSP'}{'Hsp_qseq'} || '' );
                $self->{'_currentHSP'}{'Hsp_hit-from'} ||=
                    $alignment{Sbjct}{start} -
                    length( $self->{'_currentHSP'}{'Hsp_hseq'} || '' );
                $self->{'_currentHSP'}{'Hsp_querygaps'} +=
                    ( $self->{'_currentHSP'}{'Hsp_qseq'} .=
                        $alignment{Query}{string} ) =~ s/ /-/g;
                $self->{'_currentHSP'}{'Hsp_hitgaps'} +=
                    ( $self->{'_currentHSP'}{'Hsp_hseq'} .=
                        $alignment{Sbjct}{string} ) =~ s/ /-/g;
                ( $self->{'_currentHSP'}{'Hsp_midline'} .=
                        $alignment{Mid}{string} ) =~ s/-/ /g;
            }
        }
    }

    # End of this report: close open elements and hand back the result.
    if ($seentop) {
        $self->end_element( { 'Name' => 'Hsp' } )
            if ( $self->in_element('hsp') );
        if ( $self->in_element('hit') ) {
            foreach (@hsps) {
                $self->start_element( { 'Name' => 'Hsp' } );
                while ( my ( $name, $data ) = each %$_ ) {
                    $self->{'_currentHSP'}{$name} = $data;
                }
                $self->end_element( { 'Name' => 'Hsp' } );
            }
            $self->end_element( { 'Name' => 'Hit' } );
        }
        $self->element(
            {
                'Name' => 'Sim4Output_program',
                'Data' => $DEFAULTFORMAT . ' (A='
                    . ( defined $format ? $format : '?' ) . ')'
            }
        );
        $self->end_element( { 'Name' => 'Sim4Output' } );
        return $self->end_document();
    }
    return;
}
# Handle the opening of an element.
#
# $data is a hash ref with a 'Name' key and an optional 'Attributes' key.
# Names found in %MODEMAP are structural: the event handler's start_<type>
# method is called with the attributes (when _will_handle says so) and the
# type is pushed onto the front of the '_elements' stack.  Opening a
# 'result' also clears '_values' and '_result'.  Other names are ignored.
sub start_element {
    my $self = shift;
    my $data = shift;

    my $mode = $MODEMAP{ $data->{'Name'} };
    if ($mode) {
        if ( $self->_will_handle($mode) ) {
            my $callback = 'start_' . lc($mode);
            $self->_eventHandler->$callback( $data->{'Attributes'} );
        }

        # Innermost open element lives at index 0.
        unshift @{ $self->{'_elements'} }, $mode;

        if ( $mode eq 'result' ) {
            $self->{'_values'} = {};
            $self->{'_result'} = undef;
        }
    }
}
# Handle the closing of an element.
#
# $data is a hash ref with a 'Name' key.
#
# * 'Hsp': fields of $self->{'_currentHSP'} that are still unset are derived
#   from the ones present (end coordinates, identity count, lengths, score);
#   every field is then emitted through $self->element() and removed from
#   the hash.
# * Names in %MODEMAP: the event handler's end_<type> method is called with
#   the report type and the collected '_values' (when _will_handle says so),
#   and the innermost entry is removed from the '_elements' stack.
# * Names in %MAPPING: the last character data is stored in '_values' under
#   the mapped key (or under key => subkey when the mapping is a hash ref).
# * Anything else is reported through $self->debug().
#
# Returns the handler's return value, or undef when no handler was called.
sub end_element {
    my ( $self, $data ) = @_;
    my $nm = $data->{'Name'};
    my $rc;

    if ( $nm eq 'Hsp' ) {
        my $hsp = ( $self->{'_currentHSP'} ||= {} );

        $hsp->{'Hsp_midline'} ||= '';

        # End coordinate = start + aligned text length - 1 - gap characters.
        $hsp->{'Hsp_query-to'} ||=
            $hsp->{'Hsp_query-from'} +
            length( $hsp->{'Hsp_qseq'} ) - 1 -
            $hsp->{'Hsp_querygaps'};
        $hsp->{'Hsp_hit-to'} ||=
            $hsp->{'Hsp_hit-from'} +
            length( $hsp->{'Hsp_hseq'} ) - 1 -
            $hsp->{'Hsp_hitgaps'};

        # Identities are the '|' characters of the midline.
        $hsp->{'Hsp_identity'} ||= ( $hsp->{'Hsp_midline'} =~ tr/\|// );

        $hsp->{'Hsp_qlength'} ||=
            abs( $hsp->{'Hsp_query-to'} - $hsp->{'Hsp_query-from'} ) + 1;
        $hsp->{'Hsp_hlength'} ||=
            abs( $hsp->{'Hsp_hit-to'} - $hsp->{'Hsp_hit-from'} ) + 1;
        $hsp->{'Hsp_align-len'} ||=
            abs( $hsp->{'Hsp_query-to'} - $hsp->{'Hsp_query-from'} ) + 1;

        # Score is the integer percentage of identities over the length.
        $hsp->{'Hsp_score'} ||=
            int( 100 * ( $hsp->{'Hsp_identity'} / $hsp->{'Hsp_align-len'} ) );

        # Emit every field as a data element, emptying the hash as we go.
        for my $field ( keys %$hsp ) {
            $self->element(
                { 'Name' => $field, 'Data' => delete $hsp->{$field} } );
        }
    }

    my $type = $MODEMAP{$nm};
    if ($type) {
        if ( $self->_will_handle($type) ) {
            my $callback = 'end_' . lc($type);
            $rc = $self->_eventHandler->$callback( $self->{'_reporttype'},
                $self->{'_values'} );
        }
        shift @{ $self->{'_elements'} };
    }
    elsif ( $MAPPING{$nm} ) {
        my $target = $MAPPING{$nm};
        if ( ref($target) =~ /hash/i ) {
            my $key = ( keys %$target )[0];
            $self->{'_values'}->{$key}->{ $target->{$key} } =
                $self->{'_last_data'};
        }
        else {
            $self->{'_values'}->{$target} = $self->{'_last_data'};
        }
    }
    else {
        $self->debug("unknown nm $nm, ignoring\n");
    }

    $self->{'_last_data'} = '';
    $self->{'_result'}    = $rc if ( defined $type && $type eq 'result' );
    return $rc;
}
# Emit a complete element in one call: open it, feed its character data,
# then close it.  $data is the same hash ref ('Name', 'Data', ...) that is
# passed unchanged to all three steps.
# Returns whatever end_element() returns.
sub element {
    my $self = shift;
    my $data = shift;

    $self->start_element($data);
    $self->characters($data);
    return $self->end_element($data);
}
# Record the character data of the current element.
#
# $data is a hash ref with 'Name' and 'Data' keys.  Undefined or
# whitespace-only data is ignored.  Otherwise the data becomes
# $self->{'_last_data'}; in addition, while an 'hsp' element is open, data
# for names matching Hsp_qseq / Hsp_hseq / Hsp_midline is appended to
# $self->{'_last_hspdata'} under that name.
sub characters {
    my $self = shift;
    my $data = shift;

    my $text = $data->{'Data'};
    return if !defined $text || $text =~ /^\s+$/;

    my $tag = $data->{'Name'};
    if ( $self->in_element('hsp') && $tag =~ /Hsp\_(qseq|hseq|midline)/ ) {
        $self->{'_last_hspdata'}->{$tag} .= $text;
    }

    $self->{'_last_data'} = $text;
}
# Test whether an element type is open at any nesting depth.
#
# $name is an element type as stored on the '_elements' stack (the values
# of %MODEMAP, e.g. 'hit').
# Returns 1 if $name occurs anywhere on the stack, 0 otherwise -- including
# when $name is undefined or when the stack is missing or empty.
sub within_element {
    my ( $self, $name ) = @_;

    my $stack = $self->{'_elements'};

    # Each condition must short-circuit on its own.  The earlier form
    # combined them as "A && B || C", which still dereferenced an undefined
    # stack and still compared against an undefined $name.
    return 0
        if !defined $name
        || ref($stack) ne 'ARRAY'
        || !@$stack;

    foreach my $open (@$stack) {
        return 1 if $open eq $name;
    }
    return 0;
}
# Test whether $name is the innermost open element, i.e. the entry at the
# front of the '_elements' stack.
# Returns 0 when nothing is open, otherwise the result of the string
# comparison (true or false).
sub in_element {
    my $self = shift;
    my $name = shift;

    my $innermost = $self->{'_elements'}->[0];
    return 0 unless defined $innermost;
    return ( $innermost eq $name );
}
# Reset the per-report parser state: no last type, no collected values, no
# result, an empty element stack, and the report type set to
# $DEFAULTFORMAT.
sub start_document {
    my $self = shift;

    @{$self}{qw(_lasttype _values _result _elements)} = ( '', {}, undef, [] );
    $self->{'_reporttype'} = $DEFAULTFORMAT;
}
# Finish the current report.  Extra arguments are accepted and ignored.
# Returns $self->{'_result'}, which end_element() fills in when a 'result'
# element is closed.
sub end_document {
    my $self = shift;
    return $self->{'_result'};
}
# Write a result through the parent class, making sure a writer exists.
#
# $blast and any further arguments are passed on unchanged.  When no writer
# has been set, a warning is issued and an instance of
# $DEFAULT_WRITER_CLASS is installed first.
# Returns whatever the parent's write_result() returns.
sub write_result {
    my $self = shift;
    my ( $blast, @args ) = @_;

    unless ( defined( $self->writer ) ) {
        $self->warn("Writer not defined. Using a $DEFAULT_WRITER_CLASS");
        $self->writer( $DEFAULT_WRITER_CLASS->new() );
    }

    return $self->SUPER::write_result( $blast, @args );
}
# Number of results reported by this parser; this implementation always
# answers 1, whatever it is called with.
sub result_count { return 1 }
# Alias for result_count(): forwards to it on the same object and returns
# its answer.
sub report_count {
    my ($self) = @_;
    return $self->result_count;
}
# Ask (and remember) whether the event handler wants events of $type.
#
# The handler object is cached in $self->{'_handler_cache'} on first use,
# and its will_handle($type) answer is cached per type in
# $self->{'_will_handle_cache'}; an undefined answer is not cached and is
# asked again next time.
# Returns the cached handler when the answer is true, undef otherwise.
sub _will_handle {
    my $self = shift;
    my $type = shift;

    my $handler = $self->{'_handler_cache'}     ||= $self->_eventHandler;
    my $answers = $self->{'_will_handle_cache'} ||= {};

    unless ( defined $answers->{$type} ) {
        $answers->{$type} = $handler->will_handle($type);
    }

    return $answers->{$type} ? $handler : undef;
}
1;