$Bio::Tools::Genewise::VERSION
=
'1.7.8'
;
$Srctag
=
'genewise'
;
sub
new {
my
(
$class
,
@args
) =
@_
;
my
$self
=
$class
->SUPER::new(
@args
);
$self
->_initialize_io(
@args
);
return
$self
;
}
sub
_get_strand {
my
(
$self
,
$start
,
$end
) =
@_
;
defined
(
$start
) ||
$self
->throw(
"Need a start"
);
defined
(
$end
) ||
$self
->throw(
"Need an end"
);
my
$strand
;
if
(
$start
>
$end
) {
my
$tmp
=
$start
;
$start
=
$end
;
$end
=
$tmp
;
$strand
= -1;
}
else
{
$strand
= 1;
}
return
(
$start
,
$end
,
$strand
);
}
sub
_score {
my
$self
=
shift
;
return
$self
->{
'_score'
} =
shift
if
@_
;
return
$self
->{
'_score'
};
}
sub
_prot_id {
my
$self
=
shift
;
return
$self
->{
'_prot_id'
} =
shift
if
@_
;
return
$self
->{
'_prot_id'
};
}
sub
_target_id {
my
$self
=
shift
;
return
$self
->{
'_target_id'
} =
shift
if
@_
;
return
$self
->{
'_target_id'
};
}
sub
next_prediction {
my
(
$self
) =
@_
;
unless
(
$self
->parsed ){
$self
->_parse_genes;
$self
->parsed(1);
}
return
shift
@{
$self
->{
'_genes'
}};
}
sub
parsed {
my
$self
=
shift
;
return
$self
->{
'_parsed'
} = 1
if
@_
&&
$_
[0];
return
$self
->{
'_parsed'
};
}
sub
_parse_genes {
my
(
$self
) =
@_
;
my
(
@alignments
,
@genes
);
local
($/) =
"//"
;
while
(
defined
(
$_
=
$self
->_readline) ) {
$self
->debug(
$_
);
while
( /Alignment\s+(\d+)\s+Score\s+(\S+)\s+\(Bits\)/g ) {
$alignments
[$1] = $2;
}
if
( /Score\s+(\-?\d+(\.\d+)?)/ ) {
$self
->_score($1);
}
if
( /Query\s+(?:protein|model)\:\s+(\S+)/ ) {
$self
->_prot_id($1);
}
if
( /Target Sequence\s+(\S+)/ ) {
$self
->_target_id($1);
}
next
unless
/Gene\s+\d+\n/;
my
@genes_txt
=
split
(/Gene\s+\d+\n/,
$_
);
shift
@genes_txt
;
my
$gene_num
= 1;
foreach
my
$gene_txt
(
@genes_txt
) {
my
$score
=
$alignments
[
$gene_num
];
my
(
$g_start
,
$g_end
,
$type
) =
$gene_txt
=~ m/Gene\s+
(\d+)[\s-]+
(\d+)\s+
(?:\[(\w+)\])?
/x;
my
$g_strand
;
my
$source_tag
=
$type
?
"$Srctag"
.
"_$type"
:
$Srctag
;
my
$genes
= Bio::SeqFeature::Gene::GeneStructure->new
(
-source
=>
$source_tag
,
-score
=>
$self
->_score,
);
$genes
->add_tag_value(
'ID'
,
$self
->_prot_id.
".gene"
);
my
$transcript
= Bio::SeqFeature::Gene::Transcript->new
(
-source
=>
$source_tag
,
-score
=>
$score
);
(
$g_start
,
$g_end
,
$g_strand
) =
$self
->_get_strand(
$g_start
,
$g_end
);
$genes
->strand(
$g_strand
);
my
@exons
;
unless
(
@exons
=
$gene_txt
=~ m/(Exon .+\s+Supporting .+)/g ) {
@exons
=
$gene_txt
=~ m/(Exon .+\s+)/g;
}
my
$nbr
= 1;
foreach
my
$e
(
@exons
){
my
(
$e_start
,
$e_end
,
$phase
) =
$e
=~ m/Exon\s+
(\d+)[\s-]+
(\d+)\s+
phase\s+(\d+)
/x;
my
$e_strand
;
(
$e_start
,
$e_end
,
$e_strand
) =
$self
->_get_strand(
$e_start
,
$e_end
);
$transcript
->strand(
$e_strand
)
unless
$transcript
->strand != 0;
my
$exon
= Bio::SeqFeature::Gene::Exon->new
(
-seq_id
=>
$self
->_target_id,
-source
=>
$source_tag
,
-start
=>
$e_start
,
-end
=>
$e_end
,
-score
=>
$score
,
-strand
=>
$e_strand
);
$exon
->add_tag_value(
'phase'
,
$phase
);
$exon
->is_coding(1);
if
(
$self
->_prot_id ) {
$exon
->add_tag_value(
'Parent'
,
$self
->_prot_id);
}
$exon
->add_tag_value(
"Exon"
,
$nbr
++);
if
(
$e
=~ m/Supporting\s+(\d+)\s+(\d+)\s+(\d+)\s+(\d+)/) {
my
(
$geno_start
,
$geno_end
,
$prot_start
,
$prot_end
) = ($1,$2,$3,$4);
my
$prot_strand
;
(
$prot_start
,
$prot_end
,
$prot_strand
) =
$self
->_get_strand(
$prot_start
,
$prot_end
);
my
$pf
= Bio::SeqFeature::Generic->new
(
-start
=>
$prot_start
,
-end
=>
$prot_end
,
-seq_id
=>
$self
->_prot_id,
-score
=>
$score
,
-strand
=>
$prot_strand
,
-source
=>
$source_tag
,
-primary_tag
=>
'supporting_protein_feature'
,);
my
$geno_strand
;
(
$geno_start
,
$geno_end
,
$geno_strand
) =
$self
->_get_strand(
$geno_start
,
$geno_end
);
my
$gf
= Bio::SeqFeature::Generic->new
(
-start
=>
$geno_start
,
-end
=>
$geno_end
,
-seq_id
=>
$self
->_target_id,
-score
=>
$score
,
-strand
=>
$geno_strand
,
-source
=>
$source_tag
,
-primary_tag
=>
'supporting_genomic_feature'
,);
my
$fp
= Bio::SeqFeature::FeaturePair->new
(
-feature1
=>
$gf
,
-feature2
=>
$pf
);
$exon
->add_tag_value(
'supporting_feature'
,
$fp
);
if
(
$self
->_prot_id ) {
$exon
->add_tag_value(
'Target'
,
'Protein:'
.
$self
->_prot_id);
$exon
->add_tag_value(
'Target'
,
$prot_start
);
$exon
->add_tag_value(
'Target'
,
$prot_end
);
}
}
$transcript
->add_exon(
$exon
);
}
$transcript
->seq_id(
$self
->_target_id);
$transcript
->add_tag_value(
'ID'
,
$self
->_prot_id);
$transcript
->add_tag_value(
'Parent'
,
$self
->_prot_id.
".gene"
);
$genes
->add_transcript(
$transcript
);
$genes
->seq_id(
$self
->_target_id);
push
@genes
,
$genes
;
$gene_num
++;
}
}
$self
->{
'_genes'
} = \
@genes
;
}
1;