$Bio::MUST::Core::Seq::VERSION
=
'0.250380'
;
has
'seq_id'
=> (
is
=>
'rw'
,
isa
=>
'Bio::MUST::Core::SeqId'
,
required
=> 1,
coerce
=> 1,
handles
=>
qr{.*}
xms,
);
has
'seq'
=> (
traits
=> [
'String'
],
is
=>
'ro'
,
isa
=>
'Bio::MUST::Core::Types::Seq'
,
default
=>
q{}
,
coerce
=> 1,
writer
=>
'_set_seq'
,
handles
=> {
seq_len
=>
'length'
,
append_seq
=>
'append'
,
replace_seq
=>
'replace'
,
edit_seq
=>
'substr'
,
},
);
sub
clone {
my
$self
=
shift
;
return
$self
->new(
seq_id
=>
$self
->full_id,
seq
=>
$self
->seq
);
}
sub
is_protein {
my
$self
=
shift
;
return
1
if
$self
->seq =~
$PROTLIKE
;
return
0;
}
sub
is_rna {
my
$self
=
shift
;
return
1
if
$self
->seq =~
$RNALIKE
&& (not
$self
->is_protein);
return
0;
}
sub
is_aligned {
my
$self
=
shift
;
return
1
if
$self
->seq =~
$GAP
;
return
0;
}
sub
is_subseq_of {
my
$self
=
shift
;
my
$seq2
=
shift
;
$self
=
$self
->raw_str;
$seq2
=
$seq2
->isa(
'Bio::MUST::Core::Seq'
)
?
$seq2
->raw_str : _strip_gaps(
$seq2
);
return
1
if
$seq2
=~ m/
$self
/xmsi;
return
0;
}
sub
is_superseq_of {
my
$self
=
shift
;
my
$seq2
=
shift
;
$self
=
$self
->raw_str;
$seq2
=
$seq2
->isa(
'Bio::MUST::Core::Seq'
)
?
$seq2
->raw_str : _strip_gaps(
$seq2
);
return
1
if
$self
=~ m/
$seq2
/xmsi;
return
0;
}
sub
first_site {
my
$self
=
shift
;
my
(
$leading_gaps
) =
$self
->seq =~ m{ \A (
$GAP
+) }xms;
return
length
$leading_gaps
// 0;
}
sub
uc
{
my
$self
=
shift
;
$self
->_set_seq(
uc
$self
->seq );
return
$self
;
}
sub
uc_seq {
carp
'[BMC] Warning: Method uc_seq is deprecated; use uc instead!'
;
return
shift
->
uc
(
@_
);
}
sub
recode {
my
$self
=
shift
;
my
$base_for
=
shift
;
my
@states
=
$self
->all_states;
my
@rec_states
;
for
my
$state
(
@states
) {
my
$rec_state
=
$base_for
->{
$state
} //
$FRAMESHIFT
;
push
@rec_states
,
$rec_state
;
}
my
$new_seq
=
join
q{}
,
@rec_states
;
$self
->_set_seq(
$new_seq
);
return
$self
;
}
sub
recode_seq {
carp
'[BMC] Warning: Method recode_seq is deprecated; use recode instead!'
;
return
shift
->recode(
@_
);
}
sub
degap {
my
$self
=
shift
;
$self
->_set_seq(
$self
->raw_str);
return
$self
;
}
sub
gapify {
my
$self
=
shift
;
my
$char
=
shift
//
'*'
;
my
$regex
=
$PROTMISS
;
unless
(
$self
->is_protein) {
$regex
=
$DNAMISS
;
$char
=
'N'
if
$char
=~
$DNAMISS
;
}
(
my
$seq
=
$self
->seq ) =~ s{
$regex
}{
$char
}xmsg;
$self
->_set_seq(
$seq
);
return
$self
;
}
sub
spacify {
my
$self
=
shift
;
my
$seq
=
$self
->seq;
$seq
=~ s{ (
$GAP
+ \ + ) }{
' '
x
length
($1) }xmseg;
$seq
=~ s{ ( \ +
$GAP
+ ) }{
' '
x
length
($1) }xmseg;
$self
->_set_seq(
$seq
);
return
$self
;
}
sub
trim {
my
$self
=
shift
;
$self
->replace_seq(
qr{ $GAP+\z }
xms,
q{}
);
return
$self
;
}
sub
pad_to {
my
$self
=
shift
;
my
$bound
=
shift
;
$self
->append_seq(
q{ }
x (
$bound
-
$self
->seq_len) );
return
$self
;
}
sub
clear_new_tag {
my
$self
=
shift
;
(
my
$full_id
=
$self
->full_id) =~ s{
$NEW_TAG
\z}{}xms;
$self
->set_seq_id( SeqId->new(
full_id
=>
$full_id
) );
return
$self
;
}
sub
all_states {
my
$self
=
shift
;
return
split
//,
$self
->seq;
}
sub
state_at {
return
shift
->edit_seq(
@_
, 1);
}
sub
delete_site {
my
$self
=
shift
;
$self
->edit_seq(
@_
, 1,
q{}
);
return
$self
;
}
sub
is_missing {
my
$self
=
shift
;
my
$site
=
shift
;
my
$state
=
$self
->state_at(
$site
);
return
1
if
$state
=~
$PROTMISS
;
return
1
if
$state
=~
$DNAMISS
&& (not
$self
->is_protein);
return
0;
}
sub
is_gap {
my
$self
=
shift
;
my
$site
=
shift
;
return
1
if
$self
->state_at(
$site
) =~
$GAP
;
return
0;
}
around
qw(purity reverse_complemented_seq codons)
=>
sub
{
my
$method
=
shift
;
my
$self
=
shift
;
if
(
$self
->is_protein) {
carp
'[BMC] Warning: sequence looks like a protein; returning undef!'
;
return
undef
;
}
return
$self
->
$method
(
@_
);
};
sub
nomiss_seq_len {
my
$self
=
shift
;
my
$regex
=
$self
->is_protein ?
$PROTMISS
:
$DNAMISS
;
(
my
$raw_str
=
$self
->raw_str) =~ s/
$regex
//xmsg;
return
length
$raw_str
;
}
sub
purity {
my
$self
=
shift
;
(
my
$pure_seq
=
$self
->seq) =~ s/
$NONPUREDNA
//xmsg;
my
$purity
= 1.0 *
length
(
$pure_seq
) /
$self
->seq_len;
return
$purity
;
}
sub
reverse_complemented_seq {
my
$self
=
shift
;
my
$new_seq
=
scalar
reverse
$self
->seq;
$new_seq
=~
tr
/ATUGCYRSWKMBDHVN/TAACGRYSWMKVHDBN/;
$new_seq
=~
tr
/atugcyrswkmbdhvn/taacgryswmkvhdbn/;
return
$self
->new(
seq_id
=>
$self
->full_id,
seq
=>
$new_seq
);
}
sub
spliced_seq {
my
$self
=
shift
;
my
$blocks
=
shift
;
my
$new_seq
;
my
$seq
=
$self
->seq;
for
my
$block
( @{
$blocks
} ) {
my
(
$start
,
$end
) = @{
$block
};
$new_seq
.=
substr
$seq
,
$start
- 1,
$end
-
$start
+ 1;
}
return
$self
->new(
seq_id
=>
$self
->full_id,
seq
=>
$new_seq
);
}
sub
raw_str {
my
$self
=
shift
;
return
_strip_gaps(
$self
->seq);
}
sub
raw_seq {
carp
'[BMC] Warning: Method raw_seq is deprecated; use raw_str instead!'
;
return
shift
->raw_str(
@_
);
}
sub
wrapped_str {
my
$self
=
shift
;
my
$chunk
=
shift
// 60;
my
$nowrap
=
$chunk
< 0 ? 1 : 0;
my
$width
=
$self
->seq_len;
$chunk
=
$width
if
$nowrap
;
my
$str
;
for
(
my
$site
= 0;
$site
<
$width
;
$site
+=
$chunk
) {
$str
.=
$self
->edit_seq(
$site
,
$chunk
) .
"\n"
;
}
return
$str
;
}
sub
codons {
my
$self
=
shift
;
my
$frame
=
shift
// 1;
my
$dna
=
$frame
< 0 ?
$self
->reverse_complemented_seq->seq :
$self
->seq;
$dna
=~
tr
/Uu/Tt/;
my
@codons
;
for
(
my
$i
= (
abs
$frame
) - 1;
$i
<
length
$dna
;
$i
+= 3) {
my
$codon
=
substr
$dna
,
$i
, 3;
push
@codons
,
$codon
if
length
$codon
== 3;
}
return
\
@codons
;
}
sub
_strip_gaps {
my
$seq
=
shift
;
$seq
=~ s/
$GAP
+//xmsg;
return
$seq
;
}
__PACKAGE__->meta->make_immutable;
1;