Hide Show 78 lines of Pod
use
constant
INTERNAL_SEPARATOR
=>
'!:!'
;
$DB_BTREE
->{
'flags'
} = R_DUP;
Hide Show 18 lines of Pod
sub
new {
my
(
$class
,
@args
) =
@_
;
my
$self
=
$class
->SUPER::new(
@args
);
my
(
$dat_dir
,
$index_dir
,
$tax_db
,
$force
) =
$self
->_rearrange([
qw(DAT_DIR INDEX_DIR TAX_DB FORCE)
],
@args
);
$self
->throw(
"At least one of -dat_dir and -index_dir must be supplied"
)
unless
(
$dat_dir
||
$index_dir
);
$self
->index_directory(
$index_dir
||
$dat_dir
);
$self
->{_tax_db} =
$tax_db
if
$tax_db
;
if
(
$dat_dir
) {
$self
->_build_index(
$dat_dir
,
$force
);
}
$self
->_db_connect;
return
$self
;
}
Hide Show 4 lines of Pod
sub
_get_ids {
my
(
$self
,
$dat
,
@args
) =
@_
;
@args
% 2 == 0 ||
$self
->throw(
"Must provide key => value pairs"
);
my
$hash
=
$self
->{
$dat
} ||
$self
->throw(
"Unknown .dat type '$dat'"
);
if
(
@args
) {
my
@final
;
my
%args
=
@args
;
my
$multiple
= 0;
while
(
my
(
$type
,
$value
) =
each
%args
) {
unless
(
$value
) {
$self
->
warn
(
"Arguement '$type' has no value, ignored"
);
next
;
}
$type
=~ s/-//;
$type
=
lc
(
$type
);
my
$converter
=
$hash
->{
$type
};
unless
(
$converter
) {
$self
->
warn
(
"Unknown search type '$type' for .dat type '$dat'"
);
next
;
}
my
@ids
=
$converter
->get_dup(
$value
);
unless
(
@ids
) {
@ids
=
$converter
->get_dup(
lc
(
$value
));
}
if
(
$multiple
) {
@final
||
return
;
my
%final
=
map
{
$_
=> 1 }
@final
;
@final
=
grep
{
$final
{
$_
} }
@ids
;
}
else
{
@final
=
@ids
;
$multiple
++;
}
}
return
@final
;
}
else
{
my
$db_file_hash
=
$self
->{
$dat
}->{id};
my
(
$key
,
$prev_key
,
$value
) = (
'_!_'
,
'!_!'
);
my
@ids
;
while
(1) {
$db_file_hash
->seq(
$key
,
$value
, R_NEXT);
last
if
$prev_key
eq
$key
;
push
(
@ids
,
$value
);
$prev_key
=
$key
;
}
return
@ids
;
}
}
Hide Show 10 lines of Pod
sub
get_reference {
my
(
$self
,
$id
) =
@_
;
$id
||
return
;
my
$data
=
$self
->{reference}->{data}->{
$id
} ||
return
;
my
@data
=
split
(SEPARATOR,
$data
);
return
Bio::Annotation::Reference->new(
-pubmed
=>
$data
[0],
-authors
=>
$data
[1],
-title
=>
$data
[2],
-location
=>
$data
[3] );
}
Hide Show 11 lines of Pod
sub
get_genemap {
my
(
$self
,
$id
,
$upstream
) =
@_
;
$id
||
return
;
return
$self
->{got_map}->{
$id
}
if
defined
$self
->{got_map}->{
$id
};
$upstream
||= 1000;
my
$data
=
$self
->{gene}->{data}->{
$id
} ||
return
;
my
@data
=
split
(SEPARATOR,
$data
);
my
$taxon
=
$self
->{_tax_db} ?
$self
->{_tax_db}->get_taxon(
$data
[3]) ||
$data
[3] :
$data
[3];
my
$map
= Bio::Map::GeneMap->get(
-uid
=>
$id
,
-gene
=>
$data
[1],
-species
=>
$taxon
,
-description
=>
$data
[2],
-upstream
=>
$upstream
);
$self
->{got_map}->{
$id
} =
$map
;
foreach
my
$sid
(
$self
->get_site_ids(
-gene
=>
$id
)) {
foreach
my
$fid
(
$self
->get_factor_ids(
-site
=>
$sid
)) {
no
warnings
"recursion"
;
$self
->get_factor(
$fid
);
}
}
return
$map
;
}
Hide Show 12 lines of Pod
sub
get_seq {
my
(
$self
,
$id
) =
@_
;
$id
||
return
;
my
$data
=
$self
->{site}->{data}->{
$id
} ||
return
;
my
@data
=
split
(SEPARATOR,
$data
);
my
$seq
= Bio::Seq->new(
-seq
=>
$data
[2],
-accession_number
=>
$id
,
-description
=>
$data
[6] ?
'Genomic sequence'
:
'Consensus or artificial sequence'
,
-id
=>
$data
[0],
-strand
=> 1,
-alphabet
=>
$data
[7] ||
'dna'
,
-species
=>
$data
[6]);
my
$annot
=
$seq
->annotation;
my
$sv
= Bio::Annotation::SimpleValue->new(
-tagname
=>
'relative_start'
,
-value
=>
$data
[4] || 1);
$annot
->add_Annotation(
$sv
);
$sv
= Bio::Annotation::SimpleValue->new(
-tagname
=>
'relative_end'
,
-value
=>
$data
[5] || (
$data
[4] || 1 +
length
(
$data
[2]) - 1));
$annot
->add_Annotation(
$sv
);
$sv
= Bio::Annotation::SimpleValue->new(
-tagname
=>
'relative_type'
,
-value
=>
$data
[3] ||
'artificial'
);
$annot
->add_Annotation(
$sv
);
$sv
= Bio::Annotation::SimpleValue->new(
-tagname
=>
'relative_to'
,
-value
=>
$data
[1]);
$annot
->add_Annotation(
$sv
);
return
$seq
;
}
Hide Show 10 lines of Pod
sub
get_fragment {
my
(
$self
,
$id
) =
@_
;
$id
||
return
;
my
$data
=
$self
->{fragment}->{data}->{
$id
} ||
return
;
my
@data
=
split
(SEPARATOR,
$data
);
return
new Bio::Seq(
-seq
=>
$data
[4],
-accession_number
=>
$id
,
-description
=>
'Between genes '
.
$data
[1].
' and '
.
$data
[2],
-species
=>
$data
[3],
-id
=>
$data
[0],
-alphabet
=>
'dna'
);
}
Hide Show 12 lines of Pod
sub
get_matrix {
my
(
$self
,
$id
,
$seq
) =
@_
;
$id
||
return
;
$seq
||=
'atgc'
;
$seq
=
lc
(
$seq
);
my
$data
=
$self
->{matrix}->{data}->{
$id
} ||
return
;
my
@data
=
split
(SEPARATOR,
$data
);
$data
[4] ||
$self
->throw(
"Matrix data missing for $id"
);
my
(
$a
,
$c
,
$g
,
$t
);
foreach
my
$position
(
split
(INTERNAL_SEPARATOR,
$data
[4])) {
my
(
$a_count
,
$c_count
,
$g_count
,
$t_count
) =
split
(
"\t"
,
$position
);
push
(@{
$a
},
$a_count
);
push
(@{
$c
},
$c_count
);
push
(@{
$g
},
$g_count
);
push
(@{
$t
},
$t_count
);
}
my
$a_freq
= (
$seq
=~
tr
/a//) /
length
(
$seq
);
my
$c_freq
= (
$seq
=~
tr
/c//) /
length
(
$seq
);
my
$g_freq
= (
$seq
=~
tr
/g//) /
length
(
$seq
);
my
$t_freq
= (
$seq
=~
tr
/t//) /
length
(
$seq
);
my
$psm
= new Bio::Matrix::PSM::SiteMatrix(
-pA
=>
$a
,
-pC
=>
$c
,
-pG
=>
$g
,
-pT
=>
$t
,
-id
=>
$data
[0],
-accession_number
=>
$id
,
-sites
=>
$data
[3],
-width
=>
scalar
(@{
$a
}),
-correction
=> 1,
-model
=> {
A
=>
$a_freq
,
C
=>
$c_freq
,
G
=>
$g_freq
,
T
=>
$t_freq
} );
return
$psm
;
}
Hide Show 15 lines of Pod
sub
get_aln {
my
(
$self
,
$id
,
$via_factors
) =
@_
;
$id
||
return
;
my
$data
=
$self
->{matrix}->{data}->{
$id
} ||
$self
->throw(
"matrix '$id' had no data in DB_File"
);
my
@data
=
split
(SEPARATOR,
$data
);
if
(!
$data
[5] &&
$via_factors
) {
my
%site_seqs
;
my
%factor_ids
;
foreach
my
$factor_id
(
$self
->get_factor_ids(
-matrix
=>
$id
)) {
$factor_ids
{
$factor_id
} = 1;
foreach
my
$site_id
(
$self
->get_site_ids(
-factor
=>
$factor_id
)) {
next
if
defined
$site_seqs
{
$site_id
};
my
$seq
=
$self
->get_seq(
$site_id
);
my
$seq_str
=
$seq
->seq ||
next
;
$seq_str
=~ /[MRWSYKVHDB]/ and
next
;
$site_seqs
{
$site_id
} =
$seq
;
}
}
my
@seqs
=
values
%site_seqs
;
if
(
@seqs
> 1) {
my
$matrix
=
$self
->get_matrix(
$id
);
my
$desired_sequences
=
$matrix
->sites;
return
if
@seqs
<
$desired_sequences
;
my
$desired_length
=
$matrix
->width;
my
%best_seqs
;
foreach
my
$seq
(
@seqs
) {
my
$for_str
=
$seq
->seq;
next
if
length
(
$for_str
) <
$desired_length
;
my
$rev_str
=
$seq
->revcom->seq;
my
$best_score
= 0;
my
$best_subseq
=
''
;
my
$best_i
= 0;
my
$best_subseq_caps
= 0;
my
$best_revcom
;
my
$revcom
= 0;
foreach
my
$seq_str
(
$for_str
,
$rev_str
) {
for
my
$i
(0..(
length
(
$seq_str
) -
$desired_length
)) {
my
$subseq
=
substr
(
$seq_str
,
$i
,
$desired_length
);
$subseq
=~ s/[^ACGTacgt]//g;
next
unless
length
(
$subseq
) ==
$desired_length
;
my
$score
=
$matrix
->sequence_match_weight(
$subseq
);
my
$caps
=
$subseq
=~
tr
/ACGT//;
if
(
$score
>
$best_score
|| (
"$score"
eq
"$best_score"
&&
$caps
>
$best_subseq_caps
)) {
$best_score
=
$score
;
$best_subseq_caps
=
$caps
;
$best_subseq
=
$subseq
;
$best_i
=
$i
;
$best_revcom
=
$revcom
;
}
}
$revcom
++;
}
if
(
$best_score
) {
$best_seqs
{
$seq
->accession_number} = [
$best_subseq
,
$seq
->accession_number, (
$best_i
+ 1),
$revcom
? -1 : 1,
$best_score
];
}
}
my
@sorted
=
sort
{
$best_seqs
{
$b
}->[-1] <=>
$best_seqs
{
$a
}->[-1] }
keys
%best_seqs
;
return
if
@sorted
<
$desired_sequences
;
splice
(
@sorted
,
$desired_sequences
);
my
%wanted
=
map
{
$_
=> 1 }
@sorted
;
my
@site_data
;
foreach
my
$seq
(
@seqs
) {
next
unless
exists
$wanted
{
$seq
->accession_number};
my
@data
= @{
$best_seqs
{
$seq
->accession_number}};
pop
(
@data
);
push
(
@site_data
,
join
(
'_'
,
@data
));
}
$data
[5] =
join
(INTERNAL_SEPARATOR,
@site_data
);
$self
->{matrix}->{data}->{
$id
} =
join
(SEPARATOR,
@data
);
}
}
$data
[5] ||
return
;
my
@blocks
=
split
(INTERNAL_SEPARATOR,
$data
[5]);
my
$longest
= 0;
foreach
(
@blocks
) {
my
(
$seq
) =
split
(
'_'
,
$_
);
my
$length
=
length
(
$seq
);
if
(
$length
>
$longest
) {
$longest
=
$length
;
}
}
foreach
my
$i
(0..
$#blocks
) {
my
$block
=
$blocks
[
$i
];
my
(
$seq
,
$seq_id
) =
split
(
'_'
,
$block
);
my
$length
=
length
(
$seq
);
if
(
$length
<
$longest
) {
my
$orig_seq
=
$seq
;
$seq
.=
'-'
x(
$longest
-
$length
);
$block
=~ s/^${orig_seq}_/${seq}_/;
$blocks
[
$i
] =
$block
;
}
}
my
$aln
= Bio::SimpleAlign->new(
-source
=>
'transfac_pro'
);
my
%done_ids
;
foreach
(
@blocks
) {
my
(
$seq
,
$seq_acc
,
$start
,
$strand
) =
split
(
'_'
,
$_
);
my
$seq_id
;
$done_ids
{
$seq_acc
}++;
if
(
$done_ids
{
$seq_acc
} > 1) {
$seq_id
=
$seq_acc
.
'_'
.
$done_ids
{
$seq_acc
};
}
else
{
$seq_id
=
$seq_acc
;
}
my
$gaps
=
$seq
=~
tr
/-//;
my
$length
=
length
(
$seq
) -
$gaps
;
$self
->throw(
"seq '$seq_id' for matrix '$id' had seq '$seq'"
)
unless
$length
;
$aln
->add_seq(Bio::LocatableSeq->new(
-seq
=>
$seq
,
-id
=>
$seq_id
,
-accession_number
=>
$seq_acc
,
-start
=>
$start
,
-end
=>
$start
+
$length
- 1,
-strand
=>
$strand
));
}
$aln
->id(
$id
);
return
$aln
;
}
Hide Show 10 lines of Pod
sub
get_factor {
my
(
$self
,
$id
) =
@_
;
$id
||
return
;
return
$self
->{got_factor}->{
$id
}
if
defined
$self
->{got_factor}->{
$id
};
my
$data
=
$self
->{factor}->{data}->{
$id
} ||
return
;
my
@data
=
split
(SEPARATOR,
$data
);
my
$tf
= Bio::Map::TranscriptionFactor->get(
-id
=>
$id
,
-universal_name
=>
$data
[1]);
foreach
my
$sid
(
$self
->get_site_ids(
-factor
=>
$id
)) {
my
$s_data
=
$self
->{site}->{data}->{
$sid
} ||
next
;
my
@s_data
=
split
(SEPARATOR,
$s_data
);
$s_data
[1] ||
next
;
$s_data
[4] ||
next
;
$s_data
[5] ||=
$s_data
[4] + (
$s_data
[2] ?
length
(
$s_data
[2]) - 1 : 0);
no
warnings
"recursion"
;
my
$gene_map
=
$self
->get_genemap(
$s_data
[1]) ||
next
;
return
$self
->{got_factor}->{
$id
}
if
defined
$self
->{got_factor}->{
$id
};
my
$rel
= new Bio::Map::Relative(
-element
=>
$gene_map
->gene,
-description
=>
$s_data
[3]);
Bio::Map::Position->new(
-map
=>
$gene_map
,
-element
=>
$tf
,
-start
=>
$s_data
[4],
-end
=>
$s_data
[5],
-relative
=>
$rel
);
}
$self
->{got_factor}->{
$id
} =
$tf
;
return
$tf
;
}
sub
get_factor_name {
my
(
$self
,
$id
) =
@_
;
my
$details
=
$self
->_get_factor_details(
$id
) ||
return
;
return
$details
->{name};
}
sub
get_factor_species {
my
(
$self
,
$id
) =
@_
;
my
$details
=
$self
->_get_factor_details(
$id
) ||
return
;
return
$details
->{species};
}
sub
get_factor_sequence {
my
(
$self
,
$id
) =
@_
;
my
$details
=
$self
->_get_factor_details(
$id
) ||
return
;
return
$details
->{sequence};
}
sub
_get_factor_details {
my
(
$self
,
$id
) =
@_
;
$id
||
return
;
return
$self
->{factor_details}->{
$id
}
if
defined
$self
->{factor_details}->{
$id
};
my
$data
=
$self
->{factor}->{data}->{
$id
} ||
return
;
my
@data
=
split
(SEPARATOR,
$data
);
my
%details
= (
name
=>
$data
[1],
species
=>
$data
[2],
sequence
=>
$data
[3]);
$self
->{factor_details}->{
$id
} = \
%details
;
return
\
%details
;
}
Hide Show 12 lines of Pod
sub
get_reference_ids {
my
$self
=
shift
;
return
$self
->_get_ids(
'reference'
,
@_
);
}
sub
get_gene_ids {
my
$self
=
shift
;
return
$self
->_get_ids(
'gene'
,
@_
);
}
Hide Show 12 lines of Pod
sub
get_site_ids {
my
$self
=
shift
;
return
$self
->_get_ids(
'site'
,
@_
);
}
Hide Show 12 lines of Pod
sub
get_matrix_ids {
my
$self
=
shift
;
return
$self
->_get_ids(
'matrix'
,
@_
);
}
Hide Show 13 lines of Pod
sub
get_factor_ids {
my
$self
=
shift
;
return
$self
->_get_ids(
'factor'
,
@_
);
}
Hide Show 12 lines of Pod
sub
get_fragment_ids {
my
$self
=
shift
;
return
$self
->_get_ids(
'fragment'
,
@_
);
}
Hide Show 4 lines of Pod
sub
_build_index {
my
(
$self
,
$dat_dir
,
$force
) =
@_
;
my
$index_dir
=
$self
->index_directory;
my
$gene_index
=
"$index_dir/gene.dat.index"
;
my
$reference_index
=
"$index_dir/reference.dat.index"
;
my
$matrix_index
=
"$index_dir/matrix.dat.index"
;
my
$factor_index
=
"$index_dir/factor.dat.index"
;
my
$fragment_index
=
"$index_dir/fragment.dat.index"
;
my
$site_index
=
"$index_dir/site.dat.index"
;
my
$reference_dat
=
"$dat_dir/reference.dat"
;
if
(! -e
$reference_index
||
$force
) {
open
(REF,
$reference_dat
) ||
$self
->throw(
"Cannot open reference file '$reference_dat' for reading"
);
my
%references
;
unlink
$reference_index
;
my
$ref
=
tie
(
%references
,
'DB_File'
,
$reference_index
, O_RDWR|O_CREAT, 0644,
$DB_HASH
) ||
$self
->throw(
"Cannot open file '$reference_index': $!"
);
my
%pubmed
;
my
$reference_pubmed
=
$reference_index
.
'.pubmed'
;
unlink
$reference_pubmed
;
my
$pub
=
tie
(
%pubmed
,
'DB_File'
,
$reference_pubmed
, O_RDWR|O_CREAT, 0644,
$DB_BTREE
) ||
$self
->throw(
"Cannot open file '$reference_pubmed': $!"
);
my
%gene
;
my
$reference_gene
=
$gene_index
.
'.reference'
;
unlink
$reference_gene
;
my
$gene
=
tie
(
%gene
,
'DB_File'
,
$reference_gene
, O_RDWR|O_CREAT, 0644,
$DB_BTREE
) ||
$self
->throw(
"Cannot open file '$reference_gene': $!"
);
my
%site
;
my
$reference_site
=
$site_index
.
'.reference'
;
unlink
$reference_site
;
my
$site
=
tie
(
%site
,
'DB_File'
,
$reference_site
, O_RDWR|O_CREAT, 0644,
$DB_BTREE
) ||
$self
->throw(
"Cannot open file '$reference_site': $!"
);
my
%fragment
;
my
$reference_fragment
=
$fragment_index
.
'.reference'
;
unlink
$reference_fragment
;
my
$fragment
=
tie
(
%fragment
,
'DB_File'
,
$reference_fragment
, O_RDWR|O_CREAT, 0644,
$DB_BTREE
) ||
$self
->throw(
"Cannot open file '$reference_fragment': $!"
);
my
%factor
;
my
$reference_factor
=
$factor_index
.
'.reference'
;
unlink
$reference_factor
;
my
$factor
=
tie
(
%factor
,
'DB_File'
,
$reference_factor
, O_RDWR|O_CREAT, 0644,
$DB_BTREE
) ||
$self
->throw(
"Cannot open file '$reference_factor': $!"
);
my
%matrix
;
my
$reference_matrix
=
$matrix_index
.
'.reference'
;
unlink
$reference_matrix
;
my
$matrix
=
tie
(
%matrix
,
'DB_File'
,
$reference_matrix
, O_RDWR|O_CREAT, 0644,
$DB_BTREE
) ||
$self
->throw(
"Cannot open file '$reference_matrix': $!"
);
<REF>; <REF>; <REF>;
my
@data
;
while
(<REF>) {
if
(/^AC (\S+)/) {
$data
[0] = $1;
}
elsif
(/^RX PUBMED: (\d+)/) {
$data
[1] = $1;
$pub
->put(
"$1"
,
$data
[0]);
}
elsif
(/^RA (.+)\n$/) {
$data
[2] = $1;
}
elsif
(/^RT (.+?)\.?\n$/) {
$data
[3] = $1;
}
elsif
(/^RL (.+?)\.?\n$/) {
$data
[4] = $1;
}
elsif
(/^GE TRANSFAC: (\w\d+)/) {
$gene
->put(
$data
[0],
"$1"
);
}
elsif
(/^BS TRANSFAC: (\w\d+)/) {
$site
->put(
$data
[0],
"$1"
);
}
elsif
(/^FA TRANSFAC: (\w\d+)/) {
$factor
->put(
$data
[0],
"$1"
);
}
elsif
(/^FR TRANSFAC: (FR\d+)/) {
$fragment
->put(
$data
[0],
"$1"
);
}
elsif
(/^MX TRANSFAC: (\w\d+)/) {
$matrix
->put(
$data
[0],
"$1"
);
}
elsif
(/^\/\//) {
$references
{
$data
[0]} =
join
(SEPARATOR, (
$data
[1] ||
''
,
$data
[2] ||
''
,
$data
[3] ||
''
,
$data
[4] ||
''
));
@data
= ();
}
}
close
(REF);
$ref
=
$pub
=
$gene
=
$site
=
$fragment
=
$factor
=
$matrix
=
undef
;
untie
%references
;
untie
%pubmed
;
untie
%gene
;
untie
%site
;
untie
%fragment
;
untie
%factor
;
untie
%matrix
;
}
my
$gene_dat
=
"$dat_dir/gene.dat"
;
if
(! -e
$gene_index
||
$force
) {
open
(GEN,
$gene_dat
) ||
$self
->throw(
"Cannot open gene file '$gene_dat' for reading"
);
my
%genes
;
unlink
$gene_index
;
my
$gene
=
tie
(
%genes
,
'DB_File'
,
$gene_index
, O_RDWR|O_CREAT, 0644,
$DB_HASH
) ||
$self
->throw(
"Cannot open file '$gene_index': $!"
);
my
%id
;
my
$gene_id
=
$gene_index
.
'.id'
;
unlink
$gene_id
;
my
$id
=
tie
(
%id
,
'DB_File'
,
$gene_id
, O_RDWR|O_CREAT, 0644,
$DB_BTREE
) ||
$self
->throw(
"Cannot open file '$gene_id': $!"
);
my
%name
;
my
$gene_name
=
$gene_index
.
'.name'
;
unlink
$gene_name
;
my
$name
=
tie
(
%name
,
'DB_File'
,
$gene_name
, O_RDWR|O_CREAT, 0644,
$DB_BTREE
) ||
$self
->throw(
"Cannot open file '$gene_name': $!"
);
my
%species
;
my
$gene_species
=
$gene_index
.
'.species'
;
unlink
$gene_species
;
my
$species
=
tie
(
%species
,
'DB_File'
,
$gene_species
, O_RDWR|O_CREAT, 0644,
$DB_BTREE
) ||
$self
->throw(
"Cannot open file '$gene_species': $!"
);
my
%site
;
my
$gene_site
=
$site_index
.
'.gene'
;
unlink
$gene_site
;
my
$site
=
tie
(
%site
,
'DB_File'
,
$gene_site
, O_RDWR|O_CREAT, 0644,
$DB_BTREE
) ||
$self
->throw(
"Cannot open file '$gene_site': $!"
);
my
%factor
;
my
$gene_factor
=
$factor_index
.
'.gene'
;
unlink
$gene_factor
;
my
$factor
=
tie
(
%factor
,
'DB_File'
,
$gene_factor
, O_RDWR|O_CREAT, 0644,
$DB_BTREE
) ||
$self
->throw(
"Cannot open file '$gene_factor': $!"
);
my
%fragment
;
my
$gene_fragment
=
$fragment_index
.
'.gene'
;
unlink
$gene_fragment
;
my
$fragment
=
tie
(
%fragment
,
'DB_File'
,
$gene_fragment
, O_RDWR|O_CREAT, 0644,
$DB_BTREE
) ||
$self
->throw(
"Cannot open file '$gene_fragment': $!"
);
my
%reference
;
my
$gene_reference
=
$reference_index
.
'.gene'
;
unlink
$gene_reference
;
my
$reference
=
tie
(
%reference
,
'DB_File'
,
$gene_reference
, O_RDWR|O_CREAT, 0644,
$DB_BTREE
) ||
$self
->throw(
"Cannot open file '$gene_reference': $!"
);
<GEN>; <GEN>; <GEN>;
my
@data
;
while
(<GEN>) {
if
(/^AC (\S+)/) {
$data
[0] = $1;
}
elsif
(/^ID (\S+)/) {
$data
[1] = $1;
$id
->put(
"$1"
,
$data
[0]);
}
elsif
(/^SD (.+)$/) {
$data
[2] =
lc
(
"$1"
);
$name
->put(
lc
(
"$1"
),
$data
[0]);
}
elsif
(/^SY (.+)\.$/) {
foreach
(
split
(
'; '
,
lc
(
"$1"
))) {
$name
->put(
$_
,
$data
[0]);
}
}
elsif
(/^DE (.+)$/) {
$data
[3] = $1;
}
elsif
(/^OS (.+)$/) {
my
$raw_species
= $1;
my
$taxid
=
$self
->_species_to_taxid(
$raw_species
);
$data
[4] =
$taxid
||
$raw_species
;
$species
->put(
$data
[4],
$data
[0]);
}
elsif
(/^RN .+?(RE\d+)/) {
$reference
->put(
$data
[0],
"$1"
);
}
elsif
(/^BS .+?(R\d+)/) {
$site
->put(
$data
[0],
"$1"
);
}
elsif
(/^FA (T\d+)/) {
$factor
->put(
$data
[0],
"$1"
);
}
elsif
(/^BR (FR\d+)/) {
$fragment
->put(
$data
[0],
"$1"
);
}
elsif
(/^\/\//) {
$genes
{
$data
[0]} =
join
(SEPARATOR, (
$data
[1] ||
''
,
$data
[2] ||
''
,
$data
[3] ||
''
,
$data
[4] ||
''
));
@data
= ();
}
}
close
(GEN);
$gene
=
$id
=
$name
=
$species
=
$site
=
$factor
=
$reference
=
undef
;
untie
%genes
;
untie
%id
;
untie
%name
;
untie
%species
;
untie
%site
;
untie
%factor
;
untie
%reference
;
}
my
$site_dat
=
"$dat_dir/site.dat"
;
if
(! -e
$site_index
||
$force
) {
open
(SIT,
$site_dat
) ||
$self
->throw(
"Cannot open site file '$site_dat' for reading"
);
my
%sites
;
unlink
$site_index
;
my
$site
=
tie
(
%sites
,
'DB_File'
,
$site_index
, O_RDWR|O_CREAT, 0644,
$DB_HASH
) ||
$self
->throw(
"Cannot open file '$site_index': $!"
);
my
%id
;
my
$site_id
=
$site_index
.
'.id'
;
unlink
$site_id
;
my
$id
=
tie
(
%id
,
'DB_File'
,
$site_id
, O_RDWR|O_CREAT, 0644,
$DB_BTREE
) ||
$self
->throw(
"Cannot open file '$site_id': $!"
);
my
%species
;
my
$site_species
=
$site_index
.
'.species'
;
unlink
$site_species
;
my
$species
=
tie
(
%species
,
'DB_File'
,
$site_species
, O_RDWR|O_CREAT, 0644,
$DB_BTREE
) ||
$self
->throw(
"Cannot open file '$site_species': $!"
);
my
%qualities
;
my
$site_qualities
=
$site_index
.
'.qual'
;
unlink
$site_qualities
;
my
$quality
=
tie
(
%qualities
,
'DB_File'
,
$site_qualities
, O_RDWR|O_CREAT, 0644,
$DB_HASH
) ||
$self
->throw(
"Cannot open file '$site_qualities': $!"
);
my
%gene
;
my
$site_gene
=
$gene_index
.
'.site'
;
unlink
$site_gene
;
my
$gene
=
tie
(
%gene
,
'DB_File'
,
$site_gene
, O_RDWR|O_CREAT, 0644,
$DB_BTREE
) ||
$self
->throw(
"Cannot open file '$site_gene': $!"
);
my
%matrix
;
my
$site_matrix
=
$matrix_index
.
'.site'
;
unlink
$site_matrix
;
my
$matrix
=
tie
(
%matrix
,
'DB_File'
,
$site_matrix
, O_RDWR|O_CREAT, 0644,
$DB_BTREE
) ||
$self
->throw(
"Cannot open file '$site_matrix': $!"
);
my
%factor
;
my
$site_factor
=
$factor_index
.
'.site'
;
unlink
$site_factor
;
my
$factor
=
tie
(
%factor
,
'DB_File'
,
$site_factor
, O_RDWR|O_CREAT, 0644,
$DB_BTREE
) ||
$self
->throw(
"Cannot open file '$site_factor': $!"
);
my
%reference
;
my
$site_reference
=
$reference_index
.
'.site'
;
unlink
$site_reference
;
my
$reference
=
tie
(
%reference
,
'DB_File'
,
$site_reference
, O_RDWR|O_CREAT, 0644,
$DB_BTREE
) ||
$self
->throw(
"Cannot open file '$site_reference': $!"
);
<SIT>; <SIT>; <SIT>;
my
@data
;
while
(<SIT>) {
if
(/^AC (\S+)/) {
$data
[0] = $1;
}
elsif
(/^ID (\S+)/) {
$data
[1] = $1;
$id
->put(
"$1"
,
$data
[0]);
}
elsif
(/^TY (.+)$/) {
$data
[8] = $1;
}
elsif
(/^DE .
*Gene
: (G\d+)/) {
$data
[2] = $1;
$gene
->put(
$data
[0],
"$1"
);
}
elsif
(/^OS (.+)$/) {
my
$raw_species
= $1;
my
$taxid
=
$self
->_species_to_taxid(
$raw_species
);
$data
[7] =
$taxid
||
$raw_species
;
$species
->put(
$data
[7],
$data
[0]);
}
elsif
(/^SQ (.+)\.$/) {
$data
[3] = $1;
}
elsif
(/^S1 (.+)$/) {
$data
[4] = $1;
}
elsif
(/^SF (.+)$/) {
$data
[5] = $1;
}
elsif
(/^ST (.+)$/) {
$data
[6] = $1;
}
elsif
(/^RN .+?(RE\d+)/) {
$reference
->put(
$data
[0],
"$1"
);
}
elsif
(/^MX (M\d+)/) {
$matrix
->put(
$data
[0],
"$1"
);
}
elsif
(/^BF (T\d+); .+?; Quality: (\d)/) {
$factor
->put(
$data
[0],
"$1"
);
$qualities
{
$data
[0].SEPARATOR.$1} = $2;
}
elsif
(/^\/\//) {
$sites
{
$data
[0]} =
join
(SEPARATOR, (
$data
[1] ||
''
,
$data
[2] ||
''
,
$data
[3] ||
''
,
$data
[4] ||
'TSS'
,
$data
[5] ||
''
,
$data
[6] ||
''
,
$data
[7] ||
''
,
$data
[8] ||
''
));
@data
= ();
}
}
close
(SIT);
$site
=
$id
=
$species
=
$quality
=
$gene
=
$matrix
=
$factor
=
$reference
=
undef
;
untie
%sites
;
untie
%id
;
untie
%species
;
untie
%qualities
;
untie
%gene
;
untie
%matrix
;
untie
%factor
;
untie
%reference
;
}
my
$matrix_dat
=
"$dat_dir/matrix.dat"
;
if
(! -e
$matrix_index
||
$force
) {
open
(MAT,
$matrix_dat
) ||
$self
->throw(
"Cannot open matrix file '$matrix_dat' for reading"
);
my
%matrices
;
unlink
$matrix_index
;
my
$matrix
=
tie
(
%matrices
,
'DB_File'
,
$matrix_index
, O_RDWR|O_CREAT, 0644,
$DB_HASH
) ||
$self
->throw(
"Cannot open file '$matrix_index': $!"
);
my
%id
;
my
$matrix_id
=
$matrix_index
.
'.id'
;
unlink
$matrix_id
;
my
$id
=
tie
(
%id
,
'DB_File'
,
$matrix_id
, O_RDWR|O_CREAT, 0644,
$DB_BTREE
) ||
$self
->throw(
"Cannot open file '$matrix_id': $!"
);
my
%name
;
my
$matrix_name
=
$matrix_index
.
'.name'
;
unlink
$matrix_name
;
my
$name
=
tie
(
%name
,
'DB_File'
,
$matrix_name
, O_RDWR|O_CREAT, 0644,
$DB_BTREE
) ||
$self
->throw(
"Cannot open file '$matrix_name': $!"
);
my
%site
;
my
$matrix_site
=
$site_index
.
'.matrix'
;
unlink
$matrix_site
;
my
$site
=
tie
(
%site
,
'DB_File'
,
$matrix_site
, O_RDWR|O_CREAT, 0644,
$DB_BTREE
) ||
$self
->throw(
"Cannot open file '$matrix_site': $!"
);
my
%factor
;
my
$matrix_factor
=
$factor_index
.
'.matrix'
;
unlink
$matrix_factor
;
my
$factor
=
tie
(
%factor
,
'DB_File'
,
$matrix_factor
, O_RDWR|O_CREAT, 0644,
$DB_BTREE
) ||
$self
->throw(
"Cannot open file '$matrix_factor': $!"
);
my
%reference
;
my
$matrix_reference
=
$reference_index
.
'.matrix'
;
unlink
$matrix_reference
;
my
$reference
=
tie
(
%reference
,
'DB_File'
,
$matrix_reference
, O_RDWR|O_CREAT, 0644,
$DB_BTREE
) ||
$self
->throw(
"Cannot open file '$matrix_reference': $!"
);
<MAT>; <MAT>; <MAT>;
my
@data
;
my
@matrix_data
;
my
@site_data
;
while
(<MAT>) {
if
(/^AC (\S+)/) {
$data
[0] = $1;
}
elsif
(/^ID (\S+)/) {
$data
[1] = $1;
$id
->put(
"$1"
,
$data
[0]);
}
elsif
(/^NA (.+)$/) {
$data
[2] = $1;
$name
->put(
"$1"
,
$data
[0]);
}
elsif
(/^DE (.+)$/) {
$data
[3] = $1;
}
elsif
(/^\d\d \s*(\S+)\s+(\S+)\s+(\S+)\s+(\S+)/) {
push
(
@matrix_data
,
join
(
"\t"
, ($1, $2, $3, $4)));
my
$num
= $1 + $2 + $3 + $4;
$data
[4] ||= 0;
if
(
$num
>
$data
[4]) {
$data
[4] =
$num
;
}
}
elsif
(/^BS ([\sa-zA-Z]+); (.+?); (-?\d+); \d+;.*; ([np])/) {
push
(
@site_data
,
join
(
'_'
, ($1, $2, $3, $4 eq
'p'
? 1 : -1)));
$site
->put(
$data
[0], $2);
}
elsif
(/^BF (T\d+)/) {
$factor
->put(
$data
[0],
"$1"
);
}
elsif
(/^RN .+?(RE\d+)/) {
$reference
->put(
$data
[0],
"$1"
);
}
elsif
(/^\/\//) {
my
$matrix_data
=
join
(INTERNAL_SEPARATOR,
@matrix_data
) ||
''
;
my
$longest_seq
= 0;
foreach
my
$site_seq
(
@site_data
) {
$site_seq
=~ s/ /-/g;
my
$length
=
length
(
$site_seq
);
if
(
$length
>
$longest_seq
) {
$longest_seq
=
$length
;
}
}
foreach
my
$site_seq
(
@site_data
) {
my
$length
=
length
(
$site_seq
);
if
(
$length
<
$longest_seq
) {
$site_seq
.=
'-'
x (
$longest_seq
-
$length
);
}
}
my
$site_data
=
join
(INTERNAL_SEPARATOR,
@site_data
) ||
''
;
$matrices
{
$data
[0]} =
join
(SEPARATOR, (
$data
[1] ||
''
,
$data
[2] ||
''
,
$data
[3] ||
''
,
$data
[4],
$matrix_data
,
$site_data
));
@data
=
@matrix_data
=
@site_data
= ();
}
}
close
(MAT);
$matrix
=
$id
=
$name
=
$site
=
$factor
=
$reference
=
undef
;
untie
%matrices
;
untie
%id
;
untie
%name
;
untie
%site
;
untie
%factor
;
untie
%reference
;
}
my
$factor_dat
=
"$dat_dir/factor.dat"
;
if
(! -e
$factor_index
||
$force
) {
open
(FAC,
$factor_dat
) ||
$self
->throw(
"Cannot open factor file '$factor_dat' for reading"
);
my
%factors
;
unlink
$factor_index
;
my
$factor
=
tie
(
%factors
,
'DB_File'
,
$factor_index
, O_RDWR|O_CREAT, 0644,
$DB_HASH
) ||
$self
->throw(
"Cannot open file '$factor_index': $!"
);
my
%id
;
my
$factor_id
=
$factor_index
.
'.id'
;
unlink
$factor_id
;
my
$id
=
tie
(
%id
,
'DB_File'
,
$factor_id
, O_RDWR|O_CREAT, 0644,
$DB_BTREE
) ||
$self
->throw(
"Cannot open file 'factor_id': $!"
);
my
%name
;
my
$factor_name
=
$factor_index
.
'.name'
;
unlink
$factor_name
;
my
$name
=
tie
(
%name
,
'DB_File'
,
$factor_name
, O_RDWR|O_CREAT, 0644,
$DB_BTREE
) ||
$self
->throw(
"Cannot open file '$factor_name': $!"
);
my
%species
;
my
$factor_species
=
$factor_index
.
'.species'
;
unlink
$factor_species
;
my
$species
=
tie
(
%species
,
'DB_File'
,
$factor_species
, O_RDWR|O_CREAT, 0644,
$DB_BTREE
) ||
$self
->throw(
"Cannot open file '$factor_species': $!"
);
my
%interactors
;
my
$factor_interactors
=
$factor_index
.
'.interactors'
;
unlink
$factor_interactors
;
my
$interact
=
tie
(
%interactors
,
'DB_File'
,
$factor_interactors
, O_RDWR|O_CREAT, 0644,
$DB_BTREE
) ||
$self
->throw(
"Cannot open file '$factor_interactors': $!"
);
my
%gene
;
my
$factor_gene
=
$gene_index
.
'.factor'
;
unlink
$factor_gene
;
my
$gene
=
tie
(
%gene
,
'DB_File'
,
$factor_gene
, O_RDWR|O_CREAT, 0644,
$DB_BTREE
) ||
$self
->throw(
"Cannot open file '$factor_gene': $!"
);
my
%matrix
;
my
$factor_matrix
=
$matrix_index
.
'.factor'
;
unlink
$factor_matrix
;
my
$matrix
=
tie
(
%matrix
,
'DB_File'
,
$factor_matrix
, O_RDWR|O_CREAT, 0644,
$DB_BTREE
) ||
$self
->throw(
"Cannot open file '$factor_matrix': $!"
);
my
%site
;
my
$factor_site
=
$site_index
.
'.factor'
;
unlink
$factor_site
;
my
$site
=
tie
(
%site
,
'DB_File'
,
$factor_site
, O_RDWR|O_CREAT, 0644,
$DB_BTREE
) ||
$self
->throw(
"Cannot open file '$factor_site': $!"
);
my
%fragment
;
my
$factor_fragment
=
$fragment_index
.
'.factor'
;
unlink
$factor_fragment
;
my
$fragment
=
tie
(
%fragment
,
'DB_File'
,
$factor_fragment
, O_RDWR|O_CREAT, 0644,
$DB_BTREE
) ||
$self
->throw(
"Cannot open file '$factor_fragment': $!"
);
my
%reference
;
my
$factor_reference
=
$reference_index
.
'.factor'
;
unlink
$factor_reference
;
my
$reference
=
tie
(
%reference
,
'DB_File'
,
$factor_reference
, O_RDWR|O_CREAT, 0644,
$DB_BTREE
) ||
$self
->throw(
"Cannot open file '$factor_reference': $!"
);
<FAC>; <FAC>; <FAC>;
my
@data
;
my
$sequence
=
''
;
while
(<FAC>) {
if
(/^AC (\S+)/) {
$data
[0] = $1;
}
elsif
(/^ID (\S+)/) {
$data
[1] = $1;
$id
->put(
"$1"
,
$data
[0]);
}
elsif
(/^FA (.+)$/) {
$data
[2] = $1;
$name
->put(
"$1"
,
$data
[0]);
}
elsif
(/^OS (.+)$/) {
my
$raw_species
= $1;
my
$taxid
=
$self
->_species_to_taxid(
$raw_species
);
$data
[3] =
$taxid
||
$raw_species
;
$species
->put(
$data
[3],
$data
[0]);
}
elsif
(/^GE (G\d+)/) {
$gene
->put(
$data
[0],
"$1"
);
}
elsif
(/^SQ (.+)$/) {
$sequence
.= $1;
}
elsif
(/^IN (T\d+)/) {
$interact
->put(
$data
[0],
"$1"
);
}
elsif
(/^MX (M\d+)/) {
$matrix
->put(
$data
[0],
"$1"
);
}
elsif
(/^BS (R\d+)/) {
$site
->put(
$data
[0],
"$1"
);
}
elsif
(/^BR (FR\d+)/) {
$fragment
->put(
$data
[0],
"$1"
);
}
elsif
(/^RN .+?(RE\d+)/) {
$reference
->put(
$data
[0],
"$1"
);
}
elsif
(/^\/\//) {
$factors
{
$data
[0]} =
join
(SEPARATOR, (
$data
[1] ||
''
,
$data
[2] ||
''
,
$data
[3] ||
''
,
$sequence
));
@data
= ();
$sequence
=
''
;
}
}
close
(FAC);
$factor
=
$id
=
$name
=
$species
=
$interact
=
$gene
=
$matrix
=
$site
=
$fragment
=
$reference
=
undef
;
untie
%factors
;
untie
%id
;
untie
%name
;
untie
%species
;
untie
%interactors
;
untie
%gene
;
untie
%matrix
;
untie
%site
;
untie
%fragment
;
untie
%reference
;
}
my
$fragment_dat
=
"$dat_dir/fragment.dat"
;
if
(! -e
$fragment_index
||
$force
) {
if
(
open
(FRA,
$fragment_dat
)) {
my
%fragments
;
unlink
$fragment_index
;
my
$fragment
=
tie
(
%fragments
,
'DB_File'
,
$fragment_index
, O_RDWR|O_CREAT, 0644,
$DB_HASH
) ||
$self
->throw(
"Cannot open file '$fragment_index': $!"
);
my
%id
;
my
$fragment_id
=
$fragment_index
.
'.id'
;
unlink
$fragment_id
;
my
$id
=
tie
(
%id
,
'DB_File'
,
$fragment_id
, O_RDWR|O_CREAT, 0644,
$DB_BTREE
) ||
$self
->throw(
"Cannot open file '$fragment_id': $!"
);
my
%qualities
;
my
$fragment_qualities
=
$fragment_index
.
'.qual'
;
unlink
$fragment_qualities
;
my
$quality
=
tie
(
%qualities
,
'DB_File'
,
$fragment_qualities
, O_RDWR|O_CREAT, 0644,
$DB_HASH
) ||
$self
->throw(
"Cannot open file '$fragment_qualities': $!"
);
my
%species
;
my
$fragment_species
=
$fragment_index
.
'.species'
;
unlink
$fragment_species
;
my
$species
=
tie
(
%species
,
'DB_File'
,
$fragment_species
, O_RDWR|O_CREAT, 0644,
$DB_BTREE
) ||
$self
->throw(
"Cannot open file '$fragment_species': $!"
);
my
%gene
;
my
$fragment_gene
=
$gene_index
.
'.fragment'
;
unlink
$fragment_gene
;
my
$gene
=
tie
(
%gene
,
'DB_File'
,
$fragment_gene
, O_RDWR|O_CREAT, 0644,
$DB_BTREE
) ||
$self
->throw(
"Cannot open file '$fragment_gene': $!"
);
my
%factor
;
my
$fragment_factor
=
$factor_index
.
'.fragment'
;
unlink
$fragment_factor
;
my
$factor
=
tie
(
%factor
,
'DB_File'
,
$fragment_factor
, O_RDWR|O_CREAT, 0644,
$DB_BTREE
) ||
$self
->throw(
"Cannot open file '$fragment_factor': $!"
);
my
%reference
;
my
$fragment_reference
=
$reference_index
.
'.fragment'
;
unlink
$fragment_reference
;
my
$reference
=
tie
(
%reference
,
'DB_File'
,
$fragment_reference
, O_RDWR|O_CREAT, 0644,
$DB_BTREE
) ||
$self
->throw(
"Cannot open file '$fragment_reference': $!"
);
<FRA>; <FRA>; <FRA>;
my
@data
;
while
(<FRA>) {
if
(/^AC (\S+)/) {
$data
[0] = $1;
}
elsif
(/^ID (\S+)/) {
$data
[1] = $1;
$id
->put(
"$1"
,
$data
[0]);
}
elsif
(/^DE Gene: (G\d+)(?:.+Gene: (G\d+))?/) {
my
(
$gene1
,
$gene2
) = ($1, $2);
$data
[2] =
$gene1
;
$data
[3] =
$gene2
;
$gene
->put(
$data
[0],
$gene1
);
$gene
->put(
$data
[0],
$gene2
)
if
$gene2
;
}
elsif
(/^OS (.+)$/) {
my
$raw_species
= $1;
my
$taxid
=
$self
->_species_to_taxid(
$raw_species
);
$data
[4] =
$taxid
||
$raw_species
;
$species
->put(
$data
[4],
$data
[0]);
}
elsif
(/^SQ [atcgn]*([ATCGN]+)[atcgn]*/) {
$data
[5] .= $1;
}
elsif
(/^SC Build (\S+):$/) {
$data
[6] = $1;
}
elsif
(/^RN .+?(RE\d+)/) {
$reference
->put(
$data
[0],
"$1"
);
}
elsif
(/^BF (T\d+); .+?; Quality: (\d)/) {
$factor
->put(
$data
[0],
"$1"
);
$qualities
{
$data
[0].SEPARATOR.$1} = $2;
}
elsif
(/^\/\//) {
$fragments
{
$data
[0]} =
join
(SEPARATOR, (
$data
[1] ||
''
,
$data
[2] ||
''
,
$data
[3] ||
''
,
$data
[4] ||
''
,
$data
[5] ||
''
,
$data
[6] ||
''
));
@data
= ();
}
}
close
(FRA);
$fragment
=
$id
=
$species
=
$quality
=
$gene
=
$factor
=
$reference
=
undef
;
untie
%fragments
;
untie
%id
;
untie
%species
;
untie
%qualities
;
untie
%gene
;
untie
%factor
;
untie
%reference
;
}
else
{
$self
->
warn
(
"Cannot open fragment file '$fragment_dat' for reading, assuming you have an old version of Transfac Pro with no fragment.dat file."
);
}
}
}
sub
_db_connect {
my
$self
=
shift
;
return
if
$self
->{
'_initialized'
};
my
$index_dir
=
$self
->index_directory;
my
$gene_index
=
"$index_dir/gene.dat.index"
;
my
$reference_index
=
"$index_dir/reference.dat.index"
;
my
$matrix_index
=
"$index_dir/matrix.dat.index"
;
my
$factor_index
=
"$index_dir/factor.dat.index"
;
my
$site_index
=
"$index_dir/site.dat.index"
;
my
$fragment_index
=
"$index_dir/fragment.dat.index"
;
foreach
(
$gene_index
,
$reference_index
,
$matrix_index
,
$factor_index
,
$site_index
,
$fragment_index
) {
if
(! -e
$_
) {
}
}
{
$self
->{reference}->{data} = {};
tie
(%{
$self
->{reference}->{data}},
'DB_File'
,
$reference_index
, O_RDWR,
undef
,
$DB_HASH
) ||
$self
->throw(
"Cannot open file '$reference_index': $!"
);
my
$reference_pubmed
=
$reference_index
.
'.pubmed'
;
$self
->{reference}->{pubmed} =
tie
(%{
$self
->{reference}->{pubmed}},
'DB_File'
,
$reference_pubmed
, O_RDWR,
undef
,
$DB_BTREE
) ||
$self
->throw(
"Cannot open file '$reference_pubmed': $!"
);
my
$reference_gene
=
$gene_index
.
'.reference'
;
$self
->{gene}->{reference} =
tie
(%{
$self
->{gene}->{reference}},
'DB_File'
,
$reference_gene
, O_RDWR,
undef
,
$DB_BTREE
) ||
$self
->throw(
"Cannot open file '$reference_gene': $!"
);
my
$reference_site
=
$site_index
.
'.reference'
;
$self
->{site}->{reference} =
tie
(%{
$self
->{site}->{reference}},
'DB_File'
,
$reference_site
, O_RDWR,
undef
,
$DB_BTREE
) ||
$self
->throw(
"Cannot open file '$reference_site': $!"
);
my
$reference_fragment
=
$fragment_index
.
'.reference'
;
$self
->{fragment}->{reference} =
tie
(%{
$self
->{fragment}->{reference}},
'DB_File'
,
$reference_fragment
, O_RDWR,
undef
,
$DB_BTREE
) ||
$self
->throw(
"Cannot open file '$reference_fragment': $!"
);
my
$reference_factor
=
$factor_index
.
'.reference'
;
$self
->{factor}->{reference} =
tie
(%{
$self
->{factor}->{reference}},
'DB_File'
,
$reference_factor
,
undef
, 0644,
$DB_BTREE
) ||
$self
->throw(
"Cannot open file '$reference_factor': $!"
);
my
$reference_matrix
=
$matrix_index
.
'.reference'
;
$self
->{matrix}->{reference} =
tie
(%{
$self
->{matrix}->{reference}},
'DB_File'
,
$reference_matrix
,
undef
, 0644,
$DB_BTREE
) ||
$self
->throw(
"Cannot open file '$reference_matrix': $!"
);
}
{
$self
->{gene}->{data} = {};
tie
(%{
$self
->{gene}->{data}},
'DB_File'
,
$gene_index
, O_RDWR,
undef
,
$DB_HASH
) ||
$self
->throw(
"Cannot open file '$gene_index': $!"
);
my
$gene_id
=
$gene_index
.
'.id'
;
$self
->{gene}->{id} =
tie
(%{
$self
->{gene}->{id}},
'DB_File'
,
$gene_id
, O_RDWR,
undef
,
$DB_BTREE
) ||
$self
->throw(
"Cannot open file '$gene_id': $!"
);
my
$gene_name
=
$gene_index
.
'.name'
;
$self
->{gene}->{name} =
tie
(%{
$self
->{gene}->{name}},
'DB_File'
,
$gene_name
, O_RDWR,
undef
,
$DB_BTREE
) ||
$self
->throw(
"Cannot open file '$gene_name': $!"
);
my
$gene_species
=
$gene_index
.
'.species'
;
$self
->{gene}->{species} =
tie
(%{
$self
->{gene}->{species}},
'DB_File'
,
$gene_species
, O_RDWR,
undef
,
$DB_BTREE
) ||
$self
->throw(
"Cannot open file '$gene_species': $!"
);
my
$gene_site
=
$site_index
.
'.gene'
;
$self
->{site}->{gene} =
tie
(%{
$self
->{site}->{gene}},
'DB_File'
,
$gene_site
, O_RDWR,
undef
,
$DB_BTREE
) ||
$self
->throw(
"Cannot open file '$gene_site': $!"
);
my
$gene_fragment
=
$fragment_index
.
'.gene'
;
$self
->{fragment}->{gene} =
tie
(%{
$self
->{fragment}->{gene}},
'DB_File'
,
$gene_fragment
, O_RDWR,
undef
,
$DB_BTREE
) ||
$self
->throw(
"Cannot open file '$gene_fragment': $!"
);
my
$gene_factor
=
$factor_index
.
'.gene'
;
$self
->{factor}->{gene} =
tie
(%{
$self
->{factor}->{gene}},
'DB_File'
,
$gene_factor
, O_RDWR,
undef
,
$DB_BTREE
) ||
$self
->throw(
"Cannot open file '$gene_factor': $!"
);
my
$gene_reference
=
$reference_index
.
'.gene'
;
$self
->{reference}->{gene} =
tie
(%{
$self
->{reference}->{gene}},
'DB_File'
,
$gene_reference
, O_RDWR,
undef
,
$DB_BTREE
) ||
$self
->throw(
"Cannot open file '$gene_reference': $!"
);
}
{
$self
->{site}->{data} = {};
tie
(%{
$self
->{site}->{data}},
'DB_File'
,
$site_index
, O_RDWR,
undef
,
$DB_HASH
) ||
$self
->throw(
"Cannot open file '$site_index': $!"
);
my
$site_id
=
$site_index
.
'.id'
;
$self
->{site}->{id} =
tie
(%{
$self
->{site}->{id}},
'DB_File'
,
$site_id
, O_RDWR,
undef
,
$DB_BTREE
) ||
$self
->throw(
"Cannot open file '$site_id': $!"
);
my
$site_species
=
$site_index
.
'.species'
;
$self
->{site}->{species} =
tie
(%{
$self
->{site}->{species}},
'DB_File'
,
$site_species
, O_RDWR,
undef
,
$DB_BTREE
) ||
$self
->throw(
"Cannot open file $site_species': $!"
);
my
$site_qualities
=
$site_index
.
'.qual'
;
$self
->{quality} = {};
tie
(%{
$self
->{quality}},
'DB_File'
,
$site_qualities
, O_RDWR,
undef
,
$DB_HASH
) ||
$self
->throw(
"Cannot open file '$site_qualities': $!"
);
my
$site_gene
=
$gene_index
.
'.site'
;
$self
->{gene}->{site} =
tie
(%{
$self
->{gene}->{site}},
'DB_File'
,
$site_gene
, O_RDWR,
undef
,
$DB_BTREE
) ||
$self
->throw(
"Cannot open file '$site_gene': $!"
);
my
$site_matrix
=
$matrix_index
.
'.site'
;
$self
->{matrix}->{site} =
tie
(%{
$self
->{matrix}->{site}},
'DB_File'
,
$site_matrix
, O_RDWR,
undef
,
$DB_BTREE
) ||
$self
->throw(
"Cannot open file '$site_matrix': $!"
);
my
$site_factor
=
$factor_index
.
'.site'
;
$self
->{factor}->{site} =
tie
(%{
$self
->{factor}->{site}},
'DB_File'
,
$site_factor
, O_RDWR,
undef
,
$DB_BTREE
) ||
$self
->throw(
"Cannot open file '$site_factor': $!"
);
my
$site_reference
=
$reference_index
.
'.site'
;
$self
->{reference}->{site} =
tie
(%{
$self
->{reference}->{site}},
'DB_File'
,
$site_reference
, O_RDWR,
undef
,
$DB_BTREE
) ||
$self
->throw(
"Cannot open file '$site_reference': $!"
);
}
if
(-e
$fragment_index
) {
$self
->{fragment}->{data} = {};
tie
(%{
$self
->{fragment}->{data}},
'DB_File'
,
$fragment_index
, O_RDWR,
undef
,
$DB_HASH
) ||
$self
->throw(
"Cannot open file '$fragment_index': $!"
);
my
$fragment_id
=
$fragment_index
.
'.id'
;
$self
->{fragment}->{id} =
tie
(%{
$self
->{fragment}->{id}},
'DB_File'
,
$fragment_id
, O_RDWR,
undef
,
$DB_BTREE
) ||
$self
->throw(
"Cannot open file '$fragment_id': $!"
);
my
$fragment_species
=
$fragment_index
.
'.species'
;
$self
->{fragment}->{species} =
tie
(%{
$self
->{fragment}->{species}},
'DB_File'
,
$fragment_species
, O_RDWR,
undef
,
$DB_BTREE
) ||
$self
->throw(
"Cannot open file $fragment_species': $!"
);
my
$fragment_qualities
=
$fragment_index
.
'.qual'
;
$self
->{fragment_quality} = {};
tie
(%{
$self
->{fragment_quality}},
'DB_File'
,
$fragment_qualities
, O_RDWR,
undef
,
$DB_HASH
) ||
$self
->throw(
"Cannot open file '$fragment_qualities': $!"
);
my
$fragment_gene
=
$gene_index
.
'.fragment'
;
$self
->{gene}->{fragment} =
tie
(%{
$self
->{gene}->{fragment}},
'DB_File'
,
$fragment_gene
, O_RDWR,
undef
,
$DB_BTREE
) ||
$self
->throw(
"Cannot open file '$fragment_gene': $!"
);
my
$fragment_factor
=
$factor_index
.
'.fragment'
;
$self
->{factor}->{fragment} =
tie
(%{
$self
->{factor}->{fragment}},
'DB_File'
,
$fragment_factor
, O_RDWR,
undef
,
$DB_BTREE
) ||
$self
->throw(
"Cannot open file '$fragment_factor': $!"
);
my
$fragment_reference
=
$reference_index
.
'.fragment'
;
$self
->{reference}->{fragment} =
tie
(%{
$self
->{reference}->{fragment}},
'DB_File'
,
$fragment_reference
, O_RDWR,
undef
,
$DB_BTREE
) ||
$self
->throw(
"Cannot open file '$fragment_reference': $!"
);
}
else
{
die
"no fragment_index at '$fragment_index'\n"
;
}
{
$self
->{matrix}->{data} = {};
tie
(%{
$self
->{matrix}->{data}},
'DB_File'
,
$matrix_index
, O_RDWR,
undef
,
$DB_HASH
) ||
$self
->throw(
"Cannot open file '$matrix_index': $!"
);
my
$matrix_id
=
$matrix_index
.
'.id'
;
$self
->{matrix}->{id} =
tie
(%{
$self
->{matrix}->{id}},
'DB_File'
,
$matrix_id
, O_RDWR,
undef
,
$DB_BTREE
) ||
$self
->throw(
"Cannot open file '$matrix_id': $!"
);
my
$matrix_name
=
$matrix_index
.
'.name'
;
$self
->{matrix}->{name} =
tie
(%{
$self
->{matrix}->{name}},
'DB_File'
,
$matrix_name
, O_RDWR,
undef
,
$DB_BTREE
) ||
$self
->throw(
"Cannot open file '$matrix_name': $!"
);
my
$matrix_site
=
$site_index
.
'.matrix'
;
$self
->{site}->{matrix} =
tie
(%{
$self
->{site}->{matrix}},
'DB_File'
,
$matrix_site
, O_RDWR,
undef
,
$DB_BTREE
) ||
$self
->throw(
"Cannot open file '$matrix_site': $!"
);
my
$matrix_factor
=
$factor_index
.
'.matrix'
;
$self
->{factor}->{matrix} =
tie
(%{
$self
->{factor}->{matrix}},
'DB_File'
,
$matrix_factor
, O_RDWR,
undef
,
$DB_BTREE
) ||
$self
->throw(
"Cannot open file '$matrix_factor': $!"
);
my
$matrix_reference
=
$reference_index
.
'.matrix'
;
$self
->{reference}->{matrix} =
tie
(%{
$self
->{reference}->{matrix}},
'DB_File'
,
$matrix_reference
, O_RDWR,
undef
,
$DB_BTREE
) ||
$self
->throw(
"Cannot open file '$matrix_reference': $!"
);
}
{
$self
->{factor}->{data} = {};
tie
(%{
$self
->{factor}->{data}},
'DB_File'
,
$factor_index
, O_RDWR,
undef
,
$DB_HASH
) ||
$self
->throw(
"Cannot open file '$factor_index': $!"
);
my
$factor_id
=
$factor_index
.
'.id'
;
$self
->{factor}->{id} =
tie
(%{
$self
->{factor}->{id}},
'DB_File'
,
$factor_id
, O_RDWR,
undef
,
$DB_BTREE
) ||
$self
->throw(
"Cannot open file 'factor_id': $!"
);
my
$factor_name
=
$factor_index
.
'.name'
;
$self
->{factor}->{name} =
tie
(%{
$self
->{factor}->{name}},
'DB_File'
,
$factor_name
, O_RDWR,
undef
,
$DB_BTREE
) ||
$self
->throw(
"Cannot open file '$factor_name': $!"
);
my
$factor_species
=
$factor_index
.
'.species'
;
$self
->{factor}->{species} =
tie
(%{
$self
->{factor}->{species}},
'DB_File'
,
$factor_species
, O_RDWR,
undef
,
$DB_BTREE
) ||
$self
->throw(
"Cannot open file '$factor_species': $!"
);
my
$factor_interactors
=
$factor_index
.
'.interactors'
;
$self
->{factor}->{interactors} =
tie
(%{
$self
->{factor}->{interactors}},
'DB_File'
,
$factor_interactors
, O_RDWR,
undef
,
$DB_BTREE
) ||
$self
->throw(
"Cannot open file '$factor_interactors': $!"
);
my
$factor_gene
=
$gene_index
.
'.factor'
;
$self
->{gene}->{factor} =
tie
(%{
$self
->{gene}->{factor}},
'DB_File'
,
$factor_gene
, O_RDWR,
undef
,
$DB_BTREE
) ||
$self
->throw(
"Cannot open file '$factor_gene': $!"
);
my
$factor_matrix
=
$matrix_index
.
'.factor'
;
$self
->{matrix}->{factor} =
tie
(%{
$self
->{matrix}->{factor}},
'DB_File'
,
$factor_matrix
, O_RDWR,
undef
,
$DB_BTREE
) ||
$self
->throw(
"Cannot open file '$factor_matrix': $!"
);
my
$factor_site
=
$site_index
.
'.factor'
;
$self
->{site}->{factor} =
tie
(%{
$self
->{site}->{factor}},
'DB_File'
,
$factor_site
, O_RDWR,
undef
,
$DB_BTREE
) ||
$self
->throw(
"Cannot open file '$factor_site': $!"
);
my
$factor_fragment
=
$fragment_index
.
'.factor'
;
$self
->{fragment}->{factor} =
tie
(%{
$self
->{fragment}->{factor}},
'DB_File'
,
$factor_fragment
, O_RDWR,
undef
,
$DB_BTREE
) ||
$self
->throw(
"Cannot open file '$factor_fragment': $!"
);
my
$factor_reference
=
$reference_index
.
'.factor'
;
$self
->{reference}->{factor} =
tie
(%{
$self
->{reference}->{factor}},
'DB_File'
,
$factor_reference
, O_RDWR,
undef
,
$DB_BTREE
) ||
$self
->throw(
"Cannot open file '$factor_reference': $!"
);
}
$self
->{
'_initialized'
} = 1;
}
Hide Show 11 lines of Pod
sub
index_directory {
my
$self
=
shift
;
return
$self
->{
'index_directory'
} =
shift
if
@_
;
return
$self
->{
'index_directory'
};
}
sub
_species_to_taxid {
my
(
$self
,
$raw_species
) =
@_
;
$raw_species
or
return
;
my
$species_string
;
my
@split
=
split
(
', '
,
$raw_species
);
(
@split
> 1) ? (
$species_string
=
$split
[1]) : (
$species_string
=
$split
[0]);
my
$ncbi_taxid
;
if
(
$species_string
=~ /^[A-Z]\S+ \S+$/) {
SWITCH:
for
(
$species_string
) {
/^Darnel ryegrass/ &&
do
{
$ncbi_taxid
= 34176;
last
; };
/^Coix lacryma/ &&
do
{
$ncbi_taxid
= 4505;
last
; };
/^Rattus spec/ &&
do
{
$ncbi_taxid
= 10116;
last
; };
/^Mus spec/ &&
do
{
$ncbi_taxid
= 10090;
last
; };
/^Equus spec/ &&
do
{
$ncbi_taxid
= 9796;
last
; };
/^Cavia sp/ &&
do
{
$ncbi_taxid
= 10141;
last
; };
/^Marsh marigold/ &&
do
{
$ncbi_taxid
= 3449;
last
; };
/^Phalaenopsis sp/ &&
do
{
$ncbi_taxid
= 36900;
last
; };
/^Anthirrhinum majus/ &&
do
{
$ncbi_taxid
= 4151;
last
; };
/^Equus spec/ &&
do
{
$ncbi_taxid
= 9796;
last
; };
/^Lycopodium spec/ &&
do
{
$ncbi_taxid
= 13840;
last
; };
/^Autographa californica/ &&
do
{
$ncbi_taxid
= 307456;
last
; };
/^E26 AEV/ &&
do
{
$ncbi_taxid
= 31920;
last
; };
/^Pseudocentrotus miliaris/ &&
do
{
$ncbi_taxid
= 7677;
last
; };
/^SL3-3 (?:retro)?virus/ &&
do
{
$ncbi_taxid
= 53454;
last
; };
/^Petunia sp/ &&
do
{
$ncbi_taxid
= 4104;
last
; };
}
if
(!
$ncbi_taxid
&&
defined
$self
->{_tax_db}) {
(
$ncbi_taxid
) =
$self
->{_tax_db}->get_taxonids(
$species_string
);
}
}
else
{
SWITCH:
for
(
$raw_species
) {
my
$first_letter
=
substr
(
$raw_species
, 0, 1);
$first_letter
eq
'A'
&&
do
{
/^Adiantum raddianum/ &&
do
{
$ncbi_taxid
= 32168;
last
; };
/^Avian sarcoma virus \(strain 17\)/ &&
do
{
$ncbi_taxid
= 11877;
last
; };
/^AMV/ &&
do
{
$ncbi_taxid
= 11866;
last
; };
/^AEV/ &&
do
{
$ncbi_taxid
= 11861;
last
; };
/^AS42|^Avian musculoaponeurotic/ &&
do
{
$ncbi_taxid
= 11873;
last
; };
/^Avian myelocytomatosis/ &&
do
{
$ncbi_taxid
= 11869;
last
; };
/^ASV 31/ &&
do
{
$ncbi_taxid
= 35270;
last
; };
/^A-MuLV/ &&
do
{
$ncbi_taxid
= 188539;
last
; };
/^Asparagus officinalis/ &&
do
{
$ncbi_taxid
= 4686;
last
; };
/^Agrobacterium tumefaciens/ &&
do
{
$ncbi_taxid
= 358;
last
; };
/^ALV/ &&
do
{
$ncbi_taxid
= 11864;
last
; };
/^AAV/ &&
do
{
$ncbi_taxid
= 272636;
last
; };
/^AKV MLV/ &&
do
{
$ncbi_taxid
= 11791;
last
; };
last
;
};
$first_letter
eq
'B'
&&
do
{
/^BPV-1/ &&
do
{
$ncbi_taxid
= 10559;
last
; };
/^BKV/ &&
do
{
$ncbi_taxid
= 10629;
last
; };
/^Bolivian squirrel monkey/ &&
do
{
$ncbi_taxid
= 39432;
last
; };
last
;
};
$first_letter
eq
'C'
&&
do
{
/^Cauliflower/ &&
do
{
$ncbi_taxid
= 3715;
last
; };
/^Chamek/ &&
do
{
$ncbi_taxid
= 118643;
last
; };
/^Candida albicans/ &&
do
{
$ncbi_taxid
= 5476;
last
; };
/^CaMV/ &&
do
{
$ncbi_taxid
= 10641;
last
; };
last
;
};
$first_letter
eq
'E'
&&
do
{
/^Eucalyptus gunnii/ &&
do
{
$ncbi_taxid
= 3933;
last
; };
/^EBV, Epstein-Barr virus/ &&
do
{
$ncbi_taxid
= 10376;
last
; };
/^Eucalyptus globulus subsp. bicostata/ &&
do
{
$ncbi_taxid
= 71272;
last
; };
/^Eucalyptus globulus subsp. globulus/ &&
do
{
$ncbi_taxid
= 71271;
last
; };
last
;
};
$first_letter
eq
'F'
&&
do
{
/^FBR MuLV/ &&
do
{
$ncbi_taxid
= 11806;
last
; };
/^FBJ MuLV/ &&
do
{
$ncbi_taxid
= 11805;
last
; };
/^FeLV|Feline leukemia/ &&
do
{
$ncbi_taxid
= 11923;
last
; };
/^Flaveria trinervia/ &&
do
{
$ncbi_taxid
= 4227;
last
; };
/^FSV/ &&
do
{
$ncbi_taxid
= 11885;
last
; };
/^F-MuLV/ &&
do
{
$ncbi_taxid
= 11795;
last
; };
last
;
};
$first_letter
eq
'H'
&&
do
{
/^HSV-1/ &&
do
{
$ncbi_taxid
= 10298;
last
; };
/^HTLV-I/ &&
do
{
$ncbi_taxid
= 11908;
last
; };
/^HIV-1/ &&
do
{
$ncbi_taxid
= 11676;
last
; };
/^HPV-16/ &&
do
{
$ncbi_taxid
= 333760;
last
; };
/^HBV/ &&
do
{
$ncbi_taxid
= 10407;
last
; };
/^HBI/ &&
do
{
$ncbi_taxid
= 11867;
last
; };
/^HPV-8/ &&
do
{
$ncbi_taxid
= 10579;
last
; };
/^HPV-11/ &&
do
{
$ncbi_taxid
= 10580;
last
; };
/^HPV-18/ &&
do
{
$ncbi_taxid
= 333761;
last
; };
/^HCMV/ &&
do
{
$ncbi_taxid
= 10359;
last
; };
/^HSV/ &&
do
{
$ncbi_taxid
= 126283;
last
; };
/^HSV-2/ &&
do
{
$ncbi_taxid
= 10310;
last
; };
/^HCV/ &&
do
{
$ncbi_taxid
= 11108;
last
; };
/^HIV-2/ &&
do
{
$ncbi_taxid
= 11709;
last
; };
last
;
};
$first_letter
eq
'M'
&&
do
{
/^MMTV/ &&
do
{
$ncbi_taxid
= 11757;
last
; };
/^Mo-MuLV/ &&
do
{
$ncbi_taxid
= 11801;
last
; };
/^MuLV/ &&
do
{
$ncbi_taxid
= 11786;
last
; };
/^MSV/ &&
do
{
$ncbi_taxid
= 11802;
last
; };
/^MC29/ &&
do
{
$ncbi_taxid
= 11868;
last
; };
/^MVM/ &&
do
{
$ncbi_taxid
= 10794;
last
; };
/^MH2E21/ &&
do
{
$ncbi_taxid
= 11955;
last
; };
last
;
};
$first_letter
eq
'R'
&&
do
{
/^Raphanus sativus/ &&
do
{
$ncbi_taxid
= 3726;
last
; };
/^REV-T/ &&
do
{
$ncbi_taxid
= 11636;
last
; };
/^RAV-0/ &&
do
{
$ncbi_taxid
= 11867;
last
; };
/^RSV/ &&
do
{
$ncbi_taxid
= 11886;
last
; };
/^RadLV/ &&
do
{
$ncbi_taxid
= 31689;
last
; };
/^RTBV/ &&
do
{
$ncbi_taxid
= 10654;
last
; };
last
;
};
$first_letter
eq
'S'
&&
do
{
/^SV40/ &&
do
{
$ncbi_taxid
= 10633;
last
; };
/^Sesbania rostrata/ &&
do
{
$ncbi_taxid
= 3895;
last
; };
/^SIV/ &&
do
{
$ncbi_taxid
= 11723;
last
; };
/^Spinacia oleracea/ &&
do
{
$ncbi_taxid
= 3562;
last
; };
/^SCMV/ &&
do
{
$ncbi_taxid
= 10364;
last
; };
last
;
};
$first_letter
eq
'a'
&&
do
{
/^adenovirus type 5/ &&
do
{
$ncbi_taxid
= 28285;
last
; };
/^adenovirus type 2/ &&
do
{
$ncbi_taxid
= 10515;
last
; };
/^adenovirus/ &&
do
{
$ncbi_taxid
= 189831;
last
; };
last
;
};
$first_letter
eq
'b'
&&
do
{
/^bell pepper/ &&
do
{
$ncbi_taxid
= 4072;
last
; };
/^baculovirus, Autographa californica/ &&
do
{
$ncbi_taxid
= 46015;
last
; };
/^broccoli/ &&
do
{
$ncbi_taxid
= 36774;
last
; };
/^barley/ &&
do
{
$ncbi_taxid
= 112509;
last
; };
last
;
};
$first_letter
eq
'c'
&&
do
{
/^clawed frog/ &&
do
{
$ncbi_taxid
= 8355;
last
; };
/^chipmunk/ &&
do
{
$ncbi_taxid
= 64680;
last
; };
/^common tree shrew/ &&
do
{
$ncbi_taxid
= 37347;
last
; };
/^cat/ &&
do
{
$ncbi_taxid
= 9685;
last
; };
last
;
};
/^NK24/ &&
do
{
$ncbi_taxid
= 11955;
last
; };
/^OK10/ &&
do
{
$ncbi_taxid
= 11871;
last
; };
/^Dendrobium grex/ &&
do
{
$ncbi_taxid
= 84618;
last
; };
/^KSHV/ &&
do
{
$ncbi_taxid
= 37296;
last
; };
/^Oncidium/ &&
do
{
$ncbi_taxid
= 96474;
last
; };
/^Japanese quail/ &&
do
{
$ncbi_taxid
= 93934;
last
; };
/^Nile tilapia/ &&
do
{
$ncbi_taxid
= 8128;
last
; };
/^GALV/ &&
do
{
$ncbi_taxid
= 11840;
last
; };
/^JCV/ &&
do
{
$ncbi_taxid
= 10632;
last
; };
/^LPV/ &&
do
{
$ncbi_taxid
= 10574;
last
; };
/^Py,/ &&
do
{
$ncbi_taxid
= 36362;
last
; };
/^DHBV/ &&
do
{
$ncbi_taxid
= 12639;
last
; };
/^VZV/ &&
do
{
$ncbi_taxid
= 10335;
last
; };
/^Vicia faba/ &&
do
{
$ncbi_taxid
= 3906;
last
; };
/^hamster/ &&
do
{
$ncbi_taxid
= 10029;
last
; };
/^sea urchin/ &&
do
{
$ncbi_taxid
= 7668;
last
; };
/^fruit fly/ &&
do
{
$ncbi_taxid
= 7227;
last
; };
/^halibut/ &&
do
{
$ncbi_taxid
= 8267;
last
; };
/^vaccinia virus/ &&
do
{
$ncbi_taxid
= 10245;
last
; };
/^taxonomic class Mammalia/ &&
do
{
$ncbi_taxid
= 40674;
last
; };
/^taxonomic class Vertebrata/ &&
do
{
$ncbi_taxid
= 7742;
last
; };
/^dog/ &&
do
{
$ncbi_taxid
= 9615;
last
; };
/^parsley/ &&
do
{
$ncbi_taxid
= 4043;
last
; };
/^mouse, Mus domesticus Torino/ &&
do
{
$ncbi_taxid
= 10092;
last
; };
/^lemur, Eulemur fulvus collaris/ &&
do
{
$ncbi_taxid
= 47178;
last
; };
/^red sea bream/ &&
do
{
$ncbi_taxid
= 143350;
last
; };
/^zebra finch/ &&
do
{
$ncbi_taxid
= 59729;
last
; };
/^mung bean/ &&
do
{
$ncbi_taxid
= 3916;
last
; };
/^soybean/ &&
do
{
$ncbi_taxid
= 3847;
last
; };
/^oat/ &&
do
{
$ncbi_taxid
= 4498;
last
; };
/^pseudorabies virus/ &&
do
{
$ncbi_taxid
= 10345;
last
; };
}
}
$self
->
warn
(
"Didn't know what species '$raw_species' was, unable to classify"
)
unless
$ncbi_taxid
;
return
$ncbi_taxid
;
}
1;