$Bio::DB::SeqFeature::Store::DBI::SQLite::VERSION
=
'1.7.5'
;
use
Cwd
qw(abs_path getcwd)
;
my
(
@BINS
,
%BINS
);
{
@BINS
=
map
{2*
*$_
} (17, 20, 23, 26, 29);
my
$start
=0;
for
my
$b
(
sort
{
$b
<=>
$a
}
@BINS
) {
$BINS
{
$b
} =
$start
;
$start
+=
$BINS
[-1]/
$b
;
}
}
sub
calculate_bin {
my
$self
=
shift
;
my
(
$start
,
$end
) =
@_
;
my
$len
=
$end
-
$start
;
for
my
$bin
(
@BINS
) {
next
if
$len
>
$bin
;
my
$binstart
=
int
$start
/
$bin
;
my
$binend
=
int
$end
/
$bin
;
return
$binstart
+
$BINS
{
$bin
}
if
$binstart
==
$binend
;
}
die
"unreasonable coordinates "
,
$start
+1,
"..$end"
;
}
sub
search_bins {
my
$self
=
shift
;
my
(
$start
,
$end
) =
@_
;
my
@results
;
for
my
$bin
(
@BINS
) {
my
$binstart
=
int
$start
/
$bin
;
my
$binend
=
int
$end
/
$bin
;
push
@results
,
$binstart
+
$BINS
{
$bin
}..
$binend
+
$BINS
{
$bin
};
}
return
@results
;
}
sub
init {
my
$self
=
shift
;
my
(
$dsn
,
$is_temporary
,
$autoindex
,
$namespace
,
$dump_dir
,
$user
,
$pass
,
$dbi_options
,
$writeable
,
$fts
,
$create
,
) = rearrange([
'DSN'
,
[
'TEMP'
,
'TEMPORARY'
],
'AUTOINDEX'
,
'NAMESPACE'
,
[
'DUMP_DIR'
,
'DUMPDIR'
,
'TMP'
,
'TMPDIR'
],
'USER'
,
[
'PASS'
,
'PASSWD'
,
'PASSWORD'
],
[
'OPTIONS'
,
'DBI_OPTIONS'
,
'DBI_ATTR'
],
[
'WRITE'
,
'WRITEABLE'
],
'FTS'
,
'CREATE'
,
],
@_
);
$dbi_options
||= {};
$writeable
= 1
if
$is_temporary
or
$dump_dir
;
$dsn
or
$self
->throw(
"Usage: "
.__PACKAGE__.
"->init(-dsn => \$dbh || \$dsn)"
);
my
$dbh
;
if
(
ref
$dsn
) {
$dbh
=
$dsn
;
}
else
{
$dsn
=
"dbi:SQLite:$dsn"
unless
$dsn
=~ /^dbi:/;
$dbh
= DBI->
connect
(
$dsn
,
$user
,
$pass
,
$dbi_options
) or
$self
->throw(
$DBI::errstr
);
$dbh
->
do
(
"PRAGMA synchronous = OFF;"
);
$dbh
->
do
(
"PRAGMA temp_store = MEMORY;"
);
$dbh
->
do
(
"PRAGMA cache_size = 20000;"
);
my
$cwd
= getcwd;
my
(
$db_file
) = (
$dsn
=~ m/(?:db(?:name)?|database)=(.+)$/);
$self
->{dbh_file} =
"$cwd/$db_file"
;
}
$self
->{dbh} =
$dbh
;
$self
->{fts} =
$fts
;
$self
->{is_temp} =
$is_temporary
;
$self
->{namespace} =
$namespace
;
$self
->{writeable} =
$writeable
;
$self
->default_settings;
$self
->autoindex(
$autoindex
)
if
defined
$autoindex
;
$self
->dumpdir(
$dump_dir
)
if
$dump_dir
;
if
(
$self
->is_temp) {
$self
->init_tmp_database();
}
elsif
(
$create
) {
$self
->init_database(
'erase'
);
}
}
sub
table_definitions {
my
$self
=
shift
;
my
$defs
=
{
feature
=>
<<END,
(
id integer primary key autoincrement,
typeid integer not null,
strand integer default 0,
"indexed" integer default 1,
object blob not null
)
END
locationlist
=>
<<END,
(
id integer primary key autoincrement,
seqname text not null
);
create index index_locationlist on locationlist (seqname);
END
typelist
=>
<<END,
(
id integer primary key autoincrement,
tag text not null collate nocase
);
create index index_typelist on typelist (tag);
END
name
=>
<<END,
(
id integer not null,
name text not null collate nocase,
display_name integer default 0
);
create index index_name_id on name(id);
create index index_name_name on name(name);
END
attribute
=>
<<END,
(
id integer not null,
attribute_id integer not null,
attribute_value text collate nocase
);
create index index_attribute_id on attribute(attribute_id);
create index index_attribute_value on attribute(attribute_value);
END
attributelist
=>
<<END,
(
id integer primary key autoincrement,
tag text not null
);
create index index_attributelist_tag on attributelist(tag);
END
parent2child
=>
<<END,
(
id integer,
child integer,
primary key(id, child)
) without rowid;
END
meta
=>
<<END,
(
name text primary key,
value text not null
)
END
sequence
=>
<<END,
(
id integer not null,
offset integer not null,
sequence blob,
primary key(id,offset)
)
END
};
if
(
$self
->{
'fts'
}) {
delete
(
$defs
->{attribute});
}
unless
(
$self
->_has_spatial_index) {
$defs
->{feature_location} =
<<END;
(
id int(10) primary key,
seqid int(10),
bin int,
start int,
end int
);
create index index_feature_location on feature_location(seqid,bin,start,end);
END
}
if
(EXPERIMENTAL_COVERAGE) {
$defs
->{interval_stats} =
<<END;
(
typeid integer not null,
seqid integer not null,
bin integer not null,
cum_count integer not null,
primary key (typeid,seqid,bin)
) without rowid;
END
}
return
$defs
;
}
sub
_init_database {
my
$self
=
shift
;
$self
->_create_spatial_index;
$self
->_create_attribute_fts;
$self
->SUPER::_init_database(
@_
);
}
sub
init_tmp_database {
my
$self
=
shift
;
my
$erase
=
shift
;
$self
->_create_spatial_index;
$self
->_create_attribute_fts;
$self
->SUPER::init_tmp_database(
@_
);
}
sub
_create_spatial_index{
my
$self
=
shift
;
my
$dbh
=
$self
->dbh;
local
$dbh
->{PrintError} = 0;
$dbh
->
do
(
"DROP TABLE IF EXISTS feature_index"
);
if
(USE_SPATIAL) {
$dbh
->
do
(
"CREATE VIRTUAL TABLE feature_index USING RTREE(id,seqid,bin,start,end)"
);
}
}
sub
_create_attribute_fts{
my
$self
=
shift
;
my
$dbh
=
$self
->dbh;
if
(
$self
->{
'fts'
}) {
my
@fts_versions
;
for
(
@fts_versions
=
grep
(/^ENABLE_FTS[0-9]+$/, DBD::SQLite::compile_options)) { s/ENABLE_// }
die
'fts not supported by this version of DBD::SQLite'
if
(!
@fts_versions
);
$dbh
->
do
(
"DROP TABLE IF EXISTS attribute"
);
$dbh
->
do
(
"CREATE VIRTUAL TABLE "
.
$self
->_attribute_table
.
" USING "
.
$fts_versions
[-1]
.
"(id, attribute_id, attribute_value)"
);
}
}
sub
_has_fts {
my
$self
=
shift
;
if
(!
defined
(
$self
->{
'has_fts'
})) {
(
$self
->{
'has_fts'
}) =
$self
->dbh->selectrow_array(
"select count(*) from sqlite_master where type = 'table' and name = '"
.
$self
->_attribute_table
.
"' and (rootpage = 0 or rootpage is null);"
);
}
return
$self
->{
'has_fts'
};
}
sub
_has_spatial_index {
my
$self
=
shift
;
return
$self
->{
'_has_spatial_index'
}
if
exists
$self
->{
'_has_spatial_index'
};
my
$dbh
=
$self
->dbh;
my
(
$count
) =
$dbh
->selectrow_array(
"select count(*) from sqlite_master where name='feature_index'"
);
return
$self
->{
'_has_spatial_index'
} =
$count
;
}
sub
_finish_bulk_update {
my
$self
=
shift
;
my
$dbh
=
$self
->dbh;
my
$dir
=
$self
->{dumpdir} ||
'.'
;
$self
->begin_work;
for
my
$table
(
'feature'
,
$self
->index_tables) {
my
$fh
=
$self
->dump_filehandle(
$table
);
my
$path
=
$self
->dump_path(
$table
);
$fh
->
close
;
open
$fh
,
'<'
,
$path
or
$self
->throw(
"Could not read file '$path': $!"
);
my
$qualified_table
=
$self
->_qualify(
$table
);
my
$sth
;
if
(
$table
=~ /feature$/) {
$sth
=
$dbh
->prepare(
"REPLACE INTO $qualified_table VALUES (?,?,?,?,?)"
);
while
(<
$fh
>) {
chomp
();
my
(
$id
,
$typeid
,
$strand
,
$indexed
,
$obj
) =
split
(/\t/);
$sth
->bind_param(1,
$id
);
$sth
->bind_param(2,
$typeid
);
$sth
->bind_param(3,
$strand
);
$sth
->bind_param(4,
$indexed
);
$sth
->bind_param(5,
pack
(
'H*'
,
$obj
), {
TYPE
=> SQL_BLOB});
$sth
->execute();
}
}
else
{
my
$feature_index
=
$self
->_feature_index_table;
if
(
$table
=~ /parent2child$/) {
$sth
=
$dbh
->prepare(
"REPLACE INTO $qualified_table VALUES (?,?)"
);
}
elsif
(
$table
=~ /
$feature_index
$/) {
$sth
=
$dbh
->prepare(
$self
->_has_spatial_index ?
"REPLACE INTO $qualified_table VALUES (?,?,?,?,?)"
:
"REPLACE INTO $qualified_table (id,seqid,bin,start,end) VALUES (?,?,?,?,?)"
);
}
else
{
$sth
=
$dbh
->prepare(
"REPLACE INTO $qualified_table VALUES (?,?,?)"
);
}
while
(<
$fh
>) {
chomp
();
$sth
->execute(
split
(/\t/));
}
}
$fh
->
close
();
unlink
$path
;
}
$self
->commit;
delete
$self
->{bulk_update_in_progress};
delete
$self
->{filehandles};
}
sub
index_tables {
my
$self
=
shift
;
my
@t
=
$self
->SUPER::index_tables;
return
(
@t
,
$self
->_feature_index_table);
}
sub
_enable_keys { }
sub
_disable_keys { }
sub
_fetch_indexed_features_sql {
my
$self
=
shift
;
my
$location_table
=
$self
->_qualify(
'feature_location'
);
my
$feature_table
=
$self
->_qualify(
'feature'
);
return
<<END;
SELECT typeid,seqid,start-1,end
FROM $location_table as l,$feature_table as f
WHERE l.id=f.id AND f.\"indexed\"=1
ORDER BY typeid,seqid,start
END
}
sub
_fetch_sequence {
my
$self
=
shift
;
my
(
$seqid
,
$start
,
$end
) =
@_
;
my
$reversed
;
if
(
defined
$start
&&
defined
$end
&&
$start
>
$end
) {
$reversed
++;
(
$start
,
$end
) = (
$end
,
$start
);
}
$start
--
if
defined
$start
;
$end
--
if
defined
$end
;
my
$offset1
=
$self
->_offset_boundary(
$seqid
,
$start
||
'left'
);
my
$offset2
=
$self
->_offset_boundary(
$seqid
,
$end
||
'right'
);
my
$sequence_table
=
$self
->_sequence_table;
my
$locationlist_table
=
$self
->_locationlist_table;
my
$sth
=
$self
->_prepare(
<<END);
SELECT sequence,offset
FROM $locationlist_table as ll CROSS JOIN $sequence_table as s
WHERE ll.id=s.id
AND ll.seqname= ?
AND offset >= ?
AND offset <= ?
ORDER BY offset
END
my
$seq
=
''
;
$sth
->execute(
$seqid
,
$offset1
,
$offset2
) or
$self
->throw(
$sth
->errstr);
while
(
my
(
$frag
,
$offset
) =
$sth
->fetchrow_array) {
substr
(
$frag
,0,
$start
-
$offset
) =
''
if
defined
$start
&&
$start
>
$offset
;
$seq
.=
$frag
;
}
substr
(
$seq
,
$end
-
$start
+1) =
''
if
defined
$end
&&
$end
-
$start
+1 <
length
(
$seq
);
if
(
$reversed
) {
$seq
=
reverse
$seq
;
$seq
=~
tr
/gatcGATC/ctagCTAG/;
}
$sth
->finish;
$seq
;
}
sub
_offset_boundary {
my
$self
=
shift
;
my
(
$seqid
,
$position
) =
@_
;
my
$sequence_table
=
$self
->_sequence_table;
my
$locationlist_table
=
$self
->_locationlist_table;
my
$sql
;
$sql
=
$position
eq
'left'
?
"SELECT min(offset) FROM $locationlist_table as ll CROSS JOIN $sequence_table as s ON ll.id=s.id WHERE ll.seqname=?"
:
$position
eq
'right'
?
"SELECT max(offset) FROM $locationlist_table as ll CROSS JOIN $sequence_table as s ON ll.id=s.id WHERE ll.seqname=?"
:
"SELECT max(offset) FROM $locationlist_table as ll CROSS JOIN $sequence_table as s ON ll.id=s.id WHERE ll.seqname=? AND offset<=?"
;
my
$sth
=
$self
->_prepare(
$sql
);
my
@args
=
$position
=~ /^-?\d+$/ ? (
$seqid
,
$position
) : (
$seqid
);
$sth
->execute(
@args
) or
$self
->throw(
$sth
->errstr);
my
$boundary
=
$sth
->fetchall_arrayref->[0][0];
$sth
->finish;
return
$boundary
;
}
sub
_fetch_many {
my
$self
=
shift
;
@_
or
$self
->throw(
'usage: fetch_many($id1,$id2,$id3...)'
);
my
$ids
=
join
','
,
map
{
ref
(
$_
) ?
@$_
:
$_
}
@_
or
return
;
my
$features
=
$self
->_feature_table;
my
$sth
=
$self
->_prepare(
<<END);
SELECT id,object FROM $features WHERE id IN ($ids)
END
$sth
->execute() or
$self
->throw(
$sth
->errstr);
return
$self
->_sth2objs(
$sth
);
}
sub
_features {
my
$self
=
shift
;
my
(
$seq_id
,
$start
,
$end
,
$strand
,
$name
,
$class
,
$allow_aliases
,
$types
,
$attributes
,
$range_type
,
$fromtable
,
$iterator
,
$sources
) = rearrange([[
'SEQID'
,
'SEQ_ID'
,
'REF'
],
'START'
,[
'STOP'
,
'END'
],
'STRAND'
,
'NAME'
,
'CLASS'
,
'ALIASES'
,
[
'TYPES'
,
'TYPE'
,
'PRIMARY_TAG'
],
[
'ATTRIBUTES'
,
'ATTRIBUTE'
],
'RANGE_TYPE'
,
'FROM_TABLE'
,
'ITERATOR'
,
[
'SOURCE'
,
'SOURCES'
]
],
@_
);
my
(
@from
,
@where
,
@args
,
@group
);
$range_type
||=
'overlaps'
;
my
$feature_table
=
$self
->_feature_table;
@from
=
"$feature_table as f"
;
if
(
defined
$name
) {
undef
$class
if
$class
&&
$class
eq
'Sequence'
;
$name
=
"$class:$name"
if
defined
$class
&&
length
$class
> 0;
my
(
$from
,
$where
,
$group
,
@a
) =
$self
->_name_sql(
$name
,
$allow_aliases
,
'f.id'
);
push
@from
,
$from
if
$from
;
push
@where
,
$where
if
$where
;
push
@group
,
$group
if
$group
;
push
@args
,
@a
;
}
if
(
defined
$seq_id
) {
my
(
$from
,
$where
,
$group
,
@a
) =
$self
->_location_sql(
$seq_id
,
$start
,
$end
,
$range_type
,
$strand
,
'f'
);
push
@from
,
$from
if
$from
;
push
@where
,
$where
if
$where
;
push
@group
,
$group
if
$group
;
push
@args
,
@a
;
}
if
(
defined
(
$sources
)) {
my
@sources
=
ref
(
$sources
) eq
'ARRAY'
? @{
$sources
} : (
$sources
);
if
(
defined
(
$types
)) {
my
@types
=
ref
(
$types
) eq
'ARRAY'
? @{
$types
} : (
$types
);
my
@final_types
;
foreach
my
$type
(
@types
) {
if
(
$type
=~ /:/) {
push
(
@final_types
,
$type
);
}
else
{
foreach
my
$source
(
@sources
) {
push
(
@final_types
,
$type
.
':'
.
$source
);
}
}
}
$types
= \
@final_types
;
}
else
{
$types
= [
map
{
':'
.
$_
}
@sources
];
}
}
if
(
defined
(
$types
)) {
my
(
$from
,
$where
,
$group
,
@a
) =
$self
->_types_sql(
$types
,
'f'
);
push
@from
,
$from
if
$from
;
push
@where
,
$where
if
$where
;
push
@group
,
$group
if
$group
;
push
@args
,
@a
;
}
if
(
defined
$attributes
) {
my
(
$from
,
$where
,
$group
,
@a
) =
$self
->_attributes_sql(
$attributes
,
'f.id'
);
push
@from
,
$from
if
$from
;
push
@where
,
$where
if
$where
;
push
@group
,
$group
if
$group
;
push
@args
,
@a
;
}
if
(
defined
$fromtable
) {
my
(
$from
,
$where
,
$group
,
@a
) =
$self
->_from_table_sql(
$fromtable
,
'f.id'
);
push
@from
,
$from
if
$from
;
push
@where
,
$where
if
$where
;
push
@group
,
$group
if
$group
;
push
@args
,
@a
;
}
@where
=
'"indexed"=1'
unless
@where
;
my
$from
=
join
', '
,
@from
;
my
$where
=
join
' AND '
,
map
{
"($_)"
}
@where
;
my
$group
=
join
', '
,
@group
;
$group
=
"GROUP BY $group"
if
@group
;
my
$query
=
<<END;
SELECT f.id,f.object
FROM $from
WHERE $where
$group
END
$self
->_print_query(
$query
,
@args
)
if
DEBUG ||
$self
->debug;
my
$sth
=
$self
->_prepare(
$query
);
$sth
->execute(
@args
) or
$self
->throw(
$sth
->errstr);
return
$iterator
? Bio::DB::SeqFeature::Store::DBI::Iterator->new(
$sth
,
$self
) :
$self
->_sth2objs(
$sth
);
}
sub
_make_attribute_group {
my
$self
=
shift
;
my
(
$table_name
,
$attributes
) =
@_
;
my
$key_count
=
keys
%$attributes
or
return
;
my
$count
=
$key_count
-1;
return
"f.id HAVING count(f.id)>$count"
;
}
sub
_location_sql {
my
$self
=
shift
;
my
(
$seq_id
,
$start
,
$end
,
$range_type
,
$strand
,
$location
) =
@_
;
my
$seqid
=
$self
->_locationid_nocreate(
$seq_id
) || 0;
my
$feature_index
=
$self
->_feature_index_table;
my
$from
=
"$feature_index as fi"
;
my
(
$bin_where
,
@bin_args
);
if
(
defined
$start
&&
defined
$end
&& !
$self
->_has_spatial_index) {
my
@bins
=
$self
->search_bins(
$start
,
$end
);
$bin_where
=
' AND bin in ('
.
join
(
','
,
@bins
).
')'
;
}
$start
= MIN_INT
unless
defined
$start
;
$end
= MAX_INT
unless
defined
$end
;
my
(
$range
,
@range_args
);
if
(
$range_type
eq
'overlaps'
) {
$range
=
"fi.end>=? AND fi.start<=?"
.
$bin_where
;
@range_args
= (
$start
,
$end
,
@bin_args
);
}
elsif
(
$range_type
eq
'contains'
) {
$range
=
"fi.start>=? AND fi.end<=?"
.
$bin_where
;
@range_args
= (
$start
,
$end
,
@bin_args
);
}
elsif
(
$range_type
eq
'contained_in'
) {
$range
=
"fi.start<=? AND fi.end>=?"
;
@range_args
= (
$start
,
$end
);
}
else
{
$self
->throw(
"range_type must be one of 'overlaps', 'contains' or 'contained_in'"
);
}
if
(
defined
$strand
) {
$range
.=
" AND strand=?"
;
push
@range_args
,
$strand
;
}
my
$where
=
<<END;
fi.seqid=?
AND $location.id=fi.id
AND $range
END
;
my
$group
=
''
;
my
@args
= (
$seqid
,
@range_args
);
return
(
$from
,
$where
,
$group
,
@args
);
}
sub
_feature_index_table {
my
$self
=
shift
;
return
$self
->_has_spatial_index ?
$self
->_qualify(
'feature_index'
)
:
$self
->_qualify(
'feature_location'
) }
sub
_name_sql {
my
$self
=
shift
;
my
(
$name
,
$allow_aliases
,
$join
) =
@_
;
my
$name_table
=
$self
->_name_table;
my
$from
=
"$name_table as n"
;
my
(
$match
,
$string
) =
$self
->_match_sql(
$name
);
my
$where
=
"n.id=$join AND n.name $match COLLATE NOCASE"
;
$where
.=
" AND n.display_name>0"
unless
$allow_aliases
;
return
(
$from
,
$where
,
''
,
$string
);
}
sub
_search_attributes {
my
$self
=
shift
;
my
(
$search_string
,
$attribute_names
,
$limit
) =
@_
;
my
@words
=
map
{
quotemeta
(
$_
)}
split
/\s+/,
$search_string
;
my
$name_table
=
$self
->_name_table;
my
$attribute_table
=
$self
->_attribute_table;
my
$attributelist_table
=
$self
->_attributelist_table;
my
$type_table
=
$self
->_type_table;
my
$typelist_table
=
$self
->_typelist_table;
my
$has_fts
=
$self
->_has_fts;
my
@tags
=
@$attribute_names
;
my
$tag_sql
=
join
' OR '
,(
"al.tag=?"
) x
@tags
;
my
$perl_regexp
=
join
'|'
,
@words
;
my
$sql_regexp
;
my
@wild_card_words
;
if
(
$has_fts
) {
$sql_regexp
=
"a.attribute_value MATCH ?"
;
@wild_card_words
=
join
(
' OR '
,
@words
);
}
else
{
$sql_regexp
=
join
' OR '
,(
"a.attribute_value LIKE ?"
) x
@words
;
@wild_card_words
=
map
{
"%$_%"
}
@words
;
}
my
$sql
=
<<END;
SELECT name,attribute_value,tl.tag,n.id
FROM $attributelist_table AS al
JOIN $attribute_table AS a ON al.id = a.attribute_id
JOIN $name_table AS n ON n.id = a.id
JOIN $type_table AS t ON t.id = n.id
JOIN $typelist_table AS tl ON tl.id = t.typeid
WHERE ($tag_sql)
AND ($sql_regexp)
AND n.display_name=1
END
$sql
.=
"LIMIT $limit"
if
defined
$limit
;
$self
->_print_query(
$sql
,
@tags
,
@wild_card_words
)
if
DEBUG ||
$self
->debug;
my
$sth
=
$self
->_prepare(
$sql
);
$sth
->execute(
@tags
,
@wild_card_words
) or
$self
->throw(
$sth
->errstr);
my
@results
;
while
(
my
(
$name
,
$value
,
$type
,
$id
) =
$sth
->fetchrow_array) {
my
(
@hits
) =
$value
=~ /
$perl_regexp
/ig;
my
@words_in_row
=
split
/\b/,
$value
;
my
$score
=
int
(
@hits
*100/
@words
/
@words_in_row
);
push
@results
,[
$name
,
$value
,
$score
,
$type
,
$id
];
}
$sth
->finish;
@results
=
sort
{
$b
->[2]<=>
$a
->[2]}
@results
;
return
@results
;
}
sub
_match_sql {
my
$self
=
shift
;
my
$name
=
shift
;
my
(
$match
,
$string
);
if
(
$name
=~ /(?:^|[^\\])[*?]/) {
$name
=~ s/(^|[^\\])([
%_
])/$1\\$2/g;
$name
=~ s/(^|[^\\])\*/$1%/g;
$name
=~ s/(^|[^\\])\?/$1_/g;
$match
=
"LIKE ?"
;
$string
=
$name
;
}
else
{
$match
=
"= ? COLLATE NOCASE"
;
$string
=
$name
;
}
return
(
$match
,
$string
);
}
sub
_attributes_sql {
my
$self
=
shift
;
my
(
$attributes
,
$join
) =
@_
;
my
(
$wf
,
@bind_args
) =
$self
->_make_attribute_where(
'a'
,
'al'
,
$attributes
);
my
(
$group_by
,
@group_args
)=
$self
->_make_attribute_group(
'a'
,
$attributes
);
my
$attribute_table
=
$self
->_attribute_table;
my
$attributelist_table
=
$self
->_attributelist_table;
my
$from
=
"$attribute_table AS a"
. (
$self
->_has_fts
?
''
:
" INDEXED BY index_attribute_id"
) .
", $attributelist_table AS al"
;
my
$a_al_join
=
$self
->_has_fts ?
'a.attribute_id MATCH al.id'
:
'a.attribute_id=al.id'
;
my
$where
=
<<END;
a.id=$join
AND $a_al_join
AND ($wf)
END
my
$group
=
$group_by
;
my
@args
= (
@bind_args
,
@group_args
);
return
(
$from
,
$where
,
$group
,
@args
);
}
sub
_types_sql {
my
$self
=
shift
;
my
(
$types
,
$type_table
) =
@_
;
my
(
$primary_tag
,
$source_tag
);
my
@types
=
ref
$types
eq
'ARRAY'
?
@$types
:
$types
;
my
$typelist
=
$self
->_typelist_table;
my
$from
=
"$typelist AS tl"
;
my
(
@matches
,
@args
);
for
my
$type
(
@types
) {
if
(
ref
$type
&&
$type
->isa(
'Bio::DB::GFF::Typename'
)) {
$primary_tag
=
$type
->method;
$source_tag
=
$type
->source;
}
else
{
(
$primary_tag
,
$source_tag
) =
split
':'
,
$type
,2;
}
if
(
length
$source_tag
) {
push
@matches
,
"tl.tag=? COLLATE NOCASE"
;
push
@args
,
"$primary_tag:$source_tag"
;
}
else
{
push
@matches
,
"tl.tag LIKE ?"
;
push
@args
,
"$primary_tag:%"
;
}
}
my
$matches
=
join
' OR '
,
@matches
;
my
$where
=
<<END;
tl.id=$type_table.typeid
AND ($matches)
END
return
(
$from
,
$where
,
''
,
@args
);
}
sub
optimize {
my
$self
=
shift
;
$self
->dbh->
do
(
"ANALYZE $_"
)
foreach
$self
->index_tables;
}
sub
replace {
my
$self
=
shift
;
my
$object
=
shift
;
my
$index_flag
=
shift
||
undef
;
my
$id
=
$object
->primary_id;
my
$features
=
$self
->_feature_table;
my
$sth
=
$self
->_prepare(
<<END);
REPLACE INTO $features (id,object,"indexed",strand,typeid) VALUES (?,?,?,?,?)
END
my
(
$seqid
,
$start
,
$end
,
$strand
,
$bin
) =
$index_flag
?
$self
->_get_location_and_bin(
$object
) : (
undef
)x6;
my
$primary_tag
=
$object
->primary_tag;
my
$source_tag
=
$object
->source_tag ||
''
;
$primary_tag
.=
":$source_tag"
;
my
$typeid
=
$self
->_typeid(
$primary_tag
,1);
my
$frozen
=
$self
->no_blobs() ? 0 :
$self
->freeze(
$object
);
$sth
->bind_param(1,
$id
);
$sth
->bind_param(2,
$frozen
, {
TYPE
=> SQL_BLOB});
$sth
->bind_param(3,
$index_flag
||0);
$sth
->bind_param(4,
$strand
);
$sth
->bind_param(5,
$typeid
);
$sth
->execute() or
$self
->throw(
$sth
->errstr);
my
$dbh
=
$self
->dbh;
$object
->primary_id(
$dbh
->func(
'last_insert_rowid'
))
unless
defined
$id
;
$self
->flag_for_indexing(
$dbh
->func(
'last_insert_rowid'
))
if
$self
->{bulk_update_in_progress};
}
sub
bulk_replace {
my
$self
=
shift
;
my
$index_flag
=
shift
||
undef
;
my
@objects
=
@_
;
my
$features
=
$self
->_feature_table;
my
@insert_values
;
foreach
my
$object
(
@objects
) {
my
$id
=
$object
->primary_id;
my
(
undef
,
undef
,
undef
,
$strand
) =
$index_flag
?
$self
->_get_location_and_bin(
$object
) : (
undef
)x4;
my
$primary_tag
=
$object
->primary_tag;
my
$source_tag
=
$object
->source_tag ||
''
;
$primary_tag
.=
":$source_tag"
;
my
$typeid
=
$self
->_typeid(
$primary_tag
,1);
push
(
@insert_values
, (
$id
,0,
$index_flag
||0,
$strand
,
$typeid
));
}
my
@value_blocks
;
for
(1..
@objects
) {
push
(
@value_blocks
,
'(?,?,?,?,?)'
);
}
my
$value_blocks
=
join
(
','
,
@value_blocks
);
my
$sql
=
qq{REPLACE INTO $features (id,object,"indexed",strand,typeid) VALUES $value_blocks}
;
my
$sth
=
$self
->_prepare(
$sql
);
$sth
->execute(
@insert_values
) or
$self
->throw(
$sth
->errstr);
}
sub
_get_location_and_bin {
my
$self
=
shift
;
my
$obj
=
shift
;
my
$seqid
=
$self
->_locationid(
$obj
->seq_id||
''
);
my
$start
=
$obj
->start;
my
$end
=
$obj
->end;
my
$strand
=
$obj
->strand;
return
(
$seqid
,
$start
,
$end
,
$strand
,
$self
->calculate_bin(
$start
,
$end
));
}
sub
insert {
my
$self
=
shift
;
my
$object
=
shift
;
my
$index_flag
=
shift
|| 0;
$self
->_load_class(
$object
);
defined
$object
->primary_id and
$self
->throw(
"$object already has a primary id"
);
my
$features
=
$self
->_feature_table;
my
$sth
=
$self
->_prepare(
<<END);
INSERT INTO $features (id,object,"indexed") VALUES (?,?,?)
END
$sth
->execute(
undef
,
$self
->freeze(
$object
),
$index_flag
) or
$self
->throw(
$sth
->errstr);
my
$dbh
=
$self
->dbh;
$object
->primary_id(
$dbh
->func(
'last_insert_rowid'
));
$self
->flag_for_indexing(
$dbh
->func(
'last_insert_rowid'
))
if
$self
->{bulk_update_in_progress};
}
sub
toplevel_types {
my
$self
=
shift
;
eval
"require Bio::DB::GFF::Typename"
unless
Bio::DB::GFF::Typename->can(
'new'
);
my
$typelist_table
=
$self
->_typelist_table;
my
$feature_table
=
$self
->_feature_table;
my
$sql
=
<<END;
SELECT distinct(tag) from $typelist_table as tl,$feature_table as f
WHERE tl.id=f.typeid
AND f."indexed"=1
END
;
$self
->_print_query(
$sql
)
if
DEBUG ||
$self
->debug;
my
$sth
=
$self
->_prepare(
$sql
);
$sth
->execute() or
$self
->throw(
$sth
->errstr);
my
@results
;
while
(
my
(
$tag
) =
$sth
->fetchrow_array) {
push
@results
,Bio::DB::GFF::Typename->new(
$tag
);
}
$sth
->finish;
return
@results
;
}
sub
_genericid {
my
$self
=
shift
;
my
(
$table
,
$namefield
,
$name
,
$add_if_missing
) =
@_
;
my
$qualified_table
=
$self
->_qualify(
$table
);
my
$sth
=
$self
->_prepare(
<<END);
SELECT id FROM $qualified_table WHERE $namefield=? COLLATE NOCASE
END
$sth
->execute(
$name
) or
die
$sth
->errstr;
my
(
$id
) =
$sth
->fetchrow_array;
$sth
->finish;
return
$id
if
defined
$id
;
return
unless
$add_if_missing
;
$sth
=
$self
->_prepare(
<<END);
INSERT INTO $qualified_table ($namefield) VALUES (?)
END
$sth
->execute(
$name
) or
die
$sth
->errstr;
my
$dbh
=
$self
->dbh;
return
$dbh
->func(
'last_insert_rowid'
);
}
sub
_dump_store {
my
$self
=
shift
;
my
$indexed
=
shift
;
my
$count
= 0;
my
$store_fh
=
$self
->dump_filehandle(
'feature'
);
my
$dbh
=
$self
->dbh;
my
$autoindex
=
$self
->autoindex;
for
my
$obj
(
@_
) {
my
$id
=
$self
->next_id;
my
(
$seqid
,
$start
,
$end
,
$strand
) =
$indexed
?
$self
->_get_location_and_bin(
$obj
) : (
undef
)x4;
my
$primary_tag
=
$obj
->primary_tag;
my
$source_tag
=
$obj
->source_tag ||
''
;
$primary_tag
.=
":$source_tag"
;
my
$typeid
=
$self
->_typeid(
$primary_tag
,1);
print
$store_fh
join
(
"\t"
,
$id
,
$typeid
,
$strand
,
$indexed
,
unpack
(
'H*'
,
$self
->freeze(
$obj
))),
"\n"
;
$obj
->primary_id(
$id
);
$self
->_update_indexes(
$obj
)
if
$indexed
&&
$autoindex
;
$count
++;
}
unless
(
$indexed
or
$self
->{indexed_flag}++) {
$self
->subfeatures_are_indexed(0);
}
$count
;
}
sub
_dump_update_name_index {
my
$self
=
shift
;
my
(
$obj
,
$id
) =
@_
;
my
$fh
=
$self
->dump_filehandle(
'name'
);
my
$dbh
=
$self
->dbh;
my
(
$names
,
$aliases
) =
$self
->feature_names(
$obj
);
print
$fh
join
(
"\t"
,
$id
,
$_
,1),
"\n"
foreach
@$names
;
print
$fh
join
(
"\t"
,
$id
,
$_
,0),
"\n"
foreach
@$aliases
;
}
sub
_update_name_index {
my
$self
=
shift
;
my
(
$obj
,
$id
) =
@_
;
my
$name
=
$self
->_name_table;
my
$primary_id
=
$obj
->primary_id;
$self
->_delete_index(
$name
,
$id
);
my
(
$names
,
$aliases
) =
$self
->feature_names(
$obj
);
my
$sth
=
$self
->_prepare(
"INSERT INTO $name (id,name,display_name) VALUES (?,?,?)"
);
$sth
->execute(
$id
,
$_
,1) or
$self
->throw(
$sth
->errstr)
foreach
@$names
;
$sth
->execute(
$id
,
$_
,0) or
$self
->throw(
$sth
->errstr)
foreach
@$aliases
;
$sth
->finish;
}
sub
_dump_update_attribute_index {
my
$self
=
shift
;
my
(
$obj
,
$id
) =
@_
;
my
$fh
=
$self
->dump_filehandle(
'attribute'
);
my
$dbh
=
$self
->dbh;
for
my
$tag
(
$obj
->all_tags) {
my
$tagid
=
$self
->_attributeid(
$tag
);
for
my
$value
(
$obj
->each_tag_value(
$tag
)) {
print
$fh
join
(
"\t"
,
$id
,
$tagid
,
$value
),
"\n"
;
}
}
}
sub
_update_indexes {
my
$self
=
shift
;
my
$obj
=
shift
;
defined
(
my
$id
=
$obj
->primary_id) or
return
;
$self
->SUPER::_update_indexes(
$obj
);
if
(
$self
->{bulk_update_in_progress}) {
$self
->_dump_update_location_index(
$obj
,
$id
);
}
else
{
$self
->_update_location_index(
$obj
,
$id
);
}
}
sub
_update_location_index {
my
$self
=
shift
;
my
(
$obj
,
$id
) =
@_
;
my
(
$seqid
,
$start
,
$end
,
$strand
,
$bin
) =
$self
->_get_location_and_bin(
$obj
);
my
$table
=
$self
->_feature_index_table;
$self
->_delete_index(
$table
,
$id
);
my
(
$sql
,
@args
);
if
(
$self
->_has_spatial_index) {
$sql
=
"INSERT INTO $table (id,seqid,bin,start,end) values (?,?,?,?,?)"
;
@args
= (
$id
,
$seqid
,
$bin
,
$start
,
$end
);
}
else
{
$sql
=
"INSERT INTO $table (id,seqid,bin,start,end) values (?,?,?,?,?)"
;
@args
= (
$id
,
$seqid
,
$bin
,
$start
,
$end
);
}
my
$sth
=
$self
->_prepare(
$sql
);
$sth
->execute(
@args
);
$sth
->finish;
}
sub
_dump_update_location_index {
my
$self
=
shift
;
my
(
$obj
,
$id
) =
@_
;
my
$table
=
$self
->_feature_index_table;
my
$fh
=
$self
->dump_filehandle(
$table
);
my
$dbh
=
$self
->dbh;
my
(
$seqid
,
$start
,
$end
,
$strand
,
$bin
) =
$self
->_get_location_and_bin(
$obj
);
my
@args
=
$self
->_has_spatial_index ? (
$id
,
$seqid
,
$bin
,
$start
,
$end
)
: (
$id
,
$seqid
,
$bin
,
$start
,
$end
);
print
$fh
join
(
"\t"
,
@args
),
"\n"
;
}
sub
DESTROY {
my
$self
=
shift
;
if
(
%DBI::installed_drh
) {
DBI->disconnect_all;
%DBI::installed_drh
= ();
}
undef
$self
->{dbh};
}
1;