package DBIx::TextIndex::DBD; use strict; use warnings; our $VERSION = '0.26'; use base qw(DBIx::TextIndex); my @FIELDS = qw( all_docs_vector_table collection_fields collection_table doc_fields doc_id_field doc_table docweights_table mask_table max_word_length doc_key_sql_type doc_key_length ); BEGIN { DBIx::TextIndex::create_accessors(\@FIELDS); } sub new { my $pkg = shift; my $class = ref($pkg) || $pkg; my $self = bless {}, $class; my $args = shift; $self->set($args) if $args; return $self; } sub drop_table { my $self = shift; my $table = shift; if ($self->table_exists($table)) { $self->{INDEX_DBH}->do("DROP TABLE $table"); return 1; } else { return 0; } } sub table_exists { my $self = shift; my $table = shift; my @tables = $self->{INDEX_DBH}->tables(undef, undef, $table, 'table'); for (@tables) { return 1 if m/^.*\.?[\"\`]?$table[\"\`]?$/; } return 0; } sub delete_mask { my $self = shift; return <<END; DELETE FROM $self->{MASK_TABLE} WHERE mask = ? END } sub add_mask { my $self = shift; my ($mask, $vector_enum) = @_; my $sql = <<END; REPLACE INTO $self->{MASK_TABLE} (mask, docs_vector) VALUES (?, ?) END $self->{INDEX_DBH}->do($sql, undef, $mask, $vector_enum); } sub create_collection_table { my $self = shift; my $collection_length = DBIx::TextIndex::COLLECTION_NAME_MAX_LENGTH; return <<END; CREATE TABLE collection ( collection varchar($collection_length) NOT NULL default '', version decimal(10,2) NOT NULL default '0.00', max_indexed_id int(10) unsigned NOT NULL default '0', doc_table varchar(30), doc_id_field varchar(30), doc_fields varchar(250) NOT NULL default '', charset varchar(50) NOT NULL default '', stoplist varchar(255) NOT NULL default '', proximity_index enum('0', '1') NOT NULL default '0', error_empty_query varchar(255) NOT NULL default '', error_quote_count varchar(255) NOT NULL default '', error_no_results varchar(255) NOT NULL default '', error_no_results_stop varchar(255) NOT NULL default '', error_wildcard_length varchar(255) NOT NULL default '', error_wildcard_expansion varchar(255) NOT NULL default '', max_word_length int(10) unsigned NOT NULL default '0', result_threshold int(10) unsigned NOT NULL default '0', phrase_threshold int(10) unsigned NOT NULL default '0', min_wildcard_length int(10) unsigned NOT NULL default '0', max_wildcard_term_expansion int(10) unsigned NOT NULL default '0', decode_html_entities enum('0', '1') NOT NULL default '0', scoring_method varchar(20) NOT NULL default '', update_commit_interval int(10) unsigned NOT NULL default '0', PRIMARY KEY collection_key (collection) ) END } sub insert_collection_table_row { my $self = shift; my $row = shift; my @fields; my @values; while (my ($field, $value) = each %$row) { push @fields, $field; push @values, $value; } my $collection_fields = join ', ', @fields; my $place_holders = join ', ', (('?') x ($#fields + 1)); my $sql = <<END; INSERT INTO $self->{COLLECTION_TABLE} ($collection_fields) VALUES ($place_holders) END $self->{INDEX_DBH}->do($sql, undef, @values); } sub fetch_doc_id { my $self = shift; my $doc_key = shift; my $sql = <<END; SELECT doc_id FROM $self->{DOC_KEY_TABLE} WHERE doc_key = ? END my ($doc_id) = $self->{INDEX_DBH}->selectrow_array($sql, undef, $doc_key); return $doc_id ? $doc_id : undef; } sub fetch_doc_ids { my $self = shift; my $doc_keys = shift; my $placeholders = join(',', (('?') x ($#$doc_keys +1))); my $sql = <<END; SELECT doc_id FROM $self->{DOC_KEY_TABLE} WHERE doc_key in ($placeholders) order by doc_id END my $doc_ids = $self->{INDEX_DBH}->selectcol_arrayref($sql, undef, @$doc_keys); return $doc_ids; } sub fetch_doc_keys { my $self = shift; my $doc_ids = shift; my $placeholders = join(',', (('?') x ($#$doc_ids +1))); my $sql = <<END; SELECT doc_key FROM $self->{DOC_KEY_TABLE} WHERE doc_id in ($placeholders) order by doc_id END my $doc_keys = $self->{INDEX_DBH}->selectcol_arrayref($sql, undef, @$doc_ids); return $doc_keys; } sub delete_doc_key_doc_ids { my $self = shift; my $doc_ids = shift; my $placeholders = join(',', (('?') x ($#$doc_ids +1))); my $sql = <<END; DELETE FROM $self->{DOC_KEY_TABLE} WHERE doc_id in ($placeholders) END $self->{INDEX_DBH}->do($sql, undef, @$doc_ids); } sub insert_doc_key { my $self = shift; my $doc_key = shift; my $sql = <<END; INSERT INTO $self->{DOC_KEY_TABLE} (doc_key) VALUES (?) END $self->{INDEX_DBH}->do($sql, undef, $doc_key); my $doc_id = $self->{INDEX_DBH}->last_insert_id(undef, undef, $self->{DOC_KEY_TABLE}, undef); return $doc_id; } sub fetch_max_indexed_id { my $self = shift; return <<END; SELECT max_indexed_id FROM $self->{COLLECTION_TABLE} WHERE collection = ? END } sub fetch_collection_version { my $self = shift; return <<END; SELECT MAX(version) FROM $self->{COLLECTION_TABLE} END } sub collection_count { my $self = shift; return <<END; SELECT COUNT(*) FROM $self->{COLLECTION_TABLE} END } sub update_collection_info { my $self = shift; my $field = shift; return <<END; UPDATE $self->{COLLECTION_TABLE} SET $field = ? WHERE collection = ? END } sub delete_collection_info { my $self = shift; return <<END; DELETE FROM $self->{COLLECTION_TABLE} WHERE collection = ? END } sub store_collection_info { my $self = shift; my @collection_fields = @{$self->{COLLECTION_FIELDS}}; my $collection_fields = join ', ', @collection_fields; my $place_holders = join ', ', (('?') x ($#collection_fields + 1)); return <<END; INSERT INTO $self->{COLLECTION_TABLE} ($collection_fields) VALUES ($place_holders) END } sub fetch_collection_info { my $self = shift; my $collection_fields = join ', ', @{$self->{COLLECTION_FIELDS}}; return <<END; SELECT $collection_fields FROM $self->{COLLECTION_TABLE} WHERE collection = ? END } sub fetch_all_collection_rows { my $self = shift; return <<END; SELECT * FROM $self->{COLLECTION_TABLE} END } sub phrase_scan_cz { my $self = shift; my $result_docs = shift; my $fno = shift; return <<END; SELECT $self->{DOC_ID_FIELD}, $self->{DOC_FIELDS}->[$fno] FROM $self->{DOC_TABLE} WHERE $self->{DOC_ID_FIELD} IN ($result_docs) END } sub phrase_scan { my $self = shift; my $result_docs = shift; my $fno = shift; return <<END; SELECT $self->{DOC_ID_FIELD} FROM $self->{DOC_TABLE} WHERE $self->{DOC_ID_FIELD} IN ($result_docs) AND $self->{DOC_FIELDS}->[$fno] LIKE ? END } sub fetch_docweights { my $self = shift; my $fields = shift; return <<END; SELECT field_no, avg_docweight, docweights FROM $self->{DOCWEIGHTS_TABLE} WHERE field_no in ($fields) END } sub fetch_all_docs_vector { my $self = shift; return <<END; SELECT all_docs_vector FROM $self->{ALL_DOCS_VECTOR_TABLE} END } sub update_all_docs_vector { my $self = shift; return <<END; REPLACE INTO $self->{ALL_DOCS_VECTOR_TABLE} (id, all_docs_vector) VALUES (1, ?) END } sub fetch_mask { my $self = shift; return <<END; SELECT docs_vector FROM $self->{MASK_TABLE} WHERE mask = ? END } sub fetch_term_pos { my $self = shift; my $table = shift; return <<END; SELECT term_pos FROM $table WHERE term = ? END } sub fetch_term_docs { my $self = shift; my $table = shift; return <<END; SELECT term_docs FROM $table WHERE term = ? END } sub fetch_term_freq_and_docs { my $self = shift; my $table = shift; return <<END; select docfreq_t, term_docs from $table where term = ? END } sub fetch_terms { my $self = shift; my $table = shift; return <<END; SELECT term FROM $table WHERE term LIKE ? END } sub ping_doc { my $self = shift; return <<END; SELECT 1 FROM $self->{DOC_TABLE} WHERE $self->{DOC_ID_FIELD} = ? END } sub fetch_doc { my $self = shift; my $field = shift; return <<END; SELECT $field FROM $self->{DOC_TABLE} WHERE $self->{DOC_ID_FIELD} = ? END } sub fetch_doc_all_fields { my $self = shift; my $fields = join(', ', @{$self->{DOC_FIELDS}}); return <<END; SELECT $fields FROM $self->{DOC_TABLE} WHERE $self->{DOC_ID_FIELD} = ? END } sub update_docweights { my $self = shift; return <<END; REPLACE INTO $self->{DOCWEIGHTS_TABLE} (field_no, avg_docweight, docweights) VALUES (?, ?, ?) END } sub update_docweights_execute { my $self = shift; my ($sth, $fno, $avg_w_d, $packed_w_d) = @_; $sth->execute($fno, $avg_w_d, $packed_w_d); } sub inverted_replace { my $self = shift; my $table = shift; return <<END; REPLACE INTO $table (term, docfreq_t, term_docs, term_pos) VALUES (?, ?, ?, ?) END } sub fetch_delete_queue { my $self = shift; my ($delete_queue) = $self->{INDEX_DBH}->selectrow_array(<<END, undef, 1); SELECT delete_queue FROM $self->{DELETE_QUEUE_TABLE} WHERE ID = ? END return $delete_queue ? $delete_queue : undef; } sub update_delete_queue { my $self = shift; my $delete_queue = shift; $self->{INDEX_DBH}->do(<<END, undef, $delete_queue, 1); REPLACE INTO $self->{DELETE_QUEUE_TABLE} (delete_queue, id) VALUES (?, ?) END } sub inverted_replace_execute { my $self = shift; my ($sth, $term, $docfreq_t, $term_docs, $term_pos) = @_; $sth->execute( $term, $docfreq_t, $term_docs, $term_pos, ) or warn $self->{INDEX_DBH}->err; } sub inverted_select { my $self = shift; my $table = shift; return <<END; SELECT docfreq_t, term_docs, term_pos FROM $table WHERE term = ? END } sub total_terms { my $self = shift; my $table = shift; return <<END; SELECT SUM(docfreq_t) FROM $table END } sub create_mask_table { my $self = shift; return <<END; CREATE TABLE $self->{MASK_TABLE} ( mask varchar(100) NOT NULL, docs_vector mediumblob NOT NULL, primary key mask_key (mask) ) END } sub create_docweights_table { my $self = shift; return <<END; CREATE TABLE $self->{DOCWEIGHTS_TABLE} ( field_no smallint unsigned NOT NULL, avg_docweight float NOT NULL, docweights mediumblob NOT NULL, primary key field_no_key (field_no) ) END } sub create_all_docs_vector_table { my $self = shift; return <<END; CREATE TABLE $self->{ALL_DOCS_VECTOR_TABLE} ( id INT UNSIGNED NOT NULL, all_docs_vector MEDIUMBLOB NOT NULL, UNIQUE KEY id_key (id) ) END } sub create_delete_queue_table { my $self = shift; return <<END; CREATE TABLE $self->{DELETE_QUEUE_TABLE} ( id INT UNSIGNED NOT NULL, delete_queue MEDIUMBLOB NOT NULL, UNIQUE KEY id_key (id) ) END } sub create_inverted_table { my $self = shift; my $table = shift; my $max_word = $self->{MAX_WORD_LENGTH}; return <<END; CREATE TABLE $table ( term varchar($max_word) NOT NULL, docfreq_t int unsigned NOT NULL, term_docs mediumblob NOT NULL, term_pos longblob NOT NULL, PRIMARY KEY term_key (term) ) END } sub create_doc_key_table { my $self = shift; my $doc_key_sql_type = $self->{DOC_KEY_SQL_TYPE}; if (lc($doc_key_sql_type) eq 'int') { $doc_key_sql_type .= ' unsigned'; } else { $doc_key_sql_type .= "($self->{DOC_KEY_LENGTH})" if $self->{DOC_KEY_LENGTH}; } return <<END; CREATE TABLE $self->{DOC_KEY_TABLE} ( doc_id bigint unsigned NOT NULL AUTO_INCREMENT PRIMARY KEY, doc_key $doc_key_sql_type NOT NULL, UNIQUE doc_key_key (doc_key) ) END } sub drop_doc_key_table { my $self = shift; $self->drop_table($self->{DOC_KEY_TABLE}); } 1; __END__ =head1 NAME DBIx::TextIndex::DBD - Base class for database-specific SQL drivers =head1 SYNOPSIS Not for direct use, clients use L<DBIx::TextIndex>. =head1 DESCRIPTION This module is a base class for creating database drivers that encapsulate SQL calls specific to a given database. =head2 Restricted Methods =over =item C<add_mask> =item C<collection_count> =item C<create_all_docs_vector_table> =item C<create_collection_table> =item C<create_delete_queue_table> =item C<create_doc_key_table> =item C<create_docweights_table> =item C<create_inverted_table> =item C<create_mask_table> =item C<delete_collection_info> =item C<delete_doc_key_doc_ids> =item C<delete_mask> =item C<drop_doc_key_table> =item C<drop_table> =item C<fetch_all_collection_rows> =item C<fetch_all_docs_vector> =item C<fetch_collection_info> =item C<fetch_collection_version> =item C<fetch_delete_queue> =item C<fetch_doc> =item C<fetch_doc_all_fields> =item C<fetch_doc_id> =item C<fetch_doc_ids> =item C<fetch_doc_keys> =item C<fetch_docweights> =item C<fetch_mask> =item C<fetch_max_indexed_id> =item C<fetch_term_docs> =item C<fetch_term_freq_and_docs> =item C<fetch_term_pos> =item C<fetch_terms> =item C<insert_collection_table_row> =item C<insert_doc_key> =item C<inverted_replace> =item C<inverted_replace_execute> =item C<inverted_select> =item C<new> =item C<phrase_scan> =item C<phrase_scan_cz> =item C<ping_doc> =item C<store_collection_info> =item C<table_exists> =item C<total_terms> =item C<update_all_docs_vector> =item C<update_collection_info> =item C<update_delete_queue> =item C<update_docweights> =item C<update_docweights_execute> =back =head1 SEE ALSO L<DBIx::TextIndex> =head1 AUTHOR Daniel Koch <dkoch@cpan.org> =head1 COPYRIGHT Copyright (c) 2007 Daniel Koch. All rights reserved. This program is free software; you can redistribute it and/or modify it under the same terms as Perl itself. =cut