package Lingua::FR::Ladl::Table; use warnings; use strict; use English; use version; our $VERSION = qv('0.0.1'); use Carp; use Readonly; use List::Util qw(max first); use List::MoreUtils qw(all); use Lingua::FR::Ladl::Exceptions; use Lingua::FR::Ladl::Parametrizer; use Lingua::FR::Ladl::Util; use Class::Std; { Readonly my %is_implemented_format => ( xls => \&_load_xls, xml => \&_load_gnumeric_xml ); my %name_of : ATTR( :default('none') :name<name>); my %table_of : ATTR; # hash of hashes to hold the table data in a perl data structure my %maxCol_of : ATTR( :default(0) :name<maxCol>); my %maxRow_of : ATTR( :default(0) :name<maxRow>); my %verbCol_of : ATTR; my %verbs_of : ATTR; my %colTypes_of : ATTR; my %is_tildaRow_of : ATTR; my %headers_of : ATTR; my %parameters_of : ATTR( :set<parameters> :get<parameters> ); # customization parameters ############# Utility subroutines ################################################################# sub _find_out_verbCol { my ($id) = @_; my $name = $name_of{$id}; my $verbCol = first { $table_of{$id}->{0}->{$_} =~ m{\b$name\b} } (keys %{$table_of{$id}->{0}}); if ($verbCol) { $verbCol_of{$id} = $verbCol; return $verbCol_of{$id}; } else { carp "Couldn't find out verb column: no column with column header $name\n"; return; } } sub _check_row { my ($table, $row, $message) = @_; croak "$row must be greater 0\n" unless $row; my $id = ident $table; X::NoTableData->throw( message => $message, table => $table, ) unless $table_of{$id}; croak "$row must be less or equal $maxRow_of{$id}\n" if $row > $maxRow_of{$id}; return; } sub _check_col { my ($table, $col, $message) = @_; croak "$col must be greater 0\n" unless $col; my $id = ident $table; X::NoTableData->throw( message => $message, table => $table, ) unless $table_of{$id}; croak "$col must be less or equal $maxCol_of{$id}\n" if $col > $maxCol_of{$id}; return; } sub _load_xls { my ($file_name) = @_; my $table_ref = {}; eval { use Spreadsheet::ParseExcel; }; if ($EVAL_ERROR) { croak "Can't load table data from excel file $file_name: Spreadsheed::ParseExcel not installed"; } my $excel = Spreadsheet::ParseExcel::Workbook->Parse($file_name) or croak "Error parsing $file_name: Spreadsheet::ParseExcel returned undef\n"; my $sheet = $excel->{Worksheet}->[0]; $sheet->{MaxRow} ||= $sheet->{MinRow}; unless ($sheet->{MaxRow}) { croak "Couldn't load table data: Spreadsheet::ParseExcel returned 0 max row when parsing $file_name\n"; } foreach my $row ($sheet->{MinRow} .. $sheet->{MaxRow}) { $sheet->{MaxCol} ||= $sheet->{MinCol}; foreach my $col ($sheet->{MinCol} .. $sheet->{MaxCol}) { my $cell = $sheet->{Cells}[$row][$col]; if ($cell) { $table_ref->{$row}->{$col} = $cell->{Val}; } } } return ($table_ref, $sheet->{MaxRow}, $sheet->{MaxCol}); } sub _load_gnumeric_xml { my ($file_name) = @_; my $table_ref = {}; eval { use XML::LibXML; }; if ($EVAL_ERROR) { croak "Can't load table data from xml file $file_name: XML::LibXML not installed\n"; } my $parser = XML::LibXML->new(); $parser->keep_blanks(0); my $table_doc = $parser->parse_file($file_name); my @cells = $table_doc->findnodes("//gmr:Cell"); foreach my $cell (@cells) { my $row = $cell->getAttribute('Row'); my $col = $cell->getAttribute('Col'); $table_ref->{$row}->{$col}=$cell->textContent(); } my @maxRowCells = $table_doc->findnodes("//gmr:MaxRow"); my $maxRow = $maxRowCells[0]->textContent(); my @column_nbrs = map { $_->getValue() } $table_doc->findnodes('//gmr:Cell/@Col'); my $maxCol = max @column_nbrs; return ($table_ref, $maxRow, $maxCol); } ############ Methods ################################################################### sub BUILD { my ($self, $id, $arg_ref) = @_; # parametrize with new default parametrizer my $param = Lingua::FR::Ladl::Parametrizer->new(); $parameters_of{$id} = $param; } sub load { my ($self, $arg_ref) = @_; my $format = $arg_ref->{format}; my $file_name = $arg_ref->{file}; my $id = ident $self; unless ($is_implemented_format{$format}) { croak 'Format must be one of '.join(', ', keys %is_implemented_format).", not $format\n"; } =for Rationale: We set a default table name, inferred from the file name. The table name is needed for inferring the verb column which is the header of the verb column. However the table name can be also set manually. The get_verb_column() method won't work if the table name is not set correctly. =cut $self->set_name(_Name::from_file_name($file_name)); ($table_of{$id}, $maxRow_of{$id}, $maxCol_of{$id}) = $is_implemented_format{$format}->($file_name); return; } =for Rationale The header line is line number 0 in the table This sub returns a hash with the headers as keys and the columns as values. =cut sub get_headers { my ($self) = @_; my $id = ident $self; if ($headers_of{$id}) { return $headers_of{$id}; } X::NoTableData->throw( message => "Could not get table headers:\n", table => $self) unless $table_of{$id}; foreach my $col_num (0..$maxCol_of{$id}) { if (exists $table_of{$id}->{0}->{$col_num}) { my $header = $table_of{$id}->{0}->{$col_num}; $headers_of{$id}->{$header} = $col_num; } else { $headers_of{$col_num} = $col_num; } } return $headers_of{$id}; } sub get_value_at { my ($self, $row, $col) = @_; my $id = ident($self); _check_row($self, $row, "Can't get value at $row, $col:"); _check_col($self, $col, "Can't get value at $row, $col:"); return $table_of{$id}->{$row}->{$col}; } sub get_col_for_header { my ($self, $header) = @_; my $id = ident($self); X::NoTableData->throw( message => "Couldn't find out col for header:\n", table => $self ) unless $table_of{$id}; my $col_for_headers = $self->get_headers(); if (exists($col_for_headers->{$header})) { return $col_for_headers->{$header}; } else { return; } } sub get_header_for_col { my ($self, $col) = @_; my $id = ident($self); _check_col($self, $col, "Can't get header for $col:"); my $header = $table_of{$id}->{0}->{$col}; my $empty_string_mark = $self->get_parameters()->get_empty_string_mark(); $header =~ s{$empty_string_mark}{}; return $header; } =for Rationale: we determine the verb column as the header column with the table name this is an empirical observation, it may fail! =cut sub get_verb_column { my ($self) = @_; my $id = ident $self; if ($verbCol_of{$id}) { return $verbCol_of{$id}; } else { X::NoTableData->throw( message => "Couldn't find out verb column:\n", table => $self ) unless $table_of{$id}; return _find_out_verbCol($id); } } sub get_verbs { my ($self) = @_; my $id = ident $self; if ($verbs_of{$id}) { return @{$verbs_of{$id}}; } else { croak "Need table to calculate verbs\n" unless $table_of{$id}; my $verbCol = $verbCol_of{$id} or $self->get_verb_column(); my $maxRow = $maxRow_of{$id} or $self->get_maxRow(); croak "Max row must be greater 0" unless $maxRow; $verbs_of{$id} = [ map { $table_of{$id}->{$_}->{$verbCol} } 1 .. $maxRow ]; return @{$verbs_of{$id}}; } } =for Rationale: the particle column is the column before the verb column =cut sub get_particle_column { my ($self) = @_; my $id = ident $self; my $verbCol = $verbCol_of{$id} or $self->get_verb_column(); return $verbCol - 1; } =for Rationale: the example column is the last column =cut sub get_example_column { my ($self) = @_; my $id = ident $self; if ($maxCol_of{$id}) { return $maxCol_of{$id}; } else { X::NoTableData->throw( message => "Max column is 0!\n", table => $self, ); return; } } =for Rationale: We try to determine the data type of a specific column first we read a sample row (other than 0 which is the header row) if the content of a given column is other than '+' '-' '~' we presume that this column is supposed to contain text data, else the possible values are '+', '-', '~' Returns a hash reference with the column index as keys and either the string `+-~' or the string `text' as values. =cut sub get_column_types { my ($self) = @_; my $id = ident $self; if ($colTypes_of{$id}) { return $colTypes_of{$id}; } else { X::NoTableData->throw( message => "Couldn't calculate column types:\n", table => $self, ) unless $table_of{$id}; while ( my ($col_num, $col_val) = each %{$table_of{$id}->{1}} ) { if ( $col_val =~ m{\A \s* [+-~] \s* \z}xms ) { $colTypes_of{$id}->{$col_num} = '+-~'; } else { $colTypes_of{$id}->{$col_num} = 'text'; } } return $colTypes_of{$id}; } } sub get_column_type_for_col { my ($self, $col) = @_; my $id = ident $self; _check_col($self, $col, "Can't get column type for column $col:"); my $colTypes = $colTypes_of{$id} ? $colTypes_of{$id} : $self->get_column_types(); return $colTypes->{$col}; } =for Rationale: A row of the table is a tilda row when all the columns of `+-~' type contain a `~'. This means, the table doesn't say anything about this verb (usage). The row is identified by number. =cut sub is_tilda_row { my ($self, $row) = @_; my $id = ident $self; if ($is_tildaRow_of{$id}) { return $is_tildaRow_of{$id}; } my $err_msg = "Can't check if tilda row: \n"; _check_row($self, $row, $err_msg); my $col_types = $self->get_column_types(); $is_tildaRow_of{$id} = all { $table_of{$id}->{$row}->{$_} eq '~' } grep { $col_types->{$_} eq '+-~' } keys %{$col_types}; return $is_tildaRow_of{$id}; } sub get_verb_for_row { my ($self, $row) = @_; my $id = ident $self; my $err_msg = "Can't get verb of row:\n"; _check_row($self, $row, $err_msg); return $table_of{$id}->{$row}->{$verbCol_of{$id}}; } sub get_rows_for_verb { my ($self, $verb) = @_; my $id = ident $self; croak "No verb\n" unless $verb; X::NoTableData->throw( message => "Couldn't get rows of verb: $verb:\n", table => $self, ) unless $table_of{$id}; my $verbCol = $verbCol_of{$id}; my @rows = grep { $table_of{$id}->{$_}->{$verbCol} =~ m{\b$verb\b} } 1 .. $maxRow_of{$id}; return (@rows); } =for Rationale: A column may be of type `text' or `+-~'. - A `text' column of a row is set if it's different from the `Empty mark', which in this case is <E>. - A `+-~' column of a row is set if it's `+'. The `empty string mark' can be set via the Parametrizer Object =cut sub is_column_set { my ($self, $row, $col) = @_; _check_row($self, $row, "Can't test if column is set:\n"); my $col_types = $self->get_column_types(); my $empty_mark = $self->get_parameters()->get_empty_string_mark(); my $re = qr($empty_mark); if ($col_types->{$col} eq 'text') { $table_of{ident $self}->{$row}->{$col} =~ m{$re} ? return 0 : return 1; } else { $table_of{ident $self}->{$row}->{$col} =~ m{[+]} ? return 1 : return 0; } } sub has_verb { my ($self, $verb) = @_; my $id = ident($self); croak "Can't check on empty verb\n" unless $verb; my $verbCol = $self->get_verb_column(); return grep { m{\b$verb\b} } map { $table_of{$id}->{$_}->{$verbCol} } 1 .. $maxCol_of{$id}; } sub has_verb_matching { my ($self, $match) = @_; # match is supposed to be a pattern: either a string or a qr// my $id = ident($self); croak "Won't check on empty match\n" unless $match; my $verbCol = $self->get_verb_column(); return grep { m{$match} } map { $table_of{$id}->{$_}->{$verbCol} } 1 .. $maxCol_of{$id}; } sub create_db_table { my ($self, $arg_ref) = @_; my $id = ident($self); X::NoTableData->throw( message => "Can't create DB handle:\n", table => $self, ) unless $table_of{$id}; use DBI; my $dbh=DBI->connect('dbi:AnyData(RaiseError=>1):'); my @column_names; if (not ($arg_ref) or $arg_ref->{col_names} eq 'col_numbers') { @column_names = map { "col_$_" } 0 .. $maxCol_of{$id}; } else { @column_names = map { my $header = $table_of{$id}->{0}->{$_}; if (not $header) { $header = "col_$_"; } $header =~ m{\A \p{Alphabetic} }x ? $header : "col_$header"; } 0 .. $maxCol_of{$id}; my $verbCol = $verbCol_of{$id} or $self->get_verb_column(); $column_names[$verbCol] = 'verb'; $column_names[$verbCol-1] = 'particle'; $column_names[$maxCol_of{$id}] = 'example'; } my $empty_string_mark = $parameters_of{$id}->get_empty_string_mark(); my $db_array_ref = [ [@column_names], map { my $row = $_; [ map { my $col_value = $table_of{$id}->{$row}->{$_}; $col_value =~ s{$empty_string_mark}{}; "$col_value"; } 0 .. $maxCol_of{$id} ]; } 1 .. $maxRow_of{$id} ]; $dbh->func( "table_$name_of{$id}", 'ARRAY', $db_array_ref, 'ad_import'); return $dbh; } } 1; # Magic true value required at end of module __END__ =head1 NAME Lingua::FR::Ladl::Table - An object representing a Ladl Table =head1 VERSION This document describes Lingua::FR::Ladl::Table version 0.0.1 =head1 SYNOPSIS use Lingua::FR::Ladl::Table; my $table = Lingua::FR::Ladl::Table->new({ name => $table_ref->{name} }); # load table data from an excel file: $table->load({ format => 'xls', file => 't/1.xls' }); # load table data from a gnumeric xml file: $table->load({ format => 'xml', file => 't/1.xml' }); $table->set_name('1'); my $name = $table->get_name(); my $verbCol = $table->get_verb_column(); # which column contains the verb my $col = $table->get_col_for_header('aux =: avoir'); # which column's header is 'aux =: avoir'? my $header = $table->get_header_for_col(4); # what is the column header of column 4? my $dbh = $table->create_db_table( { col_names => 'col_numbers' } ); # get a db handle with column numbers as column names # Query the table using SQL::Statement: for which verbs is column 8 empty and column 19 = '+'? my $query = "SELECT col_$verbCol FROM table_$name where col_8 = NULL AND col_19 = '+'"; my $sth = $dbh->prepare($query); $sth->execute(); =head1 DESCRIPTION This module provides a data structure representing a Ladl table. The Ladl tables are the digitized representation of Maurice Gross's B<Grammar Lexicon>, a very large scale, high precision, French linguistic resource, developed over several years by a group of skilled linguists and according to well defined linguistic criteria. The grammar lexicon describes syntactic and semantic properties of French (basic) sentences. A B<table> gathers together predicative items (verbs in this case) with comparable syntactico-semantic behaviour. In a table, B<columns> further specify the syntactico-semantic properties of each verb in that table. =head2 Example =begin HTML <TABLE border=1> <CAPTION>1</CAPTION> <TR> <TD valign="bottom" align="left" >N0 =: Nhum</TD> <TD COLSPAN=2 valign="bottom" align="left" >N0 =: Nnc</TD> <TD valign="middle" align="center" >1</TD> <TD valign="bottom" align="left" >aux =: avoir</TD> <TD valign="bottom" align="left" >aux =: être</TD> <TD valign="bottom" align="left" >N0 est Upp W</TD> <TD COLSPAN=2 valign="bottom" align="left" >N0 U</TD> <TD valign="bottom" align="left" >N1 =: Qu P</TD> <TD valign="bottom" align="left" >N1 =: Qu Psubj</TD> <TD valign="bottom" align="left" >Tp = Tc</TD> <TD valign="bottom" align="left" >Tc =: passé</TD> <TD valign="bottom" align="left" >Tc =: présent</TD> <TD valign="bottom" align="left" >Tc =: futur</TD> <TD valign="bottom" align="left" >Vc =: devoir</TD> <TD valign="bottom" align="left" >Vc =: pouvoir</TD> <TD valign="bottom" align="left" >Vc =: savoir</TD> <TD valign="bottom" align="left" >V-inf0 W = Ppv</TD> <TD valign="bottom" align="left" >N0 U Prép N1</TD> <TD valign="bottom" align="left" >N0 U Prép Nhum</TD> <TD valign="bottom" align="left" >N0 U Prép N-hum</TD> <TD valign="bottom" align="left" >Prép N1 = Ppv</TD> <TD valign="bottom" align="left" >N0 U dans N1</TD> <TD valign="bottom" align="left" >N0 U N1</TD> <TD valign="bottom" align="left" >N0 U Nhum</TD> <TD COLSPAN=2 valign="bottom" align="left" >N0 U N-hum</TD> </TR> <TR> <TD valign="bottom" align="left" >+</TD> <TD valign="bottom" align="left" >-</TD> <TD valign="bottom" align="left" ><E></TD> <TD valign="bottom" align="left" >achever</TD> <TD valign="bottom" align="left" >+</TD> <TD valign="bottom" align="left" >-</TD> <TD valign="bottom" align="left" >-</TD> <TD valign="bottom" align="left" >-</TD> <TD valign="bottom" align="left" >de</TD> <TD valign="bottom" align="left" >-</TD> <TD valign="bottom" align="left" >-</TD> <TD valign="bottom" align="left" >+</TD> <TD valign="bottom" align="left" >-</TD> <TD valign="bottom" align="left" >-</TD> <TD valign="bottom" align="left" >-</TD> <TD valign="bottom" align="left" >-</TD> <TD valign="bottom" align="left" >-</TD> <TD valign="bottom" align="left" >-</TD> <TD valign="bottom" align="left" >-</TD> <TD valign="bottom" align="left" >-</TD> <TD valign="bottom" align="left" >-</TD> <TD valign="bottom" align="left" >-</TD> <TD valign="bottom" align="left" >-</TD> <TD valign="bottom" align="left" >-</TD> <TD valign="bottom" align="left" >+</TD> <TD valign="bottom" align="left" >+</TD> <TD valign="bottom" align="left" >+</TD> <TD valign="bottom" align="left" >Max achève de peindre le mur</TD> </TR> <TR> <TD valign="bottom" align="left" >+</TD> <TD valign="bottom" align="left" >+</TD> <TD valign="bottom" align="left" ><E></TD> <TD valign="bottom" align="left" >aller</TD> <TD valign="bottom" align="left" >-</TD> <TD valign="bottom" align="left" >-</TD> <TD valign="bottom" align="left" >-</TD> <TD valign="bottom" align="left" >-</TD> <TD valign="bottom" align="left" ><E></TD> <TD valign="bottom" align="left" >-</TD> <TD valign="bottom" align="left" >-</TD> <TD valign="bottom" align="left" >-</TD> <TD valign="bottom" align="left" >-</TD> <TD valign="bottom" align="left" >-</TD> <TD valign="bottom" align="left" >-</TD> <TD valign="bottom" align="left" >+</TD> <TD valign="bottom" align="left" >+</TD> <TD valign="bottom" align="left" >+</TD> <TD valign="bottom" align="left" >-</TD> <TD valign="bottom" align="left" >-</TD> <TD valign="bottom" align="left" >-</TD> <TD valign="bottom" align="left" >-</TD> <TD valign="bottom" align="left" >-</TD> <TD valign="bottom" align="left" >-</TD> <TD valign="bottom" align="left" >-</TD> <TD valign="bottom" align="left" >-</TD> <TD valign="bottom" align="left" >-</TD> <TD valign="bottom" align="left" >Max va partir</TD> </TR> <TR> <TD valign="bottom" align="left" >+</TD> <TD valign="bottom" align="left" >-</TD> <TD valign="bottom" align="left" ><E></TD> <TD valign="bottom" align="left" >aller</TD> <TD valign="bottom" align="left" >-</TD> <TD valign="bottom" align="left" >+</TD> <TD valign="bottom" align="left" >-</TD> <TD valign="bottom" align="left" >-</TD> <TD valign="bottom" align="left" >jusqu'à</TD> <TD valign="bottom" align="left" >-</TD> <TD valign="bottom" align="left" >-</TD> <TD valign="bottom" align="left" >+</TD> <TD valign="bottom" align="left" >-</TD> <TD valign="bottom" align="left" >-</TD> <TD valign="bottom" align="left" >-</TD> <TD valign="bottom" align="left" >-</TD> <TD valign="bottom" align="left" >-</TD> <TD valign="bottom" align="left" >-</TD> <TD valign="bottom" align="left" >-</TD> <TD valign="bottom" align="left" >-</TD> <TD valign="bottom" align="left" >-</TD> <TD valign="bottom" align="left" >-</TD> <TD valign="bottom" align="left" >-</TD> <TD valign="bottom" align="left" >-</TD> <TD valign="bottom" align="left" >-</TD> <TD valign="bottom" align="left" >-</TD> <TD valign="bottom" align="left" >-</TD> <TD valign="bottom" align="left" >La pluie va tomber</TD> </TR> <TR> <TD valign="bottom" align="left" >+</TD> <TD valign="bottom" align="left" >+</TD> <TD valign="bottom" align="left" >ne</TD> <TD valign="bottom" align="left" >aller Nég</TD> <TD valign="bottom" align="left" >-</TD> <TD valign="bottom" align="left" >+</TD> <TD valign="bottom" align="left" >-</TD> <TD valign="bottom" align="left" >-</TD> <TD valign="bottom" align="left" >sans</TD> <TD valign="bottom" align="left" >-</TD> <TD valign="bottom" align="left" >+</TD> <TD valign="bottom" align="left" >+</TD> <TD valign="bottom" align="left" >-</TD> <TD valign="bottom" align="left" >-</TD> <TD valign="bottom" align="left" >-</TD> <TD valign="bottom" align="left" >+</TD> <TD valign="bottom" align="left" >+</TD> <TD valign="bottom" align="left" >+</TD> <TD valign="bottom" align="left" >-</TD> <TD valign="bottom" align="left" >+</TD> <TD valign="bottom" align="left" >-</TD> <TD valign="bottom" align="left" >+</TD> <TD valign="bottom" align="left" >-</TD> <TD valign="bottom" align="left" >-</TD> <TD valign="bottom" align="left" >-</TD> <TD valign="bottom" align="left" >-</TD> <TD valign="bottom" align="left" >-</TD> <TD valign="bottom" align="left" >Cette mesure n'ira pas sans créer des troubles</TD> </TR> </TABLE> =end HTML The tables are available as a set of excel spreadsheets from L<http://ladl.univ-mlv.fr/> This module represents a table as a Ladl::Table object and allows to investigate and query: =over =item what verbs belong to the table. =item what are the headers of the table? =item which column corresponds to which header. =item which verb corresponds to which row(s). =item what is the value of a column in a given row. =item what is the value in a given row for a given header. =back It is also possible to formulate more complex queries using the SQL dialect implemented in SQL::Statement (see L<SQL::Statement>). =head1 INTERFACE =head2 Methods For all following methods, column and row numbering starts at 0. =over =item new - build a new Table object my $table = Lingua::FR::Ladl::Table->new({ name => 'test_table' }); There's one optional initial argument: I<name>, the table's name =item set/get_name =item load $table->load( {format => 'xls', file=>'file_name'} ) $table->load( {format => 'xml', file=>'file_name'} ) Load table data from a file in the given format. Format may be one of: =over =item xls - file is an excel file in this case Spreadsheet::Parser is used to parse the file. =item xml - an xml file in gnumeric xml format the file is parsed using XML::LibXML =back The file name is also set to a value inferred from the file name by removing the suffix. The file name is important because the B<get_verb_column> method relies on the correct file name. =item get/set_maxCol, get/set_maxRow get/set maximum row or column value =item get_headers Return a hash with the column headers as keys and the corresponding column numbers as values. When there's no header, I<col_>column number is used as key. =item get_value_at($row, $col) =item get_col_for_header( $header ) return column number for a given column header, undef if $header doesn't match. For the table in the example: $table->get_col_for_header('aux =: avoir') returns 4 =item get_header_for_col($col) return the header for a given column. Example: $table->get_header_for_col(4) returns 'aux =: avoir' =item get_verb_column return the column (by number) containing the verb. The verb column is assumed to be the column the header of which is equal to the table name. For the table in the example $table->get_verb_column(); returns 3 =item get_verbs return the list of verbs of the table (as an array). =item get_particle_column The particle column contains entries as I<ne>, I<n'>, I<se>, I<s'>, occuring in front of the verb. We assume it to be the column right before the verb column. Example: $table->get_particle_column() returns 2 =item get_example_column The example column contains example phrases with the verb of the row. We assume it's the last column of the table. For the table in the example above: my $col = $table->get_example_column() would set I<$col> to I<28> =item get_column_types Columns may either contain text or one of '+', '-' and '~'. The method returns a reference to a hash with the column numbers as keys and assigning to the columns either 'text' if they have text content or else '+-~'. For the table in the example: $table->get_column_types() returns the hash: { '0' => '+-~', '1' => '+-~', '2' => 'text', '3' => 'text', '4' => '+-~', '5' => '+-~' '6' => '+-~', '7' => '+-~', '8' => 'text', '9' => '+-~', '10' => '+-~', '11' => '+-~', '12' => '+-~', '13' => '+-~', '14' => '+-~', '15' => '+-~', '16' => '+-~', '17' => '+-~', '18' => '+-~', '19' => '+-~', '20' => '+-~', '21' => '+-~', '22' => '+-~', '23' => '+-~', '24' => '+-~', '25' => '+-~', '26' => '+-~', '27' => 'text', }; =item get_column_type_for_col Return the column type for a given column. The column type is either I<+-~> if the columns contains only one of `+', `-' or `~', or I<text> if the column contains some other text content. Throws an exception when column is inexistant. Example: $table->get_column_type_for_col(2) returns `text'; =item is_tilda_row($row) A row is a tilda row if all the columns of type '+-~' are '~' - i.e. they contain no specific information about this verb. =item get_verb_for_row($row) Return the verb for a given row. For the the table in the example above: my $verb = $table->get_verb_for_row(3) returns `aller' =item get_rows_for_verb($verb) Returns the rows the verb occurs in (there may be more than 1). Example: my @rows = $table->get_rows_for_verb('devoir'); @rows is (27, 28, 29) =item is_column_set($row, $col) A column may be of type I<text> or I<+-~>. =over =item a I<text> column of a row is set if it's different from the `Empty mark', which by default is <E>. =item a I<+-~> column of a row is set if it's I<+>. =back The I<empty_string_mark> can be set via the L</Parameters> accessors. =item has_verb($verb) Returns I<true> if the verb is contained in the table. =item has_verb_matching($regexp) Returns I<true> if a verb of the table matches $regexp. =item create_db_table( { col_names => 'col_numbers' } ) Provides a DB interface using DBI and returns a db handle to an in-memory table created using DBD::AnyData. The table name is I<table_>$table_name. The column names are either =over =item I<col_>$col_numbers, when the argument is { col_names => 'col_numbers' } (the default). =item The column headers when another argument is given. When the header is empty col_I<column_number> is used. =back Example: # get a db handle with columns named col_<column number> # default: { col_names => 'col_numbers' } my $dbh = $table->create_db_table(); # get a db handle using the column headers as column names $dbh = $table->create_db_table( { col_names => 'headers' } ); Once you have a db handle you can start querying the table using SQL::Statement (see L<SQL::Statements> and L<DBD::AnyData> for which SQL statements are supported). Example: my $query = "SELECT col_$verbCol FROM table_$name where col_8 = NULL AND col_19 = '+'"; my $sth = $dbh->prepare($query); $sth->execute(); Note: The empty string marks (`<E>' by default) are replaced by empty strings, equivalent to NULL. =back =head2 Parameters The class is parametrized by a Parametrizer (see L<Lingua::FR::Ladl::Parametrizer>) object, which can be accessed by the get/set_parameters method. A parametrizer object provides accessors for its customization items. Currently the most important item is the I<empty_string_mark> which defaults to `<E>'. You could change the I<empty_string_mark> like so: my $par_object = $table->get_parameters(); $par_object->set_empty_string_mark('EMPTY'); $table->set_parameters($par_object); =head1 DIAGNOSTICS =over =item C<< Format must be one of xls, xml not $format >> Exception thrown when trying to load the table from a format that is not supported currently. The only supported formats are: =over =item xls excel table =item xml gnumeric xml format =back =item C<< Could not create file parser context for file "unknown" >> Thrown by LibXML, the xml parser: Ladl::Table wants to load table data by parsing an xml file, but the xml parser throws an exception. Maybe the file is not accessible? =item C<< Couldn't load table data: error parsing file >> Ladl::Table wants to load table data by parsing an excel file, but Spreadsheet::ParseExcel returned invalid data. Maybe the file is not accessible? =item C<< Need table data for table_name, maybe you should call the load method first? >> Most methods only work and make sense if table data is loaded. =item C<< col/row must be less or equal max_row/max_col >> Method was called with an invalid row/column respectively. =back =head1 CONFIGURATION AND ENVIRONMENT Lingua::FR::Ladl::Table requires no configuration files or environment variables. =head1 DEPENDENCIES =over =item L<Class::Std> =item L<Readonly> =item L<List::Util> =item L<List::MoreUtils> =item L<XML::LibXML> if you want to load table data from a gnumeric XML file. =item L<Spreadsheet::ParseExcel> if you want to load table data from an excel file. =item L<DBI> and L<DBD::AnyData> if you want to use a DB interface. =back =head1 INCOMPATIBILITIES =for author to fill in: A list of any modules that this module cannot be used in conjunction with. This may be due to name conflicts in the interface, or competition for system or program resources, or due to internal limitations of Perl (for example, many modules that use source code filters are mutually incompatible). None reported. =head1 BUGS AND LIMITATIONS =for author to fill in: A list of known problems with the module, together with some indication Whether they are likely to be fixed in an upcoming release. Also a list of restrictions on the features the module does provide: data types that cannot be handled, performance issues and the circumstances in which they may arise, practical limitations on the size of data sets, special cases that are not (yet) handled, etc. No bugs have been reported. Please report any bugs or feature requests to C<bug-lingua-fr-ladl-table@rt.cpan.org>, or through the web interface at L<http://rt.cpan.org>. =head1 SEE ALSO L<http://ladl.univ-mlv.fr/>, where the Ladl tables have been developed and where they can be obtained. Some publications on this project: =over =item B<Maurice Gross' grammar lexicon and Natural Language Processing> by Claire Gardent, Bruno Guillaume, Guy Perrier, Ingrid Falk L<http://hal.archives-ouvertes.fr/action/open_file.php?url=http://hal.archives-ouvertes.fr/docs/00/10/31/56/PDF/poznan05.pdf&docid=103156> =item B<Extracting subcategorisation information from Maurice Gross' grammar lexicon>, by Claire Gardent, Bruno Guillaume, Guy Perrier, Ingrid Falk in Archives of Control Sciences (2005) 289--300 =item A talk at the French Perl Workshop 2006 (in French ;-) L<http://conferences.mongueurs.net/fpw2006/slides/lexique-syntaxique.pdf> =back =head1 AUTHOR Ingrid Falk C<< <ingrid dot falk at loria dot fr> >> =head1 LICENCE AND COPYRIGHT Copyright (c) 2007, Ingrid Falk C<< <ingrid dot falk at loria dot fr> >>. All rights reserved. This module is free software; you can redistribute it and/or modify it under the same terms as Perl itself. See L<perlartistic>. =head1 DISCLAIMER OF WARRANTY BECAUSE THIS SOFTWARE IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY FOR THE SOFTWARE, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES PROVIDE THE SOFTWARE "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE SOFTWARE IS WITH YOU. SHOULD THE SOFTWARE PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, REPAIR, OR CORRECTION. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR REDISTRIBUTE THE SOFTWARE AS PERMITTED BY THE ABOVE LICENCE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE THE SOFTWARE (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A FAILURE OF THE SOFTWARE TO OPERATE WITH ANY OTHER SOFTWARE), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGES.