package WWW::Translate::interNOSTRUM;

use strict;
use warnings;
use WWW::Mechanize;
use Carp qw(carp croak);

our $VERSION = '0.02';

# Language pairs accepted by this module (identifier => description).
my %lang_pairs = (
    'ca-es' => 'Catalan -> Spanish',    # default
    'es-ca' => 'Spanish -> Catalan',
    'es-va' => 'Spanish -> Catalan with Valencian forms',
);

# Public output format name => value selected in the "tipus" form field.
my %output = (
    plain_text  => 'txtf',    # default
    marked_text => 'txt',
);

# Values used for any parameter the caller does not override.
my %defaults = (
    lang_pair => 'ca-es',
    output    => 'plain_text',
);

# Parameter name => table of values that are legal for it.
my %valid_values_for = (
    lang_pair => \%lang_pairs,
    output    => \%output,
);

# Address of the translation form.
my $SERVICE_URL = 'http://www.internostrum.com/welcome.php';


# new(%overrides)
#
# Builds an engine object. Recognized keys are 'lang_pair' and 'output'.
# Unknown keys and illegal values produce a warning (carp) and are
# discarded, so the corresponding default is used instead.
# Returns the blessed object, which holds its own WWW::Mechanize agent.
sub new {
    my ($class, %overrides) = @_;

    for my $key (keys %overrides) {

        # Unknown key: warn and drop it.
        if (!exists $defaults{$key}) {
            carp "Unknown parameter: $key\n";
            delete $overrides{$key};
            next;
        }

        # Known key with an illegal value: warn and fall back to the default.
        if (!_is_valid($key, $overrides{$key})) {
            carp _message($key, $overrides{$key});
            delete $overrides{$key};
        }
    }

    my %this = (%defaults, %overrides);
    $this{agent} = WWW::Mechanize->new();

    return bless \%this, $class;
}


# translate($string)
#
# Submits $string through the interNOSTRUM web form and returns the
# translated text. Returns '' for an empty (or undefined) string, and
# undef (with a warning) when no translation can be found in the response.
# Croaks when called without an argument or when an HTTP request fails.
sub translate {
    my ($self, @args) = @_;

    croak "Nothing to translate\n" unless @args;

    my $string = $args[0];
    return '' if !defined($string) || $string eq '';

    # Work on local copies: the object's configured language pair must
    # survive the call so that later calls (and from_into()) still see it.
    my ($direction, $valencian) = _form_settings($self->{lang_pair});

    my $mech = $self->{agent};
    $mech->get($SERVICE_URL);
    croak $mech->response->status_line unless $mech->success;

    $mech->field('quadretext', $string);
    $mech->tick('valen', 1) if $valencian;
    $mech->select('direccio', $direction);
    $mech->select('tipus', $output{ $self->{output} });
    $mech->click();
    croak $mech->response->status_line unless $mech->success;

    my $translated = _extract_translation($mech->content());
    carp 'Could not find the translation in the interNOSTRUM response'
        unless defined $translated;

    return $translated;
}


# from_into([$lang_pair])
#
# Without arguments, returns the current language pair.
# With an argument, sets the language pair and returns the new value; an
# illegal value triggers a warning, leaves the current pair untouched and
# yields a false return value.
sub from_into {
    my ($self, @args) = @_;

    return $self->{lang_pair} unless @args;
    return $self->_set_option('lang_pair', $args[0]);
}


# output_format([$format])
#
# Without arguments, returns the current output format.
# With an argument, sets the output format and returns the new value; an
# illegal value triggers a warning, leaves the current format untouched
# and yields a false return value.
sub output_format {
    my ($self, @args) = @_;

    return $self->{output} unless @args;
    return $self->_set_option('output', $args[0]);
}


# Stores $value under $key when it is legal for that parameter; otherwise
# warns and keeps the current setting.
sub _set_option {
    my ($self, $key, $value) = @_;

    if (_is_valid($key, $value)) {
        return $self->{$key} = $value;
    }

    my $shown = defined $value ? $value : 'undef';
    carp "Invalid value for parameter $key, $shown. "
       . "Keeping the current value, $self->{$key}";

    return;
}


# Returns 1 when $value is a legal value for parameter $key, 0 otherwise.
sub _is_valid {
    my ($key, $value) = @_;

    my $allowed = $valid_values_for{$key};
    return 0 unless defined $allowed && defined $value;

    return exists $allowed->{$value} ? 1 : 0;
}


# Maps a language pair to the form settings: the value for the "direccio"
# field and a flag telling whether the "valen" checkbox must be ticked.
# 'es-va' is submitted as 'es-ca' with the checkbox ticked.
sub _form_settings {
    my ($lang_pair) = @_;

    return ('es-ca', 1) if $lang_pair eq 'es-va';
    return ($lang_pair, 0);
}


# Pulls the translated paragraph out of the response page and collapses
# double spaces. Returns nothing when the expected markup is not found.
sub _extract_translation {
    my ($html) = @_;

    return unless defined $html;

    my ($text) =
        $html =~ m{spelling\.</div>\s*<p class="textonormal">(.+?)</p>}s;
    return unless defined $text;

    # Lookarounds keep the neighbouring characters out of the match, so
    # consecutive gaps around one-letter words ("a  b  c") are all fixed.
    $text =~ s/(?<=\S)\s{2}(?=\S)/ /g;

    return $text;
}


# Builds the warning text used by new() for an illegal parameter value.
sub _message {
    my ($key, $value) = @_;

    $value = 'undef' unless defined $value;

    return "Invalid value for parameter $key, $value.\n"
         . "Will use the default value instead.\n";
}


1;

__END__


=head1 NAME

WWW::Translate::interNOSTRUM - Catalan < > Spanish machine translation


=head1 VERSION

Version 0.02 November 7, 2006


=head1 SYNOPSIS

    use WWW::Translate::interNOSTRUM;

    my $engine = WWW::Translate::interNOSTRUM->new();

    my $translated_string = $engine->translate($string);

    # default language pair is Catalan -> Spanish
    # change to Spanish -> Catalan:
    $engine->from_into('es-ca');

    # check current language pair:
    my $current_langpair = $engine->from_into();

    # default output format is 'plain_text'
    # change to 'marked_text':
    $engine->output_format('marked_text');

    # check current output format:
    my $current_format = $engine->output_format();


=head1 DESCRIPTION

interNOSTRUM is a Catalan < > Spanish machine translation engine developed by
the Department of Software and Computing Systems of the University of Alicante
in Spain. This module provides an OO interface to the interNOSTRUM engine web
server.

interNOSTRUM provides approximate translations of Catalan into Spanish and
Spanish into Catalan. It generates both the central variant of Oriental
Catalan (the standard variant used in Catalonia) and, optionally, Valencian
forms, which follow the recommendations published in the guide
L<http://www.ua.es/spv/assessorament/criteris.pdf>.

For more information on the Catalan variants, please see the References below.


=head1 CONSTRUCTOR

=head2 new()

Creates and returns a new WWW::Translate::interNOSTRUM object.

    my $engine = WWW::Translate::interNOSTRUM->new();

WWW::Translate::interNOSTRUM recognizes the following parameters:

=over 4

=item * C<< lang_pair >>

The valid values of this parameter are:

=over 8

=item * C<< es-ca >>

Spanish into Standard Catalan

=item * C<< es-va >>

Spanish into Valencian

=item * C<< ca-es >>

Standard Catalan or Valencian into Spanish

=back

Default value: 'ca-es'.

=item * C<< output >>

The valid values of this parameter are:

=over 8

=item * C<< plain_text >>

Returns the translation as plain text.

=item * C<< marked_text >>

Returns the translation with the unknown words marked with an asterisk.

=back

Default value: 'plain_text'.

=back

The default parameter values can be overridden when creating a new
interNOSTRUM engine object:

    my %options = (
        lang_pair => 'es-ca',
        output    => 'marked_text',
    );

    my $engine = WWW::Translate::interNOSTRUM->new(%options);


=head1 METHODS

=head2 $engine->translate($string)

Returns the translation of $string generated by interNOSTRUM.
$string must be a string of ANSI text, and can contain up to 16,384 characters.
If the encoding of the source text isn't Latin-1, you must convert it to
Latin-1 before sending it to the MT engine. You can use the Encode module for
this task.

=head2 $engine->from_into($lang_pair)

Changes the engine language pair to $lang_pair.
When called with no argument, it returns the value of the current engine
language pair:

    $current_langpair = $engine->from_into();

=head2 $engine->output_format($format)

Changes the engine output format to $format.
When called with no argument, it returns the value of the current engine
output format:

    $current_format = $engine->output_format();


=head1 DEPENDENCIES

WWW::Mechanize 1.20 or higher.

=head1 REFERENCES

interNOSTRUM website:

L<http://www.internostrum.com/welcome.php>

Department of Software and Computing Systems (University of Alicante):

L<http://www.dlsi.ua.es/index.cgi?id=eng>

For more information on the variants of Catalan, see:

L<http://en.wikipedia.org/wiki/Catalan_language>


=head1 ACKNOWLEDGEMENTS

Many thanks to Mikel Forcada Zubizarreta, coordinator of the interNOSTRUM
project, who kindly answered my questions during the development of this
module.


=head1 AUTHOR

Enrique Nell, E<lt>perl_nell@telefonica.netE<gt>


=head1 COPYRIGHT AND LICENSE

Copyright (C) 2006 by Enrique Nell.

This library is free software; you can redistribute it and/or modify
it under the same terms as Perl itself.

=cut