#!/usr/bin/env perl
use strictures 1;
use utf8;
use 5.018;
=head1 NAME
Bio::WebService::LANL::SequenceLocator::Server - A JSON web API for LANL's HIV sequence locator
=head1 SYNOPSIS
After installation:
plackup `perldoc -l Bio::WebService::LANL::SequenceLocator::Server`
Or from a git checkout or tarball:
plackup # uses app.psgi
Or as a L<Server::Starter> managed service:
examples/service start
=head1 DESCRIPTION
This server powers
L<a simple, JSON-based web API|https://indra.mullins.microbiol.washington.edu/locate-sequence/>
for the L<LANL's HIV sequence locator|http://www.hiv.lanl.gov/content/sequence/LOCATE/locate.html>
using L<Bio::WebService::LANL::SequenceLocator>.
=head1 ENVIRONMENT
=head2 SERVER_ADMIN
Set the SERVER_ADMIN environment variable before starting the server to provide
a contact address in requests to LANL and server error messages in API
responses.
=head1 INSTALLATION
The prerequisites for this server are optional and are probably not installed
by default on your computer when you install this distribution. From a git
checkout or tarball, you can install the necessary modules with
L<cpanm|App::cpanminus>:
cpanm --with-all-features --installdeps .
=cut
package Bio::WebService::LANL::SequenceLocator::Server;
use Web::Simple;
use Bio::WebService::LANL::SequenceLocator;
use File::Share qw< dist_file >;
use JSON qw< encode_json >;
use Text::CSV;
use Plack::App::File;
use Path::Tiny;
use IO::String;
has contact => (
is => 'ro',
default => sub { $ENV{SERVER_ADMIN} || '[no address provided]' },
);
has locator => (
is => 'ro',
isa => sub {
die "Attribute 'locator' is not a Bio::WebService::LANL::SequenceLocator"
unless $_[0]->isa("Bio::WebService::LANL::SequenceLocator");
},
lazy => 1,
builder => sub {
Bio::WebService::LANL::SequenceLocator->new(
agent_string => join " ", "via", __PACKAGE__, $_[0]->contact
)
},
);
has about_page => (
is => 'ro',
lazy => 1,
builder => sub { dist_file('Bio-WebService-LANL-SequenceLocator', 'about.html') },
);
has formats => (
is => 'ro',
default => sub { [qw( json csv )] },
);
sub dispatch_request {
sub (POST + /within/hiv) {
sub (%base~&format~) {
my ($self, $base, $format) = @_;
$format ||= 'json';
$format = lc $format;
return error(406 => "format '$format' is not supported; try one of " . join(", ", @{$self->formats}))
unless grep { $format eq $_ } @{$self->formats};
sub (%fasta=) {
my ($self, $fasta) = @_;
return $self->locate_sequences_from_fasta($fasta, $base, $format);
},
sub (*fasta=) {
my ($self, $fasta) = @_;
return error(422 => $fasta->reason)
unless $fasta->is_upload;
return $self->locate_sequences_from_fasta(path($fasta->path)->slurp, $base, $format);
},
sub (%@sequence~) {
my ($self, $sequences) = @_;
return $self->locate_sequences($sequences, $base, $format);
},
},
},
sub (GET + /within/hiv) {
error( 405 => "You must make location requests using POST." )
},
sub (GET + /) {
state $about = Plack::App::File->new(file => $_[0]->about_page);
$about;
},
}
sub locate_sequences_from_fasta {
my $self = shift;
my $fasta = shift;
my $sequences = $self->read_fasta(\$fasta)
or return error( 415 => "Couldn't parse FASTA; invalid formating?" );
return $self->locate_sequences($sequences, @_);
}
sub locate_sequences {
my ($self, $sequences, $base, $format) = @_;
return error(422 => 'At least one value for "sequence" is needed.')
unless $sequences and @$sequences;
my $results = $self->locator->find($sequences, base => $base)
or return error(503 => "Backend request to LANL failed, sorry! Contact @{[ $self->contact ]} if the problem persists.");
return $self->format_results($results, $format);
}
sub format_results {
my ($self, $results, $format) = @_;
my $formatter = $self->can("as_$format")
or return error(500 => "Unknown format '$format'");
return $formatter->($self, $results);
}
sub as_json {
my ($self, $results) = @_;
my $json = eval { encode_json($results) };
if ($@ or not $json) {
warn $@ ? "Error encoding JSON response: $@\n"
: "Failed to encode JSON response, but no error?!\n";
return error(500 => "Error encoding results to JSON. Contact @{[ $self->contact ]}");
}
return [
200,
[ 'Content-type' => 'application/json' ],
[ $json, "\n" ],
];
}
sub as_csv {
my ($self, $results) = @_;
my $csv = IO::String->new;
my $write = sub {
state $csv_writer = Text::CSV->new({ binary => 1 });
$csv_writer->print($csv, @_);
$csv->print("\n");
};
my @fields = qw( query_sequence base_type reverse_complement genome_start genome_end
polyprotein start end region_names similarity_to_hxb2 alignment hxb2_sequence );
$write->(\@fields);
for my $query (@$results) {
# Trim leading/trailing whitespace
$query->{alignment} =~ s/^\n//gm;
$query->{alignment} =~ s/^\s*$//gm;
chomp $query->{alignment};
$query->{region_names} = join " ", @{$query->{region_names}};
$write->([ @$query{@fields} ]);
}
$csv->seek(0, 0);
return [
200,
[ 'Content-type' => 'text/csv',
'Content-disposition' => 'inline; filename="located.csv"' ],
$csv,
];
}
sub read_fasta {
my ($self, $fasta) = @_;
# XXX TODO: preserve sequence names and use them in output?
my (@sequences) = map { chomp; $_ }
split /^>.*\R/m, $$fasta;
# Remove any leading garbage before the first description line (usually
# just the empty string)
shift @sequences;
return \@sequences;
}
sub error {
return [
shift,
[ 'Content-type' => 'text/plain' ],
[ join(" ", @_), "\n" ]
];
}
__PACKAGE__->run_if_script;
=head1 AUTHOR
Thomas Sibley E<lt>trsibley@uw.eduE<gt>
=head1 COPYRIGHT
Copyright 2014 by the Mullins Lab, Department of Microbiology, University of
Washington.
=head1 LICENSE
Licensed under the same terms as Perl 5 itself.
=cut