#!/usr/bin/perl -w

use lib qw(.);
use DTA::CAB;
use DTA::CAB::Client::HTTP;
use DTA::CAB::Utils ':all';
use DTA::CAB::Datum ':all';
#use Encode qw(encode decode);
use File::Basename qw(basename);
use Getopt::Long qw(:config no_ignore_case);
use Time::HiRes qw(gettimeofday tv_interval);
use IO::File;
use Pod::Usage;

use strict;

##==============================================================================
## DEBUG
##==============================================================================
#do "storable-debug.pl" if (-f "storable-debug.pl");

##==============================================================================
## Constants & Globals
##==============================================================================

##-- program identity
our $prog = basename($0);
our $VERSION = $DTA::CAB::VERSION;

##-- General Options
our ($help,$man,$version,$verbose);
#$verbose = 'default';

##-- Logging options
$DTA::CAB::Logger::defaultLogOpts{rootLevel}='WARN';
$DTA::CAB::Logger::defaultLogOpts{level}='INFO';

##-- Client Options
our $defaultPort = 9099;
our $defaultPath = '/query';
our $serverURL   = "http://localhost:${defaultPort}${defaultPath}";
our %clientOpts = (
		   timeout=>65535, ##-- wait for a *long* time (65535 = 2**16-1 ~ 18.2 hours)
		   testConnect=>0,
		   mode => 'xpost',
		   post => 'urlencoded',
		   cacheGet=>1,
		   cacheSet=>1,
		  );

##-- Analysis & Action Options
our $analyzer = 'default';
our $action = 'document';
our %analyzeOpts = (
		    headers=>[], ##-- additional HTTP request header+value pairs (e.g. Cache-Control)
		   );
our $doProfile = undef;

##-- I/O Options
our $inputClass  = undef;  ##-- default parser class
our $outputClass = undef;  ##-- default format class
our $outfile     = '-';
our %qfo = (
	    #encoding => 'UTF-8',
	   );
our (%ifo,%ofo, $qfmt,$ifmt,$ofmt);

our $bench_iters = 1; ##-- number of benchmark iterations for -bench mode
our $trace_request_file = undef; ##-- trace request to file?

##==============================================================================
## Command-line
GetOptions(##-- General
	   'help|h'    => \$help,
	   'man'       => \$man,
	   'version|V' => \$version,

	   ##-- Client Options
	   'server-url|serverURL|server|url|s|u=s' => \$serverURL,
	   'timeout|T=i' => \$clientOpts{timeout},
	   'test-connect|tc!' => \$clientOpts{testConnect},
	   'header|H=s%' => sub { push(@{$analyzeOpts{headers}},@_[1,2]); },
	   'cache-get|cg!' => \$clientOpts{cacheGet},
	   'cache-set|cs!' => \$clientOpts{cacheSet},
	   'cache|cc!' => sub { $clientOpts{cacheGet}=$clientOpts{cacheSet}=$_[1] },
	   'get' => sub { $clientOpts{mode}='get'; },
	   'post' => sub { $clientOpts{mode}='post'; },
	   'multipart|multi!' => sub { $clientOpts{post}=$_[1] ? 'multipart' : 'urlencoded'; },
	   'xpost' => sub { $clientOpts{mode}='xpost'; },
	   'xmlrpc' => sub { $clientOpts{mode}='xmlrpc'; },

	   ##-- Analysis Options
	   'analyzer|a=s' => \$analyzer,
	   'analysis-option|analyze-option|ao|O=s' => \%analyzeOpts,
	   'profile|p!' => \$doProfile,
	   'list|l'   => sub { $action='list'; },
	   'token|t|word|w' => sub { $action='token'; },
	   'sentence|S' => sub { $action='sentence'; },
	   'document|d' => sub { $action='document'; },
	   'data|D' => sub { $action='data'; }, ##-- server-side parsing
	   'raw|r' => sub { $action='raw'; },   ##-- string args, server-side tokenization & parsing
	   'rawfile|rf|R' => sub { $action='rawfile'; }, ##-- string args, server-side tokenization & parsing
	   'bench|b:i' => sub { $action='bench'; $bench_iters=$_[1]; },

	   ##-- I/O
	   'query-format-class|query-format|qfmt|qfc|qf|qc=s' => \$qfo{class},
	   'input-format-class|input-format|ifmt|ifc|if|ic=s' => \$ifo{class},
	   'output-format-class|output-format|ofmt|ofc|of|oc=s' => \$ofo{class},
	   'format-class|format|fmt|fc=s' => sub { $qfo{class}=$ifo{class}=$ofo{class}=$_[1]; },
	   ##
	   'query-format-option|query-option|qfo|qo=s' => \%qfo,
	   'input-format-option|input-option|ifo|io=s' => \%ifo,
	   'output-format-option|ofo|oo=s' => \%ofo,
	   'format-option|fo=s%' => sub { $qfo{$_[1]}=$ifo{$_[1]}=$ofo{$_[1]}=$_[2]; },
	   ##
	   #'query-format-encoding|query-encoding|qfe|qe' => \$qfo{encoding},
	   #'input-format-encoding|input-encoding|ife|ie=s' => \$ifo{encoding},
	   #'output-format-encoding|output-encoding|ofe|oe=s' => \$ofo{encoding},
	   #'format-encoding|encoding|enc|fe=s%' => sub { $qfo{encoding}=$ifo{encoding}=$ofo{encoding}=$_[1]; },
	   ##
	   'output-format-level|ofl|format-level|fl|output-level|ol|pretty=s' => \$ofo{level},

	   ##-- I/O: output
	   'format-file|ff|output-file|output|o=s' => \$outfile,

	   ##-- debugging
	   'trace-request|trace|request|tr=s' => \$trace_request_file, ##-- not implemented here

	   ##-- Log4perl
	   DTA::CAB::Logger->cabLogOptions('verbose'=>1),
	  );

if ($version) {
  print cab_version;
  exit(0);
}

pod2usage({-exitval=>0, -verbose=>1}) if ($man);
pod2usage({-exitval=>0, -verbose=>0}) if ($help);


##==============================================================================
## MAIN
##==============================================================================

##-- log4perl initialization
DTA::CAB::Logger->logInit();

##-- sanity checks
$serverURL = "http://$serverURL" if ($serverURL !~ m{[^:]+:.*//});
if (DTA::CAB::Client::HTTP->lwpUrl($serverURL) =~ m{([^:]+://[^/:]*)(/[^:]*)$}) {
  ##-- default port (tcp URLs only)
  $serverURL = "$1:${defaultPort}$2";
}
DTA::CAB::Logger->trace("serverURL=$serverURL");

##-- trace request file?
our $tracefh = undef;
if (defined($trace_request_file)) {
  $tracefh = IO::File->new(">$trace_request_file")
    or die("$0: open failed for trace file '$trace_request_file': $!");
}

##-- create client object
our $cli = DTA::CAB::Client::HTTP->new(%clientOpts,
				       serverURL => $serverURL,
				       #encoding => $qfo{encoding},
				       tracefh=>$tracefh,
				      );
$cli->connect() or die("$0: connect() failed: $!");

##======================================================
## Input & Output Formats

our $isFileAction = ($action =~ m(data|doc|rawfile|bench));

##-- format defaults
foreach my $fo (\%ifo, \%qfo, \%ofo) {
  delete @$fo{grep {!defined($fo->{$_})} keys %$fo};
}
$qfo{level} = $ofo{level} if (defined($ofo{level}) && $action eq 'data');
$ifo{$_} = $qfo{$_} foreach (grep {$_ ne 'class' && !exists($ifo{$_})} keys %qfo);
$ofo{$_} = $ifo{$_} foreach (grep {$_ ne 'class' && !exists($ofo{$_})} keys %ifo);

##-- formats: sanity checks
die("$prog: unknown query format class '$qfo{class}'")
  if (defined($qfo{class}) && !DTA::CAB::Format->newFormat($qfo{class}));
die("$prog: unknown input format class '$ifo{class}'")
  if (defined($ifo{class}) && !DTA::CAB::Format->newFormat($ifo{class}));
die("$prog: unknown output format class '$ofo{class}'")
  if (defined($ofo{class}) && !DTA::CAB::Format->newFormat($ofo{class}));

##-- formats: create
$ifmt = DTA::CAB::Format->newReader(($isFileAction ? (file=>$ARGV[0]) : (class=>$qfo{class})), %ifo)
  or die("$0: could not create input format of class '".($ifo{class}||'undef')."': $!");

$qfmt = DTA::CAB::Format->newReader(%qfo, class=>($qfo{class}||$ifmt->shortName))
  or die("$0: could not create query format of class '".($qfo{class}||'undef')."': $!");

$ofmt = DTA::CAB::Format->newWriter(%ofo, ($outfile ne '-' ? (file=>$outfile) : qw()))
  or die("$0: could not create output format of class '".($ofo{class}||'undef')."': $!");

##-- formats: post-creation sanity checks
if ($action eq 'data') {
  if ($ifmt->shortName ne $qfmt->shortName) {
    warn("$prog: -input-format-class must match -query-format-class in -data mode!");
    warn("$prog: setting -query-format-class=", $ifmt->shortName);
    $qfo{class} = $ifmt->shortName;
    $qfmt = DTA::CAB::Format->newWriter(%qfo)
      or die("$0: could not create query format of class '$qfo{class}': $!");
  }

  if ($ofmt->shortName ne $qfmt->shortName) {
    warn("$prog: -output-format-class must match -query-format-class in -data mode!");
    warn("$prog: setting -output-format-class=", $qfmt->shortName);
    $ofo{class} = $qfmt->shortName;
    $ofmt = DTA::CAB::Format->newWriter(%ofo, ($outfile ne '-' ? (file=>$outfile) : qw()))
      or die("$0: could not create output format of class '$ofo{class}': $!");
  }
}

DTA::CAB->debug("using input format class ", ref($ifmt));
DTA::CAB->debug("using query format class ", ref($qfmt), "(level=", ($qfmt->{level}||0), ")");
DTA::CAB->debug("using output format class ", ref($ofmt), "(level=", ($ofmt->{level}||0), ")");

##-- format-dependent analysis options
%analyzeOpts = (
		%analyzeOpts,
		fmt         => ($qfo{class}||$qfmt->shortName),
		contentType => $qfmt->mimeType,
		#encoding   => $qfmt->{encoding},
		pretty      => $qfmt->{level},
	       );

##-- input file
push(@ARGV,'-') if (!@ARGV && $isFileAction);

##-- output file
our $outfh = IO::File->new(">$outfile")
  or die("$0: open failed for output file '$outfile': $!");

##======================================================
## Profiling

our $ntoks = 0;
our $nchrs = 0;
our $cunit = 'chr';

our @tv_values = qw();
sub profile_start {
  return if (scalar(@tv_values) % 2 != 0); ##-- timer already running
  push(@tv_values,[gettimeofday]);
}
sub profile_stop {
  return if (scalar(@tv_values) % 2 == 0); ##-- timer already stopped
  push(@tv_values,[gettimeofday]);
}
sub profile_elapsed {
  my ($started,$stopped);
  my @values = @tv_values;
  my $elapsed = 0;
  while (@values) {
    ($started,$stopped) = splice(@values,0,2);
    $stopped  = [gettimeofday] if (!defined($stopped));
    $elapsed += tv_interval($started,$stopped);
  }
  return $elapsed;
}

profile_start() if ($doProfile);

##======================================================
## Actions
$ofmt->toFh($outfh);

if ($action eq 'list') {
  ##-- action: list
  my @anames = $cli->analyzers;
  $outfh->print("$0: analyzer list for $serverURL\n", map { "$_\n" } @anames);
}
elsif ($action eq 'token') {
  ##-- action: 'tokens'
  $doProfile = 0;
  my ($tokin,$tokout);
  foreach $tokin (map {DTA::CAB::Utils::deep_decode('utf8',$_)} @ARGV) {
    $tokout = $cli->analyzeToken($analyzer, $tokin, \%analyzeOpts);
    $ofmt->putTokenRaw($tokout);
  }
}
elsif ($action eq 'sentence') {
  ##-- action: 'sentence'
  $doProfile = 0;
  my $s_in  = DTA::CAB::Utils::deep_decode('utf8', toSentence([map {toToken($_)} @ARGV]));
  my $s_out = $cli->analyzeSentence($analyzer, $s_in, \%analyzeOpts);
  $ofmt->putSentenceRaw($s_out);
}
elsif ($action eq 'document') {
  ##-- action: 'document'
  my ($doc_filename,$doc);
  foreach $doc_filename (@ARGV) {
    ##-- parse
    $doc = $ifmt->parseFile($doc_filename)
      or die("$prog: could not parse file '$doc_filename': $!");

    ##-- analyze
    $doc = $cli->analyzeDocument($analyzer, $doc, {%analyzeOpts})
      or die("$prog: analyzeDocument() failed: $!");

    ##-- format
    $ofmt->putDocumentRaw($doc);

    if ($doProfile) {
      profile_stop();
      ##-- count tokens, pausing profile timer
      $nchrs += $doc->nChars;
      $ntoks += $doc->nTokens;
      profile_start();
    }
  }
}
elsif ($action eq 'data' || $action eq 'rawfile') {
  binmode($outfh,':raw');
  $cunit = 'chr';
  $analyzeOpts{qraw} = 1 if ($action eq 'rawfile');

  ##-- action: 'data': do server-side parsing
  my ($s_in,$s_out, $doc_filename);
  push(@ARGV,'-') if (!@ARGV);
  foreach $doc_filename (@ARGV) {
    open(DOC,"<$doc_filename") or die("$0: open failed for input file '$doc_filename': $!");
    {
      local $/=undef;
      $s_in = <DOC>;
      close(DOC);
    }
    $s_out = $cli->analyzeData($analyzer, $s_in, {%analyzeOpts});
    $outfh->print( $s_out );
    if ($doProfile) {
      $nchrs += length($s_in);
      ##-- count tokens, pausing profile timer
      profile_stop();
      $ntoks += $ofmt->parseString($s_out)->nTokens;
      profile_start();
    }
  }
}
elsif ($action eq 'raw') {
  $cunit = 'chr';

  my $s_in  = join(' ', @ARGV);
  #utf8::decode($s_in) if (!utf8::is_utf8($s_in) && $ifmt->{utf8});

  my $s_out = $cli->analyzeData($analyzer, $s_in, {%analyzeOpts,qraw=>1});
  $outfh->print($s_out);

  if ($doProfile) {
    $nchrs += length($s_in);
    ##-- count tokens, pausing profile timer
    profile_stop();
    $ntoks += $ofmt->parseString($s_out)->nTokens;
    profile_start();
  }
}
elsif ($action eq 'bench') {
  $doProfile=1;
  our ($bench_i);
  our ($doc_filename,$d_in,$w_in,$w_out);
  $bench_iters = 1 if (!$bench_iters);
  foreach $doc_filename (@ARGV) {
    $d_in = $ifmt->parseFile($doc_filename)
      or die("$0: parse failed for input file '$doc_filename': $!");
    foreach $bench_i (1..$bench_iters) {
      profile_start();
      foreach $w_in (map {@{$_->{tokens}}} @{$d_in->{body}}) {
	$w_out = $cli->analyzeToken($analyzer, $w_in, \%analyzeOpts);
      }
      profile_stop();
    }
    #$ofmt->putDocumentRaw($d_out);
    if ($doProfile) {
      $ntoks += $bench_iters * $d_in->nTokens();
      $nchrs += $bench_iters * $d_in->nChars();
    }
  }
}
else {
  die("$0: unknown action '$action'");
}
$ofmt->flush();
$cli->disconnect();

##-- profiling
DTA::CAB::Logger->logProfile('info', profile_elapsed, $ntoks, $nchrs) if ($doProfile);
DTA::CAB::Logger->trace("client exiting normally.");

__END__
=pod

=head1 NAME

dta-cab-http-client.perl - Generic HTTP client for DTA::CAB::Server::HTTP queries

=head1 SYNOPSIS

 dta-cab-http-client.perl [OPTIONS...] ARGUMENTS

 General Options:
  -help                           ##-- show short usage summary
  -man                            ##-- show longer help message
  -version                        ##-- show version & exit
  -verbose LEVEL                  ##-- set default log level

 Client Options:
  -server URL                     ##-- set server URL (default: http://localhost:9099)
  -timeout SECONDS                ##-- set server timeout in seconds (default: lots)
  -test-connect , -notest-connect ##-- do/don't send a test request to the server (default: don't)
  -trace FILE                     ##-- trace request(s) sent to the server to FILE
  -header HEADER=VALUE            ##-- set additional HTTP header
  -cache-get , -nocache-get       ##-- enable/disable cached response from server (Cache-Control: no-cache)
  -cache-set , -nocache-set       ##-- enable/disable caching of server response  (Cache-Control: no-store)
  -cache     , -nocache           ##-- alias for -[no]cache-get -[no]cache-set
  -get                            ##-- query server using URL-only GET requests
  -post                           ##-- query server using use content-only POST requests
  -multipart , -nomultipart       ##-- for POST requests, do/don't use 'multipart/form-data' encoding (default=don't)
  -xpost                          ##-- query server using URL+content POST requests (default)
  -xmlrpc                         ##-- query server using XML-RPC requests

 Analysis Options:
  -list                           ##-- just list registered analyzers
  -analyzer NAME                  ##-- set analyzer name (default: 'default')
  -analyze-option OPT=VALUE       ##-- set analysis option (default: none)
  -profile , -noprofile           ##-- do/don't report profiling information (default: don't)
  -token                          ##-- ARGUMENTS are token text
  -sentence                       ##-- ARGUMENTS are analyzed as a sentence
  -document                       ##-- ARGUMENTS are filenames, analyzed as documents (default)
  -data                           ##-- ARGUMENTS are filenames, analyzed as documents with server-side parsing
  -raw                            ##-- ARGUMENTS are strings, analyzed as raw untokenized text
  -rawfile                        ##-- ARGUMENTS are filenames, analyed as raw untokenized text

 I/O Options:
  -(input|query|output)-format-(class|option)
  -format-class CLASS             ##-- set {query,input,output} format classes at once
  -format-option                  ##-- set {query,input,output} format options at once
                                  ##-- for non -data mode, set I/O format options
  -output-format-level LEVEL      ##-- override output format level (default: 0)
  -output-file FILE               ##-- set output file (default: STDOUT)

 Logging Options                  ##-- see Log::Log4perl(3pm)
  -log-level LEVEL                ##-- set minimum log level (default=TRACE)
  -log-stderr , -nolog-stderr     ##-- do/don't log to stderr (default=true)
  -log-syslog , -nolog-syslog     ##-- do/don't log to syslog (default=false)
  -log-file LOGFILE               ##-- log directly to FILE (default=none)
  -log-rotate , -nolog-rotate     ##-- do/don't auto-rotate log files (default=true)
  -log-config L4PFILE             ##-- log4perl config file (overrides -log-stderr, etc.)
  -log-watch  , -nowatch          ##-- do/don't watch log4perl config file (default=false)
  -log-option OPT=VALUE           ##-- set any logging option (e.g. -log-option twlevel=trace)

=cut

##==============================================================================
## Description
##==============================================================================
=pod

=head1 DESCRIPTION

dta-cab-http-client.perl is a command-line client for L<DTA::CAB|DTA::CAB>
analysis of token(s), sentence(s), and/or document(s) by
querying a running L<DTA::CAB::Server::HTTP|DTA::CAB::Server::HTTP> server
with the L<DTA::CAB::Client::HTTP|DTA::CAB::Client::HTTP> module.

See L<dta-cab-http-server.perl(1)|dta-cab-http-server.perl> for a
corresponding server.

=cut

##==============================================================================
## Options and Arguments
##==============================================================================
=pod

=head1 OPTIONS AND ARGUMENTS

=cut

##==============================================================================
## Options: General Options
=pod

=head2 General Options

=over 4

=item -help

Display a short help message and exit.

=item -man

Display a longer help message and exit.

=item -version

Display program and module version information and exit.

=item -verbose

Set default log level (trace|debug|info|warn|error|fatal).

=back

=cut

##==============================================================================
## Options: Client Options
=pod

=head2 Client Options

=over 4

=item -server URL

Set server URL (default: http://localhost:8000).
To query a UNIX-domain CAB-server (L<DTA::CAB::Server::HTTP::UNIX>)
on F</path/to/cab.sock>,
you can specify the URL using either
the L<LWP::Protocol::http::SocketUnixAlt|LWP::Protocol::http::SocketUnixAlt>
or apache mod_proxy sytax; the following are equivalent:

 http:/path/to/cab.sock//uri/path
 unix:/path/to/cab.sock|http:///uri/path
 unix:///path/to/cab.sock|http:///uri/path
 http+unix:/path/to/cab.sock//uri/path
 http+unix:/path/to/cab.sock|/uri/path

=item -timeout SECONDS

Set server timeout in seconds (default: lots).

=item -test-connect , -notest-connect

Do/don't send a test HEAD request to the server (default: do).

=item -trace FILE

If specified, all client requests will be logged to FILE.

=back

=cut


##==============================================================================
## Options: Analysis Options
=pod

=head2 Analysis Options

=over 4

=item -list

Don't actually perform any analysis;
rather,
just print a list of analyzers registered with the server.

=item -analyzer NAME

Request analysis by the analyzer registered under name NAME (default: 'default').

=item -analyze-option OPT=VALUE

Set an arbitrary analysis option C<OPT> to C<VALUE>.
May be multiply specified.

Available options depend on the analyzer class to be called.

=item -profile , -noprofile

Do/don't report profiling information (default: do).

=item -token

Interpret ARGUMENTS as token text.

=item -sentence

Interpret ARGUMENTS as a sentence (list of tokens).

=item -document

Interpret ARGUMENTS as filenames, to be analyzed as documents.
This is the default action.

=item -data

Currently just an alias for -document.

=back

=cut

##==============================================================================
## Options: I/O Options
=pod

=head2 I/O Options

=over 4

=item -format-class CLASS

Select I/O format class B<CLASS>.  Default is TT.
B<CLASS> may be any alias supported by
L<DTA::CAB::Format::newFormat|DTA::CAB::Format/newFormat>.

=item -format-option OPT=VALUE

Set an arbitrary I/O format option.
May be multiply specified.

=item -format-encoding ENCODING

Set I/O encoding; default=UTF-8.

=item -format-level LEVEL

Override output format level (default: 1).

=item -output-file FILE

Set output file (default: STDOUT).

=back

=cut


##======================================================================
## Footer
##======================================================================
=pod

=head1 ACKNOWLEDGEMENTS

Perl by Larry Wall.

RPC::XML by Randy J. Ray.

=head1 AUTHOR

Bryan Jurish E<lt>moocow@cpan.orgE<gt>

=head1 COPYRIGHT AND LICENSE

Copyright (C) 2010-2019 by Bryan Jurish. All rights reserved.
This program is free software; you can redistribute it and/or modify
it under the same terms as Perl itself, either Perl version 5.24.1 or,
at your option, any later version of Perl 5 you may have available.

=head1 SEE ALSO

L<dta-cab-analyze.perl(1)|dta-cab-analyze.perl>,
L<dta-cab-convert.perl(1)|dta-cab-convert.perl>,
L<dta-cab-cachegen.perl(1)|dta-cab-cachegen.perl>,
L<dta-cab-http-server.perl(1)|dta-cab-http-server.perl>,
L<dta-cab-http-client.perl(1)|dta-cab-http-client.perl>,
L<dta-cab-xmlrpc-server.perl(1)|dta-cab-xmlrpc-server.perl>,
L<dta-cab-xmlrpc-client.perl(1)|dta-cab-xmlrpc-client.perl>,
L<DTA::CAB(3pm)|DTA::CAB>,
L<RPC::XML(3pm)|RPC::XML>,
L<perl(1)|perl>,
...

=cut