#!/usr/bin/perl -w

use lib qw(. ./ddc-perl ./DDC-perl);
use DDC::Concordance;
use Encode qw(encode decode);
use Getopt::Long qw(:config no_ignore_case);
use Pod::Usage;
use File::Basename qw(basename dirname);

use strict;

##------------------------------------------------------------------------------
## Constants & Globals
##------------------------------------------------------------------------------
our ($help,$version);
our $prog = basename($0);
our $corpora = undef;
our $fmt_class = 'DDC::Format::Kwic';
our $fieldNamesStr = '';
our $qencoding = undef;
our $query_from_file = 0;
our $query_sleep = 0; ##-- pause inbetween multiple queries
our $query_verbose = 0; ##-- trace queries?
our $query_multi = 0; ##-- multi-query mode?

our $bench_iters = undef;
our $bench_seconds = undef;
our $bench_clear_cache = 0; ##-- not working at server log-level<=info 2020-02-10
our $expandChain = undef;

our %fmt = (
	    columns => 80,
	    width   => 32,
	    level   => 1,
	    vars    => {}, ##-- for template formatting
	   );

our %client = (
	       connect=>{Domain=>'INET',PeerAddr=>"localhost",PeerPort=>50011},
	       start=>0,
	       limit=>10,
	       hint=>'',
	       timeout=>60,
	       mode=>'json',
	       encoding=>'UTF-8',
	       parseMeta=>1,
	       parseContext=>1,
	       keepRaw=>0,
	       fieldNames=>undef,
	       fieldSeparator=>"\x{1f}",
	       tokenSeparator=>"\x{1e}",
	       #nFields=>7,
	       dropFields => [],
	       expandFields => 1,
	      );

##------------------------------------------------------------------------------
## Command-line
##------------------------------------------------------------------------------
sub setFormatSub {
  my $base = shift;
  my $class = "DDC::Format::$base";
  my $file  = $class;
  $file =~ s/::/\//g;
  $file .= ".pm";
  return sub {
    require $file or die("$prog: could not load class '$class': $!");
    $fmt_class=$class;
  }
}

GetOptions(##-- General
	   'help|h' => \$help,
	   'version|V' => \$version,

	   ##-- Connection
	   'server|s=s' => \$client{connect}{PeerAddr},
	   'port|p=s'   => \$client{connect}{PeerPort},
	   'unix|u=s'   => sub { %{$client{connect}} = (Domain=>'UNIX',Peer=>$_[1]) },
           'url|U=s'    => \$client{connect}{url},
	   'corpora|corpus|c=s' => \$corpora,
	   'opt-file|opt|O=s' => \$client{optFile},
	   'mode|m=s' => \$client{mode},
	   'query-encoding|qencoding|qe=s' => \$qencoding,

	   ##-- Benchmarking and testing
	   'benchmark-iterations|benchmark-iters|bench-iters|bibenchmark|bench|b|iters|i=i' => \$bench_iters,
	   'benchmark-seconds|bench-seconds|bs=i' => \$bench_seconds,
	   'bench-clear-cache|bench-clear|bc!' => \$bench_clear_cache,
	   'query-file|file|qf!' => \$query_from_file,
	   'query-sleep|sleep|qs=i' => \$query_sleep,
	   'query-verbose|qv|verbose|query-trace|qt|trace!' => \$query_verbose,

	   ##-- Hit selection
	   'start|S=i' => \$client{start},
	   'limit|l=i' => \$client{limit},
	   'timeout|t=i' => \$client{timeout},
	   'hint|H=s' => \$client{hint},
	   'expand-pipeline|xpipe|pipe|xp|expand-chain|chain|xc=s' => \$expandChain,

	   ##-- Hit Parsing
	   'parse-meta|meta|pm!' => \$client{parseMeta},
	   'parse-context|context|pc!' => \$client{parseContext},
	   'keep-raw|keep|k!' => \$client{keepRaw},
	   'field-names|fields|names|f=s' => \$fieldNamesStr,
	   'field-separator|fsep|fs=s' => \$client{fieldSeparator},
	   'token-separator|tsep|ts=s' => \$client{tokenSeparator},
	   'encoding|e=s' => \$client{encoding},
	   'expand-fields|expand|xf!' => \$client{expandFields},
	   'drop-field|drop|df=s' => $client{dropFields},

	   ##-- Formatting
	   'columns|cols|C=i' => \$fmt{columns},
	   'kwic-width|kw|width|w=i' => \$fmt{width},
	   'level|L=i' => \$fmt{level},
	   'pretty!' => sub { $fmt{level}=$_[1] ? 1 : 0; },
	   'compact|z|ugly' => sub { $fmt{level}=0; },
	   'text|txt' => setFormatSub('Text'),
	   'kwic|kwc' => setFormatSub('Kwic'),
	   'dumper|dump|d' => setFormatSub('Dumper'),
	   'json|JSON|j' => setFormatSub('JSON'),
	   'yaml|yml|YAML|y' => setFormatSub('YAML'),
	   'template|tmpl|tt|T=s' => sub { setFormatSub('Template')->(); $fmt{src}=$_[1]; },
	   #'raw|r' => sub { $fmt_class='DDC::Format::Raw'; },
	   'raw|r' => sub { $fmt_class='DATA'; },
	   'request|req|R' => sub { $client{mode}='request'; $fmt_class='DATA'; },
	   'multi|M!' => \$query_multi,
	   'variable|var|v=s%' => $fmt{vars},
	  );

if ($version) {
  print "$prog (DDC::Concordance version $DDC::Concordance::VERSION) by Bryan Jurish <jurish\@bbaw.de>\n";
  exit 0;
}
pod2usage({-exitval=>0, -verbose=>0}) if ($help);
pod2usage({-msg=>"No query specified!", -exitval=>1, -verbose=>0}) if (!@ARGV);


##------------------------------------------------------------------------------
## subs

sub qtrace {
  return if (!$query_verbose);
  print STDERR "$prog: ", @_, "\n";
}

sub quotenp {
  my $s = shift;
  $s =~ s{([^[:print:]])}{sprintf("\\x{%0.2x}",ord($1))}ge;
  return $s;
}

##------------------------------------------------------------------------------
## MAIN
##------------------------------------------------------------------------------

##-- field names
our $fieldNames = undef;
if ($fieldNamesStr ne '') {
  $fieldNames = [grep {defined($_) && $_ ne ''} split(/[\,\s]+/,$fieldNamesStr)];
}

##-- port
$client{connect}{PeerPort} = $1 if ($client{connect}{Domain} eq 'INET' && $client{connect}{PeerAddr} =~ s/\:([0-9]+)$//);

##-- client
our $dclient = DDC::Client::Distributed->new(%client,
					     keepRaw=>($fmt_class =~ /Raw/ || $client{mode} =~ /^(?:raw|req)/i ? 1 : 0),
					     fieldNames=>$fieldNames,
					    );
$dclient->open()
  or die("$prog: could not connect to DDC server on ", $dclient->addrStr, ": $!");

if ($expandChain) {
  ##-- term expansion mode
  my $terms = [@ARGV];
  my $chain = $expandChain;
  qtrace("EXPAND [$chain]: $terms");
  my $buf = $dclient->expand_terms($chain,$terms);
  $buf = encode($client{encoding},$buf) if ($client{encoding} && utf8::is_utf8($buf));
  print $buf, "\n";
  exit 0;
}

##-- query from command-line or file
##   + file format: QUERY "\n"
##   + optional pseudo-flags "#start START" "#limit LIMIT"
##   + @queries = ({query=>$str, start=>$start, limit=>$limit}, ...)
our (@queries);
if ($query_from_file) {
  foreach my $qfile (@ARGV) {
    open(my $qfh,"<$qfile") or die("$prog: failed to open query file '$qfile': $!");
    my ($q,$start,$limit);
    while (<$qfh>) {
      chomp;
      next if (/^\s*$/ || /^\s*\/\//);
      $q = $_;
      $start = ($q =~ s/\s*\#start[=\s\[]+(\d+)\]?//gi ? $1 : $dclient->{start});
      $limit = ($q =~ s/\s*\#limit[=\s\[]+(\d+)\]?//gi ? $1 : $dclient->{limit});
      push(@queries,{start=>$start,limit=>$limit,query=>$q});
    }
    close($qfh);
  }
}
elsif ($query_multi) {
  @queries = map { {start=>$dclient->{start},limit=>$dclient->{limit},query=>$_} } @ARGV;
}
else {
  @queries = ( {start=>$dclient->{start},limit=>$dclient->{limit},query=>join(' ',@ARGV)} );
}

##-- decode and tweak control-character escapes in queries
foreach (@queries) {
  $_->{query} .= " :${corpora}" if (defined($corpora));
  $_->{query} = decode($qencoding,$_->{query}) if (defined($qencoding) && !utf8::is_utf8($_->{query}));

  my $q0 = $_->{query};
  if ($client{mode} =~ /^(?:raw|req)/i) {
    my $q = $_->{query};
    $q =~ s/\\x\{([0-9a-f]+)\}/chr(hex($1))/egi;
    $q =~ s/\\x([0-9a-f]{1,2})/chr(hex($1))/egi;
    $_->{query} = $q;
  }
}

##-- benchmark?
if ($bench_iters || $bench_seconds) {
  require Time::HiRes;

  my $mode = $dclient->{mode};
  print
    ("$prog: benchmarking "
     .join(", ",
	   ($bench_seconds ? "up to $bench_seconds second(s)" : qw()),
	   ($bench_iters ? "$bench_iters iteration(s)" : qw()))
     ." of ",
     (@queries==1 ? "query ($queries[0]{query})" : (scalar(@queries)." queries")),
     " with query mode $mode...\n"
    );
  $bench_iters ||= 'inf';
  $bench_seconds ||= 'inf';

  my ($t0,$i);
  my $elapsed = 0;
  my $n = 0;
  for ($i=0; $i < $bench_iters; ++$i) {
    $dclient->queryRaw(['clear_cache -1']) if ($bench_clear_cache);
    if (!$query_multi) {
      ##-- atomic query mode (default)
      foreach (@queries) {
	@$dclient{qw(start limit)} = @$_{qw(start limit)};
	$t0 = [Time::HiRes::gettimeofday()];
	$dclient->queryRaw($_->{query});
	$elapsed += Time::HiRes::tv_interval($t0, [Time::HiRes::gettimeofday()]);
	++$n;
	last if ($elapsed >= $bench_seconds);
      }
    }
    else {
      ##-- multi-query mode (e.g. get_first_hits + get_hit_strings; no handling of @$_{qw(start limit)})
      $t0 = [Time::HiRes::gettimeofday()];
      $dclient->queryMulti(@queries);
      $elapsed += Time::HiRes::tv_interval($t0, [Time::HiRes::gettimeofday()]);
      $n += scalar(@queries);
    }
    $dclient->queryRaw(['clear_cache -1']) if ($bench_clear_cache);
    last if ($elapsed >= $bench_seconds);
  }
  my $rate = sprintf("%7.2f", ($elapsed ? ($n/$elapsed) : "nan"));
  print "\t", sprintf("%.5f", $elapsed), " wallclock secs @ $rate q/s (n=$n)\n";
  exit 0;
}

##-- query handling guts
sub showQueryResponse {
  my ($query,$buf) = @_;

  if ($fmt_class eq 'DATA') {
    print $buf, ($buf =~ /\n\z/s ? qw() : "\n");
    return;
  }

  ##-- parse query buffer
  my $hits = $dclient->parseData($buf)
    or die("$prog: could not parse query data: $!");
  if ($hits->{error_}) {
    print STDERR "$prog: server error ($hits->{istatus_} $hits->{nstatus_}): $hits->{error_}\n";
    return;
  }
  if (!@{$hits->{hits_}//[]} && !@{$hits->{counts_}//[]}) {
    print STDERR "$prog: no hits found.\n";
    return;
  }

  ##-- format data
  $fmt{vars}{totalResults} = $hits->{nhits_};
  $fmt{vars}{client}       = $dclient;
  $fmt{vars}{query}        = $query;
  my $fmt = $fmt_class->new(%fmt,encoding=>$client{encoding})
    or die("$prog: could not create $fmt_class formatting object");

  my $outstr = $fmt->toString($hits);
  $outstr = encode($client{encoding},$outstr) if ($client{encoding} && utf8::is_utf8($outstr));
  print $outstr;
}

##-- query guts
if (!$query_multi) {
  ##-- atomic query mode (default)
  foreach (@queries) {
    sleep($query_sleep) if ($query_sleep > 0); ##-- sleep

    my $query = $_->{query};
    @$dclient{qw(start limit)} = @$_{qw(start limit)};

    print "#\n# QUERY: $query -start=$dclient->{start} -limit=$dclient->{limit}\n" if (@queries > 1);
    qtrace("QUERY [start=$dclient->{start},limit=$dclient->{limit}] $_->{query}");
    my $buf = $dclient->queryRaw($query)
      or die("$prog: query ($query) failed: $!");

    showQueryResponse($query,$buf);
  }
}
else {
  ##-- multi-query mode (e.g. get_first_hits + get_hit_strings)

  print "#\n# MULTI:\n", map {"#  + ".quotenp($_->{query})."\n"} @queries;
  my @bufs = $dclient->queryMulti(map {$_->{query}} @queries);
  for (my $i=0; $i <= $#queries; ++$i) {
    print "#\n# QUERY: ".quotenp($queries[$i]{query})."\n";
    showQueryResponse($queries[$i]{query},$bufs[$i]);
  }
}


__END__

##------------------------------------------------------------------------------
## PODS
##------------------------------------------------------------------------------
=pod

=head1 NAME

ddc-query.perl - distributed DDC query tool in perl

=head1 SYNOPSIS

 ddc-query.perl [OPTIONS] [QUERY...]

 General Options:
  -help
  -version

 Connection Options:
  -server  SERVER                   ##-- TCP server host address (default=localhost)
  -port    PORT                     ##-- TCP server port (default=50011)
  -unix    PATH                     ##-- UNIX server socket path (overrides TCP settings)
  -url     URL                      ##-- generic server URL (unix://PATH or inet://HOST:PORT)
  -timeout DDC_TIMEOUT              ##-- default=60
  -mode    QMODE                    ##-- query mode: 'json', 'table', 'text', 'request' (default='json')
  -qenc    QENCODING                ##-- query encoding (default=raw bytes)
  -file			            ##-- arguments are query-list filenames, not queries
  -request			    ##-- alias for -mode=request -raw (for protocol debugging)
  -multi                            ##-- send multiple requests on same connection (default=no)

 Benchmarking and Batch Options:
  -bench-iters ITERS                ##-- benchmark: time ITERS query iterations
  -bench-seconds SECONDS            ##-- benchmark: abort after SECONDS query-processing time (default=inf)
  -[no]bench-clear		    ##-- do/don't clear server cache between benchmark iterations (default=do)
  -query-file                       ##-- batch-execute queries from command-line argument file(s)
  -query-sleep SECONDS              ##-- sleep for SECONDS between multiple queries (default=0)
  -query-verbose                    ##-- trace query execution to stderr

 Hit Selection Options:
  -corpora DDC_CORPORA              ##-- comma-separated list; default=none
  -start FIRST_HIT                  ##-- offset of 1st hit to retrieve (default=0)
  -limit MAX_HITS                   ##-- maximum number of hits to retrieve (default=10)
  -hint HINT                        ##-- set optional navigation hint (default: empty string)
  -expand-chain PIPELINE            ##-- perform term expansion via PIPELINE rather than a retrieval query

 Hit Parsing Options
  -parse-meta   , -no-meta          ##-- do/don't parse hit metadata (default=do)
  -parse-context, -no-context       ##-- do/don't parse hit context  (default=do)
  -keep-raw     , -nokeep-raw       ##-- do/don't keep raw hit context (default=don't)
  -opt-file OPTFILE                 ##-- load DDC .opt file?
  -field-names FIELDS               ##-- parse into named FIELDS (space-separated list; default=none)
  -word-separator REGEX             ##-- word separator regex for context parsing (default=' ')
  -field-separator REGEX            ##-- field separator regex for context parsing (default='\x{1f}' : ASCII unit separator)
  -token-separator REGEX            ##-- token separator regex for context parsing (default='\x{1e}' : ASCII record separator)
  -encoding ENCODING                ##-- server encoding (default='UTF-8')

 Formatting Options:
  -columns COLS                     ##-- output columns for text formatter (default=80)
  -width   COLS                     ##-- number of context characters for KWIC formatter (default=32)
  -pretty , -ugly                   ##-- do/don't pretty-print (default=don't)
  -raw                              ##-- dump raw query response buffer
  -dumper                           ##-- use Data::Dumper formatter
  -kwic                             ##-- use KWIC formatter
  -text                             ##-- use old text formatter
  -json                             ##-- use JSON formatter
  -yaml                             ##-- use YAML formatter
  -template TTKFILE                 ##-- use Template-Toolkit formatter with TTKFILE

=cut

##------------------------------------------------------------------------------
## Options and Arguments
##------------------------------------------------------------------------------
=pod

=head1 OPTIONS AND ARGUMENTS

not yet written

=cut

##------------------------------------------------------------------------------
## Description
##------------------------------------------------------------------------------
=pod

=head1 DESCRIPTION

not yet written

=cut


##------------------------------------------------------------------------------
## Footer
##------------------------------------------------------------------------------
=pod

=head1 AUTHOR

Bryan Jurish E<lt>moocow@cpan.orgE<gt>

=cut