package App::ElasticSearch::Utilities::QueryString;
# ABSTRACT: CLI query string fixer

use strict;
use warnings;

our $VERSION = '5.9'; # VERSION

use App::ElasticSearch::Utilities qw(:config);
use App::ElasticSearch::Utilities::Query;
use CLI::Helpers qw(:output);
use Module::Pluggable::Object;
use Moo;
use Ref::Util qw(is_arrayref);
use Types::Standard qw(ArrayRef Enum);

use namespace::autoclean;


my %JOINING  = map { $_ => 1 } qw( AND OR );
my %TRAILING = map { $_ => 1 } qw( AND OR NOT );


has 'context' => (
    is      => 'rw',
    isa     => Enum[qw(query filter)],
    lazy    => 1,
    default => sub { 'query' },
);


has search_path => (
    is      => 'rw',
    isa     => ArrayRef,
    default => sub {[]},
);


has default_join => (
    is      => 'rw',
    isa     => Enum[qw(AND OR)],
    default => sub { 'AND' },
);


has plugins => (
    is      => 'ro',
    isa     => ArrayRef,
    builder => '_build_plugins',
    lazy    => 1,
);


sub expand_query_string {
    my $self = shift;

    my $query  = App::ElasticSearch::Utilities::Query->new();
    my @processed = ();
    TOKEN: foreach my $token (@_) {
        foreach my $p (@{ $self->plugins }) {
            my $res = $p->handle_token($token);
            if( defined $res ) {
                push @processed, is_arrayref($res) ? @{$res} : $res;
                next TOKEN;
            }
        }
        push @processed, { query_string => $token };
    }

    debug({color=>"magenta"}, "Processed parts");
    debug_var({color=>"magenta"},\@processed);

    my $context = $self->context eq 'query' ? 'must' : 'filter';
    my $invert=0;
    my @dangling=();
    my @qs=();
    foreach my $part (@processed) {
        if( exists $part->{dangles} ) {
            push @dangling, $part->{query_string};
        }
        elsif( exists $part->{query_string} ) {
            push @qs, @dangling, $part->{query_string};
            @dangling=(),
        }
        elsif( exists $part->{condition} ) {
            my $target = $invert ? 'must_not' : $context;
            $query->add_bool( $target => $part->{condition} );
            @dangling=();
        }
        elsif( exists $part->{nested} ) {
            $query->nested($part->{nested}{query});
            $query->nested_path($part->{nested}{path});
            @dangling=();
        }
        # Carry over the Inversion for instance where we jump out of the QS
        $invert = exists $part->{invert} && $part->{invert};
    }
    if(@qs)  {
        pop   @qs while @qs && exists $TRAILING{$qs[-1]};
        shift @qs while @qs && exists $JOINING{$qs[0]};

        # Ensure there's a joining token, otherwise use our default
        if( @qs > 1 ) {
            my $prev_query = 0;
            my @joined = ();
            foreach my $part ( @qs ) {
                if( $prev_query ) {
                    push @joined, $self->default_join() unless exists $JOINING{$part};
                }
                push @joined, $part;
                # Here we include AND, NOT, OR
                $prev_query = exists $TRAILING{$part} ? 0 : 1;
            }
            @qs = @joined;
        }
    }
    # $query->add_aggregation();
    $query->add_bool($context => { query_string => { query => join(' ', @qs) } }) if @qs;

    return $query;
}

# Builder Routines for QS Objects
sub _build_plugins {
    my $self    = shift;
    my $globals = es_globals('plugins');
    my $finder = Module::Pluggable::Object->new(
        search_path => ['App::ElasticSearch::Utilities::QueryString',@{ $self->search_path }],
        except      => [qw(App::ElasticSearch::Utilities::QueryString::Plugin)],
        instantiate => 'new',
    );
    my @plugins;
    foreach my $p ( sort { $a->priority <=> $b->priority || $a->name cmp $b->name }
        $finder->plugins( options => defined $globals ? $globals : {} )
    ) {
        debug(sprintf "Loaded %s with priority:%d", $p->name, $p->priority);
        push @plugins, $p;
    }
    return \@plugins;
}

# Return true
1;

__END__

=pod

=encoding UTF-8

=head1 NAME

App::ElasticSearch::Utilities::QueryString - CLI query string fixer

=head1 VERSION

version 5.9

=head1 SYNOPSIS

This class provides a pluggable architecture to expand query strings on the
command-line into complex Elasticsearch queries.

=head1 ATTRIBUTES

=head2 context

Defaults to 'query', but can also be set to 'filter' so the elements will be
added to the 'must' or 'filter' parameter.

=head2 search_path

An array reference of additional namespaces to search for loading the query string
processing plugins.  Example:

    $qs->search_path([qw(My::Company::QueryString)]);

This will search:

    App::ElasticSearch::Utilities::QueryString::*
    My::Company::QueryString::*

For query processing plugins.

=head2 default_join

When fixing up the query string, if two tokens are found next to eachother
missing a joining token, join using this token.  Can be either C<AND> or C<OR>,
and defaults to C<AND>.

=head2 plugins

Array reference of ordered query string processing plugins, lazily assembled.

=head1 METHODS

=head2 expand_query_string(@tokens)

This function takes a list of tokens, often from the command line via @ARGV.  Uses
a plugin infrastructure to allow customization.

Returns: L<App::ElasticSearch::Utilities::Query> object

=head1 TOKENS

The token expansion plugins can return undefined, which is basically a noop on the token.
The plugin can return a hash reference, which marks that token as handled and no other plugins
receive that token.  The hash reference may contain:

=over 2

=item query_string

This is the rewritten bits that will be reassembled in to the final query string.

=item condition

This is usually a hash reference representing the condition going into the bool query. For instance:

    { terms => { field => [qw(alice bob charlie)] } }

Or

    { prefix => { user_agent => 'Go ' } }

These conditions will wind up in the B<must> or B<must_not> section of the B<bool> query depending on the
state of the the invert flag.

=item invert

This is used by the bareword "not" to track whether the token invoked a flip from the B<must> to the B<must_not>
state.  After each token is processed, if it didn't set this flag, the flag is reset.

=item dangles

This is used for bare words like "not", "or", and "and" to denote that these terms cannot dangle from the
beginning or end of the query_string.  This allows the final pass of the query_string builder to strip these
words to prevent syntax errors.

=back

=head1 Extended Syntax

The search string is pre-analyzed before being sent to ElasticSearch.  The following plugins
work to manipulate the query string and provide richer, more complete syntax for CLI applications.

=head2 App::ElasticSearch::Utilities::Barewords

The following barewords are transformed:

    or => OR
    and => AND
    not => NOT

=head2 App::ElasticSearch::Utilities::QueryString::IP

If a field is an IP address uses CIDR Notation, it's expanded to a range query.

    src_ip:10.0/8 => src_ip:[10.0.0.0 TO 10.255.255.255]

=head2 App::ElasticSearch::Utilities::Range

This plugin translates some special comparison operators so you don't need to
remember them anymore.

Example:

    price:<100

Will translate into a:

    { range: { price: { lt: 100 } } }

And:

    price:>50,<100

Will translate to:

    { range: { price: { gt: 50, lt: 100 } } }

=head3 Supported Operators

B<gt> via E<gt>, B<gte> via E<gt>=, B<lt> via E<lt>, B<lte> via E<lt>=

=head2 App::ElasticSearch::Utilities::Underscored

This plugin translates some special underscore surrounded tokens into
the Elasticsearch Query DSL.

Implemented:

=head3 _prefix_

Example query string:

    _prefix_:useragent:'Go '

Translates into:

    { prefix => { useragent => 'Go ' } }

=head2 App::ElasticSearch::Utilities::QueryString::FileExpansion

If the match ends in .dat, .txt, or .csv, then we attempt to read a file with that name and OR the condition:

    $ cat test.dat
    50  1.2.3.4
    40  1.2.3.5
    30  1.2.3.6
    20  1.2.3.7

Or

    $ cat test.csv
    50,1.2.3.4
    40,1.2.3.5
    30,1.2.3.6
    20,1.2.3.7

Or

    $ cat test.txt
    1.2.3.4
    1.2.3.5
    1.2.3.6
    1.2.3.7

We can source that file:

    src_ip:test.dat => src_ip:(1.2.3.4 1.2.3.5 1.2.3.6 1.2.3.7)

This make it simple to use the --data-file output options and build queries
based off previous queries. For .txt and .dat file, the delimiter for columns
in the file must be either a tab, comma, or a semicolon.  For files ending in
.csv, Text::CSV_XS is used to accurate parsing of the file format.

You can also specify the column of the data file to use, the default being the last column or (-1).  Columns are
B<zero-based> indexing. This means the first column is index 0, second is 1, ..  The previous example can be rewritten
as:

    src_ip:test.dat[1]

or:
    src_ip:test.dat[-1]

This option will iterate through the whole file and unique the elements of the list.  They will then be transformed into
an appropriate L<terms query|http://www.elasticsearch.org/guide/en/elasticsearch/reference/current/query-dsl-terms-query.html>.

=head1 AUTHOR

Brad Lhotsky <brad@divisionbyzero.net>

=head1 COPYRIGHT AND LICENSE

This software is Copyright (c) 2012 by Brad Lhotsky.

This is free software, licensed under:

  The (three-clause) BSD License

=cut