#!perl

our $DATE = '2019-07-03'; # DATE
our $VERSION = '0.122'; # VERSION

use 5.010001;
use strict;
use warnings;

use Getopt::Long;

my %Opts = (
    op => undef,
    ignore_case => 0,
    ignore_all_space => 0,
    skip_chars => 0,
    glue => ',',
);

sub parse_cmdline {
    my $res = GetOptions(
        'ignore-case|i'      => \$Opts{ignore_case},
        'ignore-all-space|w' => \$Opts{ignore_all_space},
        'op=s'          => \$Opts{op},
        'union'         => sub { $Opts{op} = 'union' },
        'intersect'     => sub { $Opts{op} = 'intersect' },
        'diff'          => sub { $Opts{op} = 'diff' },
        'symdiff'       => sub { $Opts{op} = 'symdiff' },
        'cross'         => sub { $Opts{op} = 'cross' },
        'glue=s'        => \$Opts{glue},
        'skip-chars=i'  => \$Opts{skip_chars},
        'check-chars=i' => \$Opts{check_chars},
        'version|v'     => sub {
            no warnings;
            say "setop version $main::VERSION ($main::DATE)";
            exit 0;
        },
        'help|h'        => sub {
            print <<USAGE;
Usage:
  setop [OPTIONS]... FILE FILE ...
  setop --help
Options:
  --ignore-case, -i
  --ignore-all-space, -w
  --op=s
  --union (shortcut for --op=union)
  --intersect (shortcut for --op=intersect)
  --diff (shortcut for --op=diff)
  --symdiff (shortcut for --op=symdiff)
  --cross (shortcut for --op=cross)
  --check-chars=i
  --skip-chars=i
For more details, see the manpage/documentation.
USAGE
            exit 0;
        },
    );
    exit 99 if !$res;
    if (!$Opts{op}) {
        warn "Please specify an operation (--op)\n"; exit 99;
    }
    if ($Opts{op} !~ /\A(union|intersect|diff|symdiff|cross)\z/) {
        warn "Unknown op, please see --help for known operations\n"; exit 99;
    }
    unless ($Opts{op} ne 'cross' || @ARGV > 1) {
        warn "Please specify at least 2 input files\n";
        exit 99;
    }
    unless (@ARGV >= 1) {
        warn "Please specify at least 1 input file\n";
        exit 99;
    }
}

sub run {
    require Tie::IxHash;

    # a regular hash
    my %res0;

    # a hash that remembers insert order
    tie my(%res), 'Tie::IxHash';

    my $op  = $Opts{op};
    my $ic  = $Opts{ignore_case};
    my $ias = $Opts{ignore_all_space};
    my $sch = $Opts{skip_chars};
    my $cch = $Opts{check_chars};
    my $ign = $ic || $ias || $sch || defined($cch);

    my @aoaos;

    my $i = 0;
    my $stdin_specified;
  FILE:
    for my $i (1..@ARGV) {
        my $fh;
        my $fname = $ARGV[$i-1];
        if ($fname eq '-') {
            do { warn "Can't use STDIN twice\n"; exit 99 }
                if $stdin_specified++;
            $fh = *STDIN;
        } else {
            open $fh, "<", $fname or die "Can't open input file $fname: $!\n";
        }
        if ($op eq 'union') {

            if ($ign) {
                while (<$fh>) {
                    my $k = $ic ? lc($_) : $_;
                    $k =~ s/\s+//g if $ias;
                    $res{$k} = $_ unless exists $res{$k};
                }

                # print result
                if ($i == @ARGV) {
                    print $res{$_} for keys %res;
                }
            } else {
                while (<$fh>) { $res{$_}++ }

                # print result
                if ($i == @ARGV) {
                    print for keys %res;
                }
            }

        } elsif ($op eq 'intersect') {

            if ($ign) {
                if ($i == 1) {
                    while (<$fh>) {
                        my $k = $ic ? lc($_) : $_;
                        $k =~ s/\s+//g if $ias;
                        $res{$k} = [1,$_] unless exists $res{$k};
                    }
                } else {
                    while (<$fh>) {
                        my $k = $ic ? lc($_) : $_;
                        $k =~ s/\s+//g if $ias;
                        if ($res{$k} && $res{$k}[0] == $i-1) {
                            $res{$k}[0]++;
                        }
                    }
                }
                # print result
                if ($i == @ARGV) {
                    for (keys %res) {
                        print $res{$_}[1] if $res{$_}[0] == $i;
                    }
                }
            } else {
                if ($i == 1) {
                    while (<$fh>) { $res{$_} = 1 }
                } else {
                    while (<$fh>) {
                        if ($res{$_} && $res{$_} == $i-1) {
                            $res{$_}++;
                        }
                    }
                }
                # print result
                if ($i == @ARGV) {
                    for (keys %res) {
                        print if $res{$_} == $i;
                    }
                }
            }


        } elsif ($op eq 'diff') {

            if ($ign) {
                if ($i == 1) {
                    while (<$fh>) {
                        my $k = $ic ? lc($_) : $_;
                        chomp $k;
                        $k = substr($k, $sch) if $sch;
                        $k = substr($k, 0, $cch) if defined $cch;
                        $k =~ s/[ \t]+//g if $ias;
                        $res0{$k} //= [];
                        push @{ $res0{$k} }, [$., $_];
                    }
                    #use DD; dd \%res0;
                } else {
                    while (<$fh>) {
                        my $k = $ic ? lc($_) : $_;
                        chomp $k;
                        $k = substr($k, $sch) if $sch;
                        $k = substr($k, 0, $cch) if defined $cch;
                        $k =~ s/\s+//g if $ias;
                        delete $res0{$k};
                    }
                }
                # print result
                if ($i == @ARGV) {
                    my @lines = map { @{ $res0{$_} } } keys %res0;
                    for (sort { $a->[0] <=> $b->[0] } @lines) {
                        print $_->[1];
                    }
                }
            } else {
                if ($i == 1) {
                    while (<$fh>) { $res{$_}++ }
                } else {
                    while (<$fh>) {
                        delete $res{$_};
                    }
                }
                # print result
                if ($i == @ARGV) {
                    print for keys %res;
                }
            }

        } elsif ($op eq 'symdiff') {

            if ($ign) {
                if ($i == 1) {
                    while (<$fh>) {
                        my $k = $ic ? lc($_) : $_;
                        $k =~ s/\s+//g if $ias;
                        $res{$k} = [1,$_] unless exists $res{$k};
                    }
                } else {
                    while (<$fh>) {
                        my $k = $ic ? lc($_) : $_;
                        $k =~ s/\s+//g if $ias;
                        if (!$res{$k}) {
                            $res{$k} = [1, $_];
                        } elsif ($res{$k}[0] <= 2) {
                            $res{$k}[0]++;
                        }
                    }
                }
                # print result
                if ($i == @ARGV) {
                    for (keys %res) {
                        print $res{$_}[1] if $res{$_}[0] == 1;
                    }
                }
            } else {
                if ($i == 1) {
                    while (<$fh>) { $res{$_} = 1 }
                } else {
                    while (<$fh>) {
                        if (!$res{$_} || $res{$_} <= 2) {
                            $res{$_}++;
                        }
                    }
                }
                # print result
                if ($i == @ARGV) {
                    for (keys %res) {
                        print if $res{$_} == 1;
                    }
                }
            }

        } elsif ($op eq 'cross') {

            my $aos = [];
            while (<$fh>) {
                chomp;
                push @$aos, $_;
            }
            push @aoaos, $aos;
            # print result
            if ($i == @ARGV) {
                require Set::CrossProduct;
                my $iter = Set::CrossProduct->new(\@aoaos);
                my $glue = $Opts{glue};
                while (my $tuple = $iter->get) {
                    print join($glue, @$tuple), "\n";
                }
            }

        }

    }

}

# MAIN

parse_cmdline();
run();

1;
# ABSTRACT: Set operations (union, intersection, difference, symmetric diff) on lines of files
# PODNAME: setop

__END__

=pod

=encoding UTF-8

=head1 NAME

setop - Set operations (union, intersection, difference, symmetric diff) on lines of files

=head1 VERSION

This document describes version 0.122 of setop (from Perl distribution App-setop), released on 2019-07-03.

=head1 SYNOPSIS

 setop [OPTION]... FILE FILE ...

Examples:

 % setop --union file1 file2 file3 ;# combine files, duplicate lines removed, order preserved
 % setop --intersect file1 file2 file3; # show lines common in all three files
 % setop --diff <(ls /path1) <(ls /path2) ;# show lines in /path1 not in /path2

=head1 DESCRIPTION

C<setop> treats files as a sets of lines, and performs operations between the
sets.

=head1 EXIT CODES

0 on success.

255 on I/O error.

99 on command-line options error.

=head1 OPTIONS

=over

=item * --help, -h

Show help message and exit.

=item * --version, -v

Show version and exit.

=item * --op=S

Pick operation. Known operations are: B<union> (return lines from the first file
and others, duplicate lines removed [even duplicates from the same file], order
preserved), B<intersect> (return common lines found in every file, order
preserved), B<diff> (return lines found in the first file but not the second,
duplicate removed, order preserved), B<symdiff> (short for "symetric
difference", return lines found in either file but not both, duplicate removed,
order preserved), B<cross> (short for "cross product", produce cross product
from lines from each file).

=item * --union

Shortcut for C<--op union>.

=item * --intersect

Shortcut for C<--op intersect>.

=item * --diff

Shortcut for C<--op diff>.

=item * --symdiff

Shortcut for C<--op symdiff>.

=item * --cross

Shortcut for C<--op cross>.

=item * --ignore-case, -i

=item * --ignore-all-space, -w

=item * --glue=s

Glue character to use to combine lines when operation is B<cross>.

=item * --skip-chars=i

Number of characters to skip when comparing, much like the same option in
B<uniq> command.

Currently this is only implemented for C<diff> operation.

=item * --check-chars=i

Number of characters to check when comparing, much like the same option in
B<uniq> command.

Currently this is only implemented for C<diff> operation.

=back

=head1 HISTORY

I first wrote C<fileop> in Ruby in 2003, since Ruby has nice C<+> and C<->
operators for arrays.

Rewrote in Perl in 2014. Script renamed to C<setop>, changed command-line
options a bit, now preserves order of lines.

=head1 HOMEPAGE

Please visit the project's homepage at L<https://metacpan.org/release/App-setop>.

=head1 SOURCE

Source repository is at L<https://github.com/perlancar/perl-App-setop>.

=head1 BUGS

Please report any bugs or feature requests on the bugtracker website L<https://rt.cpan.org/Public/Dist/Display.html?Name=App-setop>

When submitting a bug or request, please include a test-file or a
patch to an existing test-file that illustrates the bug or desired
feature.

=head1 SEE ALSO

=head1 AUTHOR

perlancar <perlancar@cpan.org>

=head1 COPYRIGHT AND LICENSE

This software is copyright (c) 2019, 2016, 2015, 2014 by perlancar@cpan.org.

This is free software; you can redistribute it and/or modify it under
the same terms as the Perl 5 programming language system itself.

=cut