The Perl and Raku Conference 2025: Greenville, South Carolina - June 27-29 Learn more

#!perl

# diff-doc-text: convert every word-processor document named on the command
# line to plain text, then run a diff program on the result.

use 5.010001;
use strict;
use warnings;
#use Log::ger::Screen;

use Getopt::Long ();
use CLI::MetaUtil::Getopt::Long qw(GetOptionsCLIWrapper);

our $AUTHORITY = 'cpan:PERLANCAR'; # AUTHORITY
our $DATE = '2022-09-08'; # DATE
our $DIST = 'App-DiffDocText'; # DIST
our $VERSION = '0.004'; # VERSION

# Parse the command line the way GNU tools do, keep option names
# case-sensitive, and let options we do not recognise pass through untouched.
Getopt::Long::Configure("gnu_getopt", "no_ignore_case", "pass_through");

# Number of non-option arguments that were not regular files.
my $fail = 0;

GetOptionsCLIWrapper(
    cli      => 'diff',
    add_opts => {
        # Called once for every non-option argument (i.e. every file name).
        '<>' => sub {
            my ($filename) = @_;

            unless (-f $filename) {
                warn "diff-doc-text: No such file or not a file: '$filename'\n";
                $fail++;
                return;
            }

            # The converter is invoked by its fully qualified name, so the
            # module has to be loaded explicitly before the first call.
            require App::OfficeUtils;
            my $res = App::OfficeUtils::officewp2txt(
                input_file         => $filename,
                fmt                => 1,
                return_output_file => 1,
            );

            # $res is a [status, message, payload] triple.
            my $file;
            if ($res->[0] == 304) {
                # Status 304: diff the argument itself.
                # NOTE(review): presumably "no conversion needed" -- confirm
                # against App::OfficeUtils.
                $file = $filename;
            }
            elsif ($res->[0] == 200) {
                # Status 200: the payload is the path of the converted file.
                $file = $res->[2];
            }
            else {
                die "Can't convert doc '$filename' to text: $res->[0] - $res->[1]";
            }

            push @CLI::MetaUtil::Getopt::Long::cli_argv, $file;
        },
    },
);

# diff reserves exit status 1 for "files differ", so report unusable input
# with 2 ("trouble"), just as diff itself does.
exit 2 if $fail;

# Pick the diff program: tool-specific env var, then the generic one, then
# "diffwc" when it can be found in PATH, and plain "diff" as the last resort.
require File::Which;
my $diff_cmd =
    $ENV{DIFF_DOC_TEXT_DIFF_CMD} //
    $ENV{DIFF_CMD} //
    (File::Which::which("diffwc") ? "diffwc" : "diff");

# NOTE(review): the callback above collects the converted files in
# @CLI::MetaUtil::Getopt::Long::cli_argv while @ARGV is what gets passed on;
# this relies on GetOptionsCLIWrapper leaving the final argument list in
# @ARGV -- confirm against CLI::MetaUtil::Getopt::Long.
require IPC::System::Options;
IPC::System::Options::system(
    {log=>1},
    $diff_cmd, @ARGV,
);

# Hand the diff program's verdict back to our caller instead of always
# exiting 0 (0 = identical, 1 = different, 2 = trouble).
my $wait_status = $?;
if ($wait_status == -1) {
    warn "diff-doc-text: Can't run '$diff_cmd'\n";
    exit 2;
}
exit 2 if $wait_status & 127;    # diff program was killed by a signal
exit($wait_status >> 8);
# ABSTRACT: Diff the text of two Office word-processor documents (.doc, .docx, .odt, etc)
# PODNAME: diff-doc-text
__END__
=pod
=encoding UTF-8
=head1 NAME
diff-doc-text - Diff the text of two Office word-processor documents (.doc, .docx, .odt, etc)
=head1 VERSION
This document describes version 0.004 of diff-doc-text (from Perl distribution App-DiffDocText), released on 2022-09-08.
=head1 SYNOPSIS
Use like you would use the Unix command B<diff>:
% diff-doc-text [options] <FILE>...
=head1 DESCRIPTION
This is a wrapper for the Unix command B<diff>. It assumes that each file is an
Office word-processor document (.doc, .docx, .odt, etc.) and tries to convert
the file to plaintext first. It then passes the converted files to C<diff>.
=head1 ENVIRONMENT
=head2 DIFF_CMD
String. Can be used to set path to diff command. See also
L</DIFF_DOC_TEXT_DIFF_CMD> which takes precedence.
=head2 DIFF_DOC_TEXT_DIFF_CMD
String. Can be used to set path to diff command. The default is L<diffwc> if
available in PATH, or C<diff>. Takes precedence over L</DIFF_CMD>.
=head1 HOMEPAGE
Please visit the project's homepage at L<https://metacpan.org/release/App-DiffDocText>.
=head1 SOURCE
=head1 SEE ALSO
Unix command L<diff>.
L<App::OfficeUtils> and the included CLI L<doc2txt>.
=head1 AUTHOR
perlancar <perlancar@cpan.org>
=head1 CONTRIBUTING
To contribute, you can send patches by email/via RT, or send pull requests on
GitHub.
Most of the time, you don't need to build the distribution yourself. You can
simply modify the code, then test via:
% prove -l
If you want to build the distribution (e.g. to try to install it locally on your
system), you can install L<Dist::Zilla>,
L<Dist::Zilla::PluginBundle::Author::PERLANCAR>,
L<Pod::Weaver::PluginBundle::Author::PERLANCAR>, and sometimes one or two other
Dist::Zilla- and/or Pod::Weaver plugins. Any additional steps required beyond
that are considered a bug and can be reported to me.
=head1 COPYRIGHT AND LICENSE
This software is copyright (c) 2022, 2020 by perlancar <perlancar@cpan.org>.
This is free software; you can redistribute it and/or modify it under
the same terms as the Perl 5 programming language system itself.
=head1 BUGS
Please report any bugs or feature requests on the bugtracker website L<https://rt.cpan.org/Public/Dist/Display.html?Name=App-DiffDocText>
When submitting a bug or request, please include a test-file or a
patch to an existing test-file that illustrates the bug or desired
feature.
=cut