—#!perl
use
5.010001;
use
strict;
use
warnings;
use
Log::ger;
#use Log::ger::Screen;
our
$AUTHORITY
=
'cpan:PERLANCAR'
;
# AUTHORITY
our
$DATE
=
'2022-11-26'
;
# DATE
our
$DIST
=
'App-DiffXlsText'
;
# DIST
our
$VERSION
=
'0.012'
;
# VERSION
unless
(which
"xlsx2csv"
) {
die
"diff-xls-text: This tool requires xlsx2csv from CPAN module Spreadsheet::Read\n"
;
}
my
@files
;
Getopt::Long::Configure(
"gnu_getopt"
,
"no_ignore_case"
,
"pass_through"
);
my
$fail
;
my
$opt_transpose_csv
;
my
$tempdir
;
my
%basenames
;
my
$has_checked_xlsx2csv
;
GetOptionsCLIWrapper(
cli
=>
'diff'
,
add_opts
=> {
'transpose-csv'
=> \
$opt_transpose_csv
,
'<>'
=>
sub
{
return
if
$fail
;
my
$filename
=
$_
[0];
unless
(-f
$filename
) {
warn
"diff-xls-text: No such file or not a file: '$filename'\n"
;
$fail
++;
return
;
}
if
(!
$tempdir
) {
$tempdir
= File::Temp::tempdir(
CLEANUP
=> !
$ENV
{DEBUG});
}
(
my
$basename0
=
$filename
) =~ s!.+/!!;
my
$basename
;
my
$i
= 0;
while
(1) {
$basename
=
$basename0
. (
$i
++ ?
".$i"
:
""
);
last
unless
$basenames
{
$basename
}++;
}
unless
(
$has_checked_xlsx2csv
++) {
my
$content
= read_binary(which
"xlsx2csv"
);
warn
"diff-xls-text: xlsx2csv in PATH doesn't seem to be the one from Spreadsheet::Read, please make sure we have the correct xlsx2csv\n"
;
$fail
++;
return
;
}
}
mkdir
"$tempdir/$basename"
;
system
"xlsx2csv"
,
"-A"
,
"-o"
,
"$tempdir/$basename/workbook"
,
$filename
;
push
@CLI::MetaUtil::Getopt::Long::cli_argv
,
"$tempdir/$basename"
;
if
(
$opt_transpose_csv
) {
log_trace
"Transposing CSV files in $tempdir/$basename ..."
;
local
$CWD
=
"$tempdir/$basename"
;
for
my
$file
(
glob
"*.csv"
) {
(
my
$newfile
=
$file
) =~ s/\.csv$/.transpose.csv/;
system
({
shell
=>1},
"csv-transpose"
,
$file
, \
">"
,
$newfile
);
unlink
$file
;
}
}
},
},
);
exit
1
if
$fail
;
require
File::Which;
my
$diff_cmd
=
$ENV
{DIFF_XLS_TEXT_DIFF_CMD} //
$ENV
{DIFF_CMD} //
(File::Which::which(
"diffwc"
) ?
"diffwc"
:
undef
) //
"diff"
;
require
IPC::System::Options;
IPC::System::Options::
system
(
{
log
=>1},
$diff_cmd
,
"-ruN"
,
@ARGV
,
);
# ABSTRACT: Diff the text of two Office spreadsheets (.ods, .xls, .xlsx) as two directories of CSV files
# PODNAME: diff-xls-text
__END__
=pod
=encoding UTF-8
=head1 NAME
diff-xls-text - Diff the text of two Office spreadsheets (.ods, .xls, .xlsx) as two directories of CSV files
=head1 VERSION
This document describes version 0.012 of diff-xls-text (from Perl distribution App-DiffXlsText), released on 2022-11-26.
=head1 SYNOPSIS
Use like you would use the Unix command B<diff>:
% diff-xls-text [options] <FILE>...
Sample F<old.xlsx> (C<*foo*> marks bolded text):
# sheet 1: en_id
| english | indonesian |
|-----------+----------------|
| boy | anak laki-laki |
| girl | anak wanita |
| man | Laki-laki |
| lad | pemuda |
# sheet 2: id_en
| indonesian | english |
|--------------+-------------|
| pemuda | young man |
| pemudi | young woman |
Sample F<new.xlsx>:
# sheet 1: en_id
| *English* | *Indonesian* |
|------------+----------------|
| boy | anak laki-laki |
| girl | anak perempuan |
| man | laki-laki |
| woman | perempuan |
# sheet 2: en_su
| English | Sundanese |
|------------+----------------|
| man | lalaki |
Sample session:
% diff-xls-text old.xlsx new.xlsx
Saving sheet to /tmp/IZxMSeSyr_/old.xlsx/workbook-en_id.csv ...
Saving sheet to /tmp/IZxMSeSyr_/old.xlsx/workbook-id_en.csv ...
Saving sheet to /tmp/IZxMSeSyr_/new.xlsx/workbook-en_id.csv ...
Saving sheet to /tmp/IZxMSeSyr_/new.xlsx/workbook-en_su.csv ...
diff -ruN /tmp/IZxMSeSyr_/old.xlsx/workbook-en_id.csv /tmp/IZxMSeSyr_/new.xlsx/workbook-en_id.csv
--- /tmp/IZxMSeSyr_/old.xlsx/workbook-en_id.csv 2022-11-26 07:43:56.293111001 +0700
+++ /tmp/IZxMSeSyr_/new.xlsx/workbook-en_id.csv 2022-11-26 07:43:56.661107387 +0700
@@ -1,5 +1,5 @@
-english,indonesian
+English,Indonesian
boy,"anak laki-laki"
-girl,"anak wanita"
-man,Laki-laki
-lad,pemuda
+girl,"anak perempuan"
+man,laki-laki
+woman,perempuan
diff -ruN /tmp/IZxMSeSyr_/old.xlsx/workbook-en_su.csv /tmp/IZxMSeSyr_/new.xlsx/workbook-en_su.csv
--- /tmp/IZxMSeSyr_/old.xlsx/workbook-en_su.csv 1970-01-01 07:00:00.000000000 +0700
+++ /tmp/IZxMSeSyr_/new.xlsx/workbook-en_su.csv 2022-11-26 07:43:56.661107387 +0700
@@ -0,0 +1,2 @@
+English,Sundanese
+man,lalaki
diff -ruN /tmp/IZxMSeSyr_/old.xlsx/workbook-id_en.csv /tmp/IZxMSeSyr_/new.xlsx/workbook-id_en.csv
--- /tmp/IZxMSeSyr_/old.xlsx/workbook-id_en.csv 2022-11-26 07:43:56.293111001 +0700
+++ /tmp/IZxMSeSyr_/new.xlsx/workbook-id_en.csv 1970-01-01 07:00:00.000000000 +0700
@@ -1,3 +0,0 @@
-indonesian,english
-pemuda,"young man"
-pemudi,"young woman"
=head1 DESCRIPTION
This is a wrapper for the Unix command B<diff>. It assumes that each input file
is an Office spreadsheet (.ods, .xls, or .xlsx) and tries to convert the file to
a directory of CSV files (where each CSV file is converted from a single
worksheet) using L<xlsx2csv>. It then passes the converted directories to C<<
diff -ruN >> command. Thus, formatting differences will not be diff'ed.
=head1 OPTIONS
These are options that are interpreted by B<diff-xls-text> and not passed to
B<diff>.
B<Please specify these options before file names.>
=over
=item * --transpose-csv
Transpose CSV first using L<csv-transpose> from L<App::CSVUtils>. This allows
you to do column-based instead of row-based diff.
=back
=head1 ENVIRONMENT
=head2 DEBUG
If set to true, do not cleanup temporary directories.
=head2 DIFF_CMD
String. Can be used to set path to diff command. See also
L</DIFF_XLS_TEXT_DIFF_CMD> which takes precedence.
=head2 DIFF_XLS_TEXT_DIFF_CMD
String. Can be used to set path to diff command. The defaultl is L<diffwc> if
available in PATH, or C<diff>. Takes precedence over L</DIFF_CMD>.
=head1 HOMEPAGE
Please visit the project's homepage at L<https://metacpan.org/release/App-DiffXlsText>.
=head1 SOURCE
Source repository is at L<https://github.com/perlancar/perl-App-DiffXlsText>.
=head1 SEE ALSO
Unix command L<diff>.
L<ssdiff> from L<Spreadsheet::Read> which also diffs two spreadsheets but
presents the result differently.
L<xlsx2csv> from L<Spreadsheet::Read>
=head1 AUTHOR
perlancar <perlancar@cpan.org>
=head1 CONTRIBUTING
To contribute, you can send patches by email/via RT, or send pull requests on
GitHub.
Most of the time, you don't need to build the distribution yourself. You can
simply modify the code, then test via:
% prove -l
If you want to build the distribution (e.g. to try to install it locally on your
system), you can install L<Dist::Zilla>,
L<Dist::Zilla::PluginBundle::Author::PERLANCAR>,
L<Pod::Weaver::PluginBundle::Author::PERLANCAR>, and sometimes one or two other
Dist::Zilla- and/or Pod::Weaver plugins. Any additional steps required beyond
that are considered a bug and can be reported to me.
=head1 COPYRIGHT AND LICENSE
This software is copyright (c) 2022, 2021, 2020 by perlancar <perlancar@cpan.org>.
This is free software; you can redistribute it and/or modify it under
the same terms as the Perl 5 programming language system itself.
=head1 BUGS
Please report any bugs or feature requests on the bugtracker website L<https://rt.cpan.org/Public/Dist/Display.html?Name=App-DiffXlsText>
When submitting a bug or request, please include a test-file or a
patch to an existing test-file that illustrates the bug or desired
feature.
=cut