The Perl and Raku Conference 2025: Greenville, South Carolina - June 27-29 Learn more

#!/usr/bin/env perl
package App::ccdiff;
use 5.014000;
use charnames ();
use Encode qw( encode );
use List::Util qw( first max );
use Term::ANSIColor qw(:constants color );
use Getopt::Long qw(:config bundling noignorecase);
our $VERSION = "0.34";
our $CMD = $0 =~ s{.*/}{}r;
# usage ($err)
# Show the synopsis of all command-line options and exit with status $err.
# With a true $err the text goes to STDERR, otherwise to the currently
# selected output handle.
sub usage {
    my $err = shift;
    $err and select STDERR;
    my @help = (
        "usage: $CMD [options] file1 [file2]",
        " $CMD [options] dir1 dir2",
        " $CMD --man | --info",
        " file1 or file2 can be - (but not both)",
        " -V --version Show version and exit",
        " -v[1] --verbose[=1] Set verbosity",
        " Diff options:",
        " -U --utf-8 Input is in UTF-8",
        " -u[3] --unified=3 Show a unified diff",
        " --no-header Skip header with file names/stamps",
        " -I --index Add indices to the change chunks",
        " -I n --index=4 Only show chunk n",
        " -w --ignore-all-space Ignore all whitespace",
        " -b --ignore-space-change Ignore horizontal whitespace changes",
        " -Z --ignore-trailing-space Ignore whitespace at line ending",
        " -B --ignore-blank-lines Ignore changes where lines are all blank",
        " -i --ignore-case Ignore case changes",
        " --dc=C --diff-class=C Select Algorithm::Diff (AD or PP)",
        " or Algorithm::Diff::XS (XS = default)",
        " Other options:",
        " -t n --threshold=2 Horizontal line diff threshold",
        " -h n --heuristics=n Horizontal char diff threshold",
        " -e n --ellipsis=n Compress horizontal equal sections",
        " -m --markers Use markers to indicate change positions",
        " -a --ascii Use ASCII instead of Unicode indicators",
        " --no-color Reset all colors to none.",
        " --list-colors List available colors and exit",
        " --old=red Color to indicate removed content",
        " --new=green Color to indicate added content",
        " --bg=white Background color for colored indicators",
        " -p --pink Shortcut for --old=magenta",
        " -r --reverse Reverse/invert the colors of the indicators",
        " -s --swap Swap old/new color indicators",
        " --settings Show default settings (after reading rc)",
        );
    say for @help;
    exit $err;
    } # usage
# Built-in defaults. read_rc () overrules these with the values found in the
# user's configuration files; the command line is processed after that.
my %rc = (
    # Behavior
    ascii      => 0,
    ellipsis   => 0,
    emacs      => 0,
    header     => 1, # A color name is allowed
    heuristics => 0,
    index      => 0,
    iwbzusepp  => 0,
    markers    => 0,
    reverse    => 0,
    swap       => 0,
    threshold  => 2,
    utf8       => 0,

    # Colors
    bg         => "white",
    new        => "green",
    old        => "red",
    verbose    => "cyan",

    # Indicator characters
    chr_cml    => "\x{21b1}",
    chr_cmr    => "\x{21b0}",
    chr_eli    => "\x{2508}",
    chr_eli_v  => "\x{21a4}\x{21a6}",
    chr_eql    => " ",
    chr_new    => "\x{25b2}",
    chr_old    => "\x{25bc}",
    );
read_rc ();

# Options without a configurable default
my ($opt_b, $opt_B, $opt_E, $opt_i, $opt_w, $opt_Z);
#y $opt_c;
my $opt_v = 0;

# Options that start with their (possibly rc-defined) default. "unified" has
# no built-in default, so $opt_u stays undefined unless an rc file sets it.
my ($opt_a, $opt_h,     $opt_H, $opt_I, $opt_m,  $opt_r,  $opt_s, $opt_t,
    $opt_e, $opt_u,     $opt_U) = @rc{qw(
    ascii   heuristics  header  index   markers  reverse  swap    threshold
    ellipsis unified    utf8    )};
my ($emacs, $old_color, $new_color, $rev_color) = @rc{qw( emacs old new bg )};
# Decide if colors are to be suppressed, based on the environment.
#  - A true $NO_COLOR disables colors, unless $CLICOLOR_FORCE is true.
#  - Otherwise, if $CLICOLOR is set, colors are disabled when it is false or
#    when the output is not a terminal.
my $cli_color = $ENV{CLICOLOR}; # https://bixense.com/clicolors/
my $no_colors = $ENV{NO_COLOR}; # https://no-color.org
my $list_colors;
my $diff_class;
if ($no_colors) {
    $ENV{CLICOLOR_FORCE} and $no_colors = 0;
    }
elsif (defined $cli_color) {
    # true $cli_color is the default for ccdiff
    # A bare -t tests STDIN, but what matters for colors is where the output
    # goes: "ccdiff a - <file" on a terminal should still be colored.
    !$cli_color || !-t STDOUT and $no_colors = 1;
    }
# Command-line processing, only when run as a script (not when loaded as a
# module).
unless (caller) {
    # Options from $CCDIFF_OPTIONS are processed before those on the command
    # line. Splitting on " " (awk mode) skips leading whitespace, where
    # m/\s+/ would yield an empty first element that ends up as a bogus file
    # argument.
    $ENV{CCDIFF_OPTIONS} and unshift @ARGV, split " ", $ENV{CCDIFF_OPTIONS};
    GetOptions (
        "help|?"                          => sub { usage (0); },
        "V|version"                       => sub { say "$CMD [$VERSION]"; exit 0; },
        "man"                             => sub { pod_nroff (); },
        "info"                            => sub { pod_text (); },
        "U|utf-8!"                        => \$opt_U,
        "dc|diff-class=s"                 => \$diff_class,
        "pp!"                             => sub { $diff_class = "PP" },
        # "c|context:3" => \$opt_c, # implement context-diff?
        "u|unified:3"                     => \$opt_u,
        "I|idx|index:-1"                  => \$opt_I,
        "t|threshold=i"                   => \$opt_t,
        "H|header!"                       => \$opt_H,
        "HC|header-color=s"               => \$opt_H,
        "h|heuristics=i"                  => \$opt_h,
        "e|ellipsis=i"                    => \$opt_e,
        "emacs!"                          => \$emacs,
        # These toggle, so they invert a default set in a configuration file
        "a|ascii"                         => sub { $opt_a ^= 1 },
        "m|markers"                       => sub { $opt_m ^= 1 },
        "r|reverse|invert"                => sub { $opt_r ^= 1 },
        "s|swap!"                         => sub { $opt_s ^= 1 },
        "i|ignore-case!"                  => \$opt_i,
        "w|ignore-all-space!"             => \$opt_w,
        "b|ignore-ws|ignore-space-change!" => \$opt_b,
        "Z|ignore-trailing-space!"        => \$opt_Z,
        "E|ignore-tab-expansion!"         => \$opt_E, # NYI
        "B|ignore-blank-lines!"           => \$opt_B, # Partly implemented
        "p|pink!"                         => sub { $old_color = "magenta" },
        "old=s"                           => \$old_color,
        "new=s"                           => \$new_color,
        "bg=s"                            => \$rev_color,
        "no-colors"                       => \$no_colors,
        "list-colors!"                    => \$list_colors,
        "settings|defaults"               => sub {
            binmode STDOUT, ":encoding(utf-8)";
            printf "%-10s : %s\n", $_, $rc{$_} // "<undef>" for sort keys %rc;
            exit 0;
            },
        "v|verbose:1"                     => \$opt_v,
        ) or usage (1);
    }
# -w implies all other white-space related options
$opt_w and $opt_b = $opt_Z = $opt_E = $opt_B = 1;
# Heuristics of 1 and up are taken as a percentage and stored as a fraction
$opt_h >= 1 and $opt_h /= 100;
# pod_text ()
# Render the manual embedded in this script as plain text (colored unless
# colors are disabled), print it and exit with status 0.
sub pod_text {
    my $m = $no_colors ? "Pod::Text" : "Pod::Text::Color";
    # The formatters are not loaded at compile time: load what is needed
    # here, otherwise ->new fails on an unknown package.
    require Pod::Text;
    $no_colors or require Pod::Text::Color;
    my $p = $m->new ();
    open my $fh, ">", \my $out or die "Cannot open in-memory buffer: $!\n";
    $p->parse_from_file ($0, $fh);
    close $fh;
    print $out;
    exit 0;
    } # pod_text
# pod_nroff ()
# Render the manual embedded in this script through "nroff -man" and exit
# with status 0. Without an executable nroff in $PATH, pod_text () takes over
# (and exits).
sub pod_nroff {
    my @path   = grep  { -d } split m/:+/ => $ENV{PATH};
    my $bindir = first { -x "$_/nroff" } @path;
    $bindir or pod_text ();

    require Pod::Man;
    my $p = Pod::Man->new ();
    open my $fh, "|-", "nroff", "-man";
    $p->parse_from_file ($0, $fh);
    close $fh;
    exit 0;
    } # pod_nroff
# Color initialization

# Normalize the position of "bold": "red_bold", "red bold" and "bold_red" all
# become "bold red"
foreach my $color ($old_color, $new_color, $rev_color) {
    $color =~ s/^(.*)[ _]bold$/bold $1/i;
    $color =~ s/^bold_/bold /i;
    }

# Map color names to their escape sequences: first the basic colors with
# their background and bold variants, then all Term::ANSIColor knows about
my %clr;
foreach my $base (qw( red green blue black white cyan magenta yellow )) {
    foreach my $name ($base, "on_$base", "bold $base") {
        my $attr = $name =~ s{^(.*)[ _]bold$}{bold $1}ir =~
                            s{^bold[ _]}{bold }ir;
        $clr{$name} = color ($attr);
        }
    }
foreach my $name (tac_colors ()) {
    $clr{$name} //= color ($name);
    }
if ($no_colors) {
    $clr{$_} = "" for keys %clr;
    }
$clr{none} = $clr{on_none} = "";

# File-scoped state filled in by set_options (). Only $reset has a value
# from the start.
my ($reset, $bg_new, $bg_old,
    $chr_cml, $chr_cmr, $chr_ctx, $chr_eli, $chr_eql, $chr_lft,
    $chr_new, $chr_old, $chr_rgt,
    $clr_dbg, $clr_grn, $clr_new, $clr_old, $clr_red, $clr_rev,
    $cmp_sub) = (RESET);

# --list-colors: show every known color name in its own color, four per line
if ($list_colors) {
    my @cells = map { sprintf "%s%-18s%s", $clr{$_}, $_, $reset } sort keys %clr;
    while (my @row = splice @cells, 0, 4) {
        say join " " => map { $_ // "" } @row;
        }
    exit;
    }
# set_options ()
# Derive the colors, indicator characters and the optional line normalizer
# from the current option values. Called at the start of every ccdiff ().
# Unknown colors are replaced by their default with a warning.
sub set_options {
    for ([ \$old_color, $rc{old} ], [ \$new_color, $rc{new} ], [ \$rev_color, $rc{bg} ]) {
        my ($c, $def) = @$_;
        $$c && exists $clr{$$c} and next;
        warn "color ", $$c // "(undefined)", " is unknown, using $def instead\n";
        $$c = $def;
        }
    $clr_red = $clr{$old_color};
    $clr_grn = $clr{$new_color};
    $clr_rev = $clr{$rev_color};
    $clr_dbg = $opt_r && exists $clr{"on_$rc{verbose}"} ? $clr{"on_$rc{verbose}"} : $clr{$rc{verbose}};
    $reset   = $no_colors ? "" : RESET;
    # A background color cannot be bold, so that attribute is stripped
    $bg_old  = $clr{$rc{bg_old} || ($opt_r ? "on_$old_color" =~ s/bold //ir :
                                             "on_$rev_color" =~ s/bold //ir)};
    $bg_new  = $clr{$rc{bg_new} || ($opt_r ? "on_$new_color" =~ s/bold //ir :
                                             "on_$rev_color" =~ s/bold //ir)};
    $clr_old = $opt_r ? $clr_rev . $bg_old : $clr_red . $bg_old;
    $clr_new = $opt_r ? $clr_rev . $bg_new : $clr_grn . $bg_new;
    $opt_s and ($clr_new, $clr_old) = ($clr_old, $clr_new);

    # Indicators
    # Work on a copy of the configured characters. This sub runs once per
    # ccdiff () call: encoding the values in %rc itself would encode them
    # again on every next call (directory mode, use as a module), and --ascii
    # would overwrite the configured characters for good.
    my @ind = qw( chr_old chr_new chr_cml chr_cmr chr_eli chr_eli_v );
    my %chr;
    @chr{@ind} = @rc{@ind};
    if ($opt_a) {
        @chr{@ind} = qw( ^ ^ > < - <> );
        }
    elsif (!$opt_U) {
        # Output has no encoding layer: emit the UTF-8 bytes ourselves
        $chr{$_} = encode ("utf-8", $chr{$_}) for @ind;
        }
    $chr_old = $clr_old . $chr{chr_old} . $reset;
    $chr_new = $clr_new . $chr{chr_new} . $reset;
    $chr_cml = $clr_dbg . $chr{chr_cml} . $reset;
    $chr_cmr = $clr_dbg . $chr{chr_cmr} . $reset;
    $chr_eql = $rc{chr_eql};
    $chr_lft = $clr_old . (defined $opt_u ? "-" : "< ") . $reset;
    $chr_rgt = $clr_new . (defined $opt_u ? "+" : "> ") . $reset;
    $chr_ctx = defined $opt_u ? " " : " ";
    $chr_eli = $opt_v >= 2 ? $chr{chr_eli_v} : $chr{chr_eli};
    # Markers are switched off when the ellipsis is wider than one character
    # at verbosity 2 and up
    $opt_m && $opt_v > 1 && length ($chr_eli) > 1 and $opt_m = 0;

    if ($opt_i || $opt_b || $opt_Z) {
        $rc{iwbzusepp} and $diff_class = "Algorithm::Diff";
        # Lines are compared by the key generated here, so the differences
        # that are to be ignored are removed from that key
        $cmp_sub = {
            keyGen => sub {
                my $line = shift;
                $opt_i and $line = lc $line;
                $opt_Z and $line =~ s/[ \t\r]+$//g;
                $opt_b and $line =~ s/[ \t]+/ /g;
                return $line;
                }
            };
        }
    } # set_options
# Resolve the requested diff class to a loaded module name. Without a
# request, the XS implementation is preferred over the pure-perl one. A
# module that fails to load leaves $diff_class false.
if (!$diff_class) {
    $diff_class =
        eval { require Algorithm::Diff::XS; "Algorithm::Diff::XS"; } ||
        eval { require Algorithm::Diff;     "Algorithm::Diff";     };
    }
elsif ($diff_class =~ m/^(?:ad|pp|algorithm(?:-|::)diff(?:(?:-|::)pp)?)$/i) {
    $diff_class = eval { require Algorithm::Diff; "Algorithm::Diff"; };
    }
elsif ($diff_class =~ m/^(?:adx|xs|algorithm(?:-|::)diff(?:-|::)xs)$/i) {
    $diff_class = eval { require Algorithm::Diff::XS; "Algorithm::Diff::XS"; };
    }
else {
    die "$diff_class is an unsupported Diff class\n";
    }
$diff_class or die "Cannot load Algorithm::Diff:\n";
# Main program, only when run as a script. Two directory arguments start a
# recursive diff of all non-empty plain files below them, anything else is
# passed to ccdiff () as is.
unless (caller) {
    if (@ARGV == 2 && -d $ARGV[0] && -d $ARGV[1]) {
        # File::Find is only needed here and is not imported at the top of
        # the file, so load it on demand and call find () fully qualified.
        require File::Find;
        my ($d_from, $d_to) = @ARGV;
        my %fn; # All relative file names seen in either directory
        my @f;  # Per directory: the relative file names seen in it
        foreach my $idx (0, 1) {
            my $dir = $ARGV[$idx];
            File::Find::find (sub {
                -f && -s or return;
                # Strip the directory to get a name that is comparable
                # between both trees. \Q..\E: the directory name is text,
                # not a pattern.
                my $fn = $File::Find::name =~ s{^\Q$dir\E/*}{}r;
                $fn{$fn}++;
                $f[$idx]{$fn}++;
                }, $dir);
            }
        foreach my $fn (sort keys %fn) {
            if ($f[0]{$fn} && $f[1]{$fn}) {
                ccdiff ("$d_from/$fn", "$d_to/$fn");
                }
            elsif ($f[0]{$fn}) {
                say "Only in $d_from: $fn";
                }
            else {
                say "Only in $d_to: $fn";
                }
            }
        }
    else {
        ccdiff (@ARGV);
        }
    exit 0;
    }
# ccdiff ($f1 [, $f2 [, \%opt ]])
# Compare two inputs and print the diff to the currently selected output
# handle. Each input is a file name, "-" for STDIN, or an array ref holding
# the lines to compare. $f2 defaults to "-".
# The optional third argument is a hash ref with values that overrule the
# corresponding options. These are stored in the file-scoped option
# variables, so they stay in effect for subsequent calls. The key "out" names
# a file that is opened for writing and receives the output of this call.
# Dies on devices, directories and files that cannot be opened. Calls
# usage (1) when the first argument is missing or both inputs are "-".
sub ccdiff {
my $f1 = shift or usage (1);
my $f2 = $_[0] // "-";
-b $f1 || -c $f1 || -b $f2 || -c $f2 and
die "Character and block devices are not supported\n";
-d $f1 || -d $f2 and
die "$CMD does not support directory diff\n";
# Only set when the "out" option is passed
my $fh;
if (@_ > 1 && ref $_[1]) { # optional hash with overruling arguments
my %opt = %{$_[1]};
foreach my $o (keys %opt) {
my $v = $opt{$o};
$o eq "ascii" and $opt_a = $v;
$o eq "bg" and $rev_color = $v;
# $o eq "context" and $opt_c = $v;
$o eq "ellipsis" and $opt_e = $v;
$o eq "emacs" and $emacs = $v;
$o eq "header" and $opt_H = $v;
$o eq "heuristics" and $opt_h = $v;
$o eq "ignore-all-space" and $opt_w = $v;
$o eq "ignore-blank-lines" and $opt_B = $v;
$o eq "ignore-case" and $opt_i = $v;
$o eq "ignore-space-change" and $opt_b = $v;
$o eq "ignore-tab-expansion" and $opt_E = $v;
$o eq "ignore-trailing-space" and $opt_Z = $v;
$o eq "index" and $opt_I = $v;
$o eq "list-colors" and $list_colors = $v;
$o eq "markers" and $opt_m = $v;
$o eq "new" and $new_color = $v;
$o eq "old" and $old_color = $v;
$o eq "reverse" and $opt_r = $v;
$o eq "swap" and $opt_s = $v;
$o eq "threshold" and $opt_t = $v;
$o eq "unified" and $opt_u = $v;
$o eq "unified" and $opt_u = $v;
$o eq "utf-8" and $opt_U = $v;
$o eq "verbose" and $opt_v = $v;
if ($o eq "out") {
open $fh, ">", $v or die "Cannot select out: $!\n";
select $fh;
}
}
}
# Derive colors and indicators from the (possibly overruled) options
set_options ();
# In emacs mode with just one file name, compare its backup "$f1~" to it
$emacs and @_ == 0 && -f $f1 && -f "$f1~" and ($f1, $f2) = ("$f1~", $f1);
$f1 eq "-" && $f2 eq "-" and usage (1);
binmode STDERR, ":encoding(utf-8)";
if ($opt_U) {
binmode STDIN, ":encoding(utf-8)";
binmode STDOUT, ":encoding(utf-8)";
}
# Get the lines to compare: from the array ref, from STDIN, or from the file
my @d1 = ref $f1 eq "ARRAY" ? @$f1 : $f1 eq "-" ? <STDIN> : do {
open my $fh, "<", $f1 or die "$f1: $!\n";
$opt_U and binmode $fh, ":encoding(utf-8)";
<$fh>;
};
my @d2 = ref $f2 eq "ARRAY" ? @$f2 : $f2 eq "-" ? <STDIN> : do {
open my $fh, "<", $f2 or die "$f2: $!\n";
$opt_U and binmode $fh, ":encoding(utf-8)";
<$fh>;
};
# Optional header with the name and time stamp of both inputs
if ($opt_H) {
my $hc = "";
# A header value of at least two word characters is taken as a color
# specification: foreground, optionally followed by "on_" background
if ($opt_H =~ m/^\w\w+/) {
my ($hfg, $hbg) = split m/_?(?=on_)/ => lc $opt_H =~ s/\s+/_/gr;
$hfg && defined $clr{$hfg} and $hc .= $clr{$hfg};
$hbg && defined $clr{$hbg} and $hc .= $clr{$hbg};
}
# Width of the name column, and (for colored headers) of the stamp column
# to fill the line up to the terminal width
my $nl = max length $f1, length $f2, 7;
my $sl = $hc ? ($ENV{COLUMNS} || 80) - 4 - $nl : 1;
# Anything that is not a plain file is shown as *STDIN with the current time
my @h = map { -f $_
? { tag => "",
name => $_,
stamp => scalar localtime ((stat $_)[9]),
}
: { tag => "",
name => "*STDIN",
stamp => scalar localtime,
}
} $f1, $f2;
if (defined $opt_u) {
($h[0]{tag}, $h[1]{tag}) = ("---", "+++");
$sl -= 2;
printf "%s%s %-*s %-*s%s\n", $hc, $_->{tag},
$nl, $_->{name}, $sl, $_->{stamp}, $clr{reset} for @h;
}
#elsif ($opt_c) { # diff -c also provides (ugly) headers, but opt_c is NYI
# }
else {
($h[0]{tag}, $h[1]{tag}) = ("<", ">");
printf "%s%s %-*s %-*s%s\n", $hc, $_->{tag},
$nl, $_->{name}, $sl, $_->{stamp}, $clr{reset} for @h;
}
}
# The vertical (line by line) diff. $cmp_sub is only set by set_options ()
# when case or white-space is to be ignored
my $diff = $diff_class->new (\@d1, \@d2, $cmp_sub);
$diff->Base (1);
# $N counts all hunks, $idx the reported changes, @s holds the lines of the
# most recent unchanged hunk (the context for unified diff)
my ($N, $idx, @s) = (0, 0);
while ($diff->Next) {
$N++;
if ($diff->Same) {
# Unchanged hunks are only used as context in unified mode
if (defined $opt_u) {
@s = $diff->Items (1);
# Trailing context of the previous change
$N > 1 and print "$chr_ctx$_" for grep { defined } @s[0..($opt_u - 1)];
# Pad at the front, so the last $opt_u elements can always be addressed
unshift @s, undef while @s < $opt_u;
}
next;
}
my $sep = "";
my @d = map {[ $diff->Items ($_) ]} 1, 2;
my @do = @{$d[0]};
my @dn = @{$d[1]};
# -B: skip changes where both sides have nothing but white-space
if ($opt_B and "@do" !~ m/\S/ && "@dn" !~ m/\S/) {
# Modify @s for -u?
next;
}
if ($opt_I) {
$idx++;
# A positive index selects just that one change
$opt_I > 0 && $idx != $opt_I and next;
printf "%s[%03d]%s ", ${clr_dbg}, $idx, $reset;
}
if (!@dn) {
# Lines were deleted: color every line as a whole
printf "%d,%dd%d\n", $diff->Get (qw( Min1 Max1 Max2 ));
$_ = $clr_old . (s/$/$reset/r) for @do;
}
elsif (!@do) {
# Lines were added: color every line as a whole
printf "%da%d,%d\n", $diff->Get (qw( Max1 Min2 Max2 ));
$_ = $clr_new . (s/$/$reset/r) for @dn;
}
else {
# Lines were changed
$sep = "---\n" unless defined $opt_u;
printf "%d,%dc%d,%d\n", $diff->Get (qw( Min1 Max1 Min2 Max2 ));
if ($opt_t > 0 and abs (@do - @dn) > $opt_t) {
# The number of lines differs more than the threshold allows: no
# horizontal diff, color the lines as a whole
$_ = $clr_old . (s/$/$reset/r) for @do;
$_ = $clr_new . (s/$/$reset/r) for @dn;
}
else {
# Horizontal diff; subdiff () stores its statistics in $heu
my @D = subdiff (@d, my $heu = {});
if ($opt_h and $heu->{pct} > $opt_h) {
# Too many characters changed: color the lines as a whole after all
$_ = $clr_old . (s/$/$reset/r) for @do;
$_ = $clr_new . (s/$/$reset/r) for @dn;
}
else {
@do = @{$D[0]};
@dn = @{$D[1]};
}
}
}
# Leading context: the last $opt_u lines of the preceding unchanged hunk
if ($opt_u and @s) {
print "$chr_ctx$_" for grep { defined } map { $s[$#s - $opt_u + $_] } 1..$opt_u;
}
print "$chr_lft$_" for @do;
print $sep;
print "$chr_rgt$_" for @dn;
}
# Restore the output handle when the "out" option redirected it
if ($fh) {
select STDOUT;
close $fh;
}
} # ccdiff
# subdiff (\@old, \@new, \%heu)
# The horizontal (character by character) diff of the lines of one change.
# Returns a list of two array refs: the old and the new lines, in which the
# changed characters are colored. With markers enabled, lines holding the
# marker characters are interleaved. With verbosity enabled, a line that
# describes the changed characters is appended.
# %heu is filled with the keys "old", "new" and "same" (character counts that
# all start at 1) and "pct": (old + new) / (2 * same).
sub subdiff {
my ($old, $new, $heu) = @_;
# Compare the two lists of all characters of all lines
my $d = $diff_class->new (map { [ map { split m// } @$_ ] } $old, $new);
# $d1/$d2 collect the colored old/new text, $x1/$x2 the matching marker
# lines, @h1/@h2 the descriptions of the changed characters
my ($d1, $d2, $x1, $x2, @h1, @h2) = ("", "", "", "");
# Left and right markers around a change are only shown at verbosity 2 and up
my ($cml, $cmr) = $opt_v < 2 ? ("", "") : ($chr_cml, $chr_cmr);
my ($cmd, $cma) = ($chr_old, $chr_new);
@{$heu}{qw( old new same )} = (1, 1, 1); # prevent div/0
while ($d->Next) {
my @c = map {[ $d->Items ($_) ]} 1, 2;
my @co = @{$c[0]};
my @cn = @{$c[1]};
if ($d->Same) {
$heu->{same} += scalar @co;
my $e = $chr_eli;
my $c = join "" => @co;
if ($opt_e) {
# Ellipsis mode: an equal segment is handled per line, where long
# segments are compressed
my $join = "";
foreach my $sc (split m/\n/ => $c) {
$_ .= $join for $d1, $d2, $x1, $x2;
$join = "\n";
my $l = length $sc; # The length of this "same" chunk
my $le = $l - 2 * $opt_e; # The length of the text replaced with ellipsis
# At verbosity 2 and up the number of replaced characters is inserted
# before the last character of the ellipsis
my $ee = $opt_v <= 1 ? $e : $e =~ s/^.\K(?=.$)/$le/r;
if ($le > length $ee) {
# Keep $opt_e characters on both sides of the ellipsis
my $lsc = substr $sc, 0, $opt_e;
$d1 .= $lsc;
$d2 .= $lsc;
$lsc =~ s/\S/$chr_eql/g;
$x1 .= $lsc;
$x2 .= $lsc;
my $rsc = substr $sc, $l - $opt_e, $opt_e;
$d1 .= $clr_dbg . $ee . $reset . $rsc;
$d2 .= $clr_dbg . $ee . $reset . $rsc;
$rsc =~ s/\S/$chr_eql/g;
$x1 .= $chr_eql x length ($ee) . $rsc;
$x2 .= $chr_eql x length ($ee) . $rsc;
next;
}
else {
# Too short to compress: keep as is
$d1 .= $sc;
$d2 .= $sc;
$sc =~ s/\S/$chr_eql/g;
$x1 .= $sc;
$x2 .= $sc;
}
}
next;
}
# Equal text is copied to both sides. In the marker lines all
# non-white-space is replaced, which keeps tabs and newlines in place
$d1 .= $c;
$d2 .= $c;
$c =~ s/\S/$chr_eql/g;
$x1 .= $c;
$x2 .= $c;
next;
}
if (@co) {
# Removed characters. The color is reset before and restarted after every
# newline
$heu->{old} += scalar @co;
$d1 .= $cml.$clr_old;
$d1 .= s/\n/$reset\n$clr_old/gr for @co;
$d1 .= $reset.$cmr;
$x1 .= $_ for map { s/[^\t\r\n]/$cmd/gr } @co;
# Verbose: the character name with --utf-8, its hex value otherwise
$opt_v and push @h1, map { $opt_U ? charnames::viacode (ord) : unpack "H*"; } @co;
}
if (@cn) {
# Added characters, likewise
$heu->{new} += scalar @cn;
$d2 .= $cml.$clr_new;
$d2 .= s/\n/$reset\n$clr_new/gr for @cn;
$d2 .= $reset.$cmr;
$x2 .= $_ for map { s/[^\t\r\n]/$cma/gr } @cn;
$opt_v and push @h2, map { $opt_U ? charnames::viacode (ord) : unpack "H*"; } @cn;
}
}
$heu->{pct} = ($heu->{old} + $heu->{new}) / (2 * $heu->{same});
# Back to lists of lines, where the text always ends in exactly one newline
my @d = map { [ split m/(?<=\n)/ => s/\n*\z/\n/r ] } $d1, $d2;
if ($opt_m) {
$opt_v > 1 and s/(\S+)/ $1 /g for $x1, $x2;
s/[ \t]*\n*\z/\n/ for $x1, $x2;
# Put every marker line after the line it belongs to, but only for a side
# that has markers at all
my @x = map { /\S/ ? [ split m/(?<=\n)/ ] : [] } $x1, $x2;
foreach my $n (0, 1) {
@{$x[$n]} and $d[$n] = [ map {( $d[$n][$_], $x[$n][$_] // "" )} 0 .. (scalar @{$d[$n]} - 1) ];
}
}
if ($opt_v) {
# At verbosity 3 and up with --utf-8, the code point is added to the name
$opt_U && $opt_v > 2 and $_ .= sprintf " (U+%06X)", charnames::vianame ($_) for @h1, @h2;
@h1 and push @{$d[0]}, sprintf " -- ${clr_dbg}verbose$reset : %s\n", join ", " => map { $clr_old.$_.$reset } @h1;
@h2 and push @{$d[1]}, sprintf " -- ${clr_dbg}verbose$reset : %s\n", join ", " => map { $clr_new.$_.$reset } @h2;
}
@d;
} # subdiff
# read_rc ()
# Overrule the defaults in %rc with the settings from the user's
# configuration files. The files are read in a fixed order, so a later file
# overrules an earlier one. Files that are empty, unreadable, or writable by
# group or others are skipped.
sub read_rc {
    # Without a home directory the file names below would point into the
    # root of the file system: those are not the user's files.
    my $home = $ENV{HOME} || $ENV{USERPROFILE} || $ENV{HOMEPATH} or return;
    foreach my $rcf (
            "$home/ccdiff.rc",
            "$home/.ccdiffrc",
            "$home/.config/ccdiff",
            ) {
        -s $rcf or next;
        (stat $rcf)[2] & 022 and next;
        open my $fh, "<", $rcf or next;
        while (<$fh>) {
            # key = value  or  key : value
            my ($k, $v) = (m/^\s*([-\w]+)\s*[:=]\s*(.*\S)/) or next;
            # Key:   lower case, no color/colour suffix, aliases mapped
            # Value: U+XXXX becomes that character, booleans become 0 or -1
            $rc{ lc $k
                =~ s{[-_]colou?r$}{}ir
                =~ s{background}{bg}ir
                =~ s{^(?:unicode|utf-?8?)$}{utf8}ir
                } = $v
                =~ s{U\+?([0-9A-Fa-f]{2,7})}{chr hex $1}ger
                =~ s{^(?:no|false)$}{0}ir
                =~ s{^(?:yes|true)$}{-1}ir; # -1 is still true
            }
        }
    } # read_rc
# Return the known colors from Term::ANSIColor
# Stolen straight from the pm
# The result is the sorted list of lower-cased attribute names, limited to
# what the loaded version of Term::ANSIColor supports.
sub tac_colors {
    my @names = qw(
        CLEAR           RESET             BOLD            DARK
        FAINT           ITALIC            UNDERLINE       UNDERSCORE
        BLINK           REVERSE           CONCEALED

        BLACK           RED               GREEN           YELLOW
        BLUE            MAGENTA           CYAN            WHITE
        ON_BLACK        ON_RED            ON_GREEN        ON_YELLOW
        ON_BLUE         ON_MAGENTA        ON_CYAN         ON_WHITE

        BRIGHT_BLACK    BRIGHT_RED        BRIGHT_GREEN    BRIGHT_YELLOW
        BRIGHT_BLUE     BRIGHT_MAGENTA    BRIGHT_CYAN     BRIGHT_WHITE
        ON_BRIGHT_BLACK ON_BRIGHT_RED     ON_BRIGHT_GREEN ON_BRIGHT_YELLOW
        ON_BRIGHT_BLUE  ON_BRIGHT_MAGENTA ON_BRIGHT_CYAN  ON_BRIGHT_WHITE
        );

    # 256 colors
    foreach my $red (0 .. 5) {
        foreach my $green (0 .. 5) {
            foreach my $blue (0 .. 5) {
                push @names, "RGB$red$green$blue", "ON_RGB$red$green$blue";
                }
            }
        }
    push @names, "ANSI$_", "ON_ANSI$_" for 0 .. 255;
    push @names, "GREY$_", "ON_GREY$_" for 0 .. 23;

    my %known;
    $known{lc $_} = 1 for @names;

    # Drop what the loaded version does not know yet
    my $version = $Term::ANSIColor::VERSION;
    my @unsupported;
    $version < 3.02 and push @unsupported, qr/italic/;
    $version < 4.00 and push @unsupported, qr/rgb|grey/;
    $version < 4.06 and push @unsupported, qr/ansi/;
    foreach my $rx (@unsupported) {
        delete $known{$_} for grep { $_ =~ $rx } keys %known;
        }

    return sort keys %known;
    } # tac_colors
1;
__END__
=encoding utf-8
=head1 NAME
ccdiff - Colored Character diff
=head1 SYNOPSIS
ccdiff [options] file1|- file2|-
ccdiff [options] dir1 dir2
ccdiff --help
ccdiff --man
ccdiff --info
=head1 DESCRIPTION
Show the diff between two files on a character by character base. In contrast to
the standard diff tools, this tool uses the diff algorithm horizontally for each
line in the vertical diff, highlighting the changes. This is very handy in hard
to spot changes like C<O> to C<0>, C<I> to C<l> or C<1> and whitespace.
If there are two arguments, and both are folders/directories, a recursive diff
is executed. This is not available when used as a (sub)class.
=head1 OPTIONS
=head2 Command line options
=over 2
=item --help -?
Show a summary of the available command-line options and exit.
=item --version -V
Show the version and exit.
=item --man
Show this manual using pod2man and nroff.
=item --info
Show this manual using pod2text.
=item --utf-8 -U
All I/O (streams to compare and standard out) are in UTF-8.
=item --diff-class=C --dc=C --pp
Select the class used to execute the diff. By default C<ccdiff> will select
the first available out of C<Algorithm::Diff::XS> or C<Algorithm::Diff>.
Sometimes the C<XS> version fails on encoding and the pure-perl version will
work just fine. You can force C<ccdiff> to use either.
Select the pure-perl version with any of C<PP>, C<AD>, C<Algorithm::Diff>,
C<Algorithm-Diff>, or C<Algorithm::Diff::PP> (case insensitive).
For convenience, C<--dc=pp> can be abbreviated to C<--pp>.
--pp
--dc=pp
--dc=algorithm-diff
--diff-class=Algorithm::Diff::PP
Select the XS version with any of C<XS>, C<ADX>, C<Algorithm::Diff::XS>, or
C<Algorithm-Diff-XS> (case insensitive).
--dc=xs
--dc=algorithm-diff-xs
--diff-class=Algorithm::Diff::XS
=item --unified[=3] -u [3]
Generate a unified diff. The number of context lines is optional. When omitted
it defaults to 3. Currently there is no provision of dealing with overlapping
diff chunks. If the common part between two diff chunks is shorter than twice
the number of context lines, some lines may show twice.
The default is to use traditional diff:
5,5c5,5
< Sat Dec 18 07:00:33 1993,I.O.D.U.,,756194433,1442539
---
> Sat Dec 18 07:08:33 1998,I.O.D.U.,,756194433,1442539
a unified diff (-u1) would be
5,5c5,5
Tue Sep 6 05:43:59 2005,B.O.Q.S.,,1125978239,1943341
-Sat Dec 18 07:00:33 1993,I.O.D.U.,,756194433,1442539
+Sat Dec 18 07:08:33 1998,I.O.D.U.,,756194433,1442539
Mon Feb 23 10:37:02 2004,R.X.K.S.,van,1077529022,1654127
=item --verbose[=1] -v[1]
Show an additional line for each old or new section in a change chunk (not for
added or deleted lines) that shows the hexadecimal value of each character. If
C<--utf-8> is in effect, it will show the Unicode character name(s).
This is a debugging option, so invisible characters can still be "seen".
C<--verbose> accepts an optional verbosity-level. On level 2 and up, all
horizontal changes get left-and-right markers inserted to enable seeing the
location of the ZERO WIDTH or invisible characters. With level 3 and up and
Unicode enabled, the changed characters will also show the codepoint in hex.
An example of this:
With -Uu0v0:
1,1c1,1
- A BCDE Fg
+ A BcdE​Fg
With -Uu0v1:
1,1c1,1
- A BCDE Fg
- -- verbose : SPACE, LATIN CAPITAL LETTER C, LATIN CAPITAL LETTER D, SPACE
+ A BcdE​Fg
+ -- verbose : LATIN SMALL LETTER C, LATIN SMALL LETTER D, ZERO WIDTH SPACE
With -Uu0v2:
1,1c1,1
- A ↱ ↰B↱CD↰E↱ ↰Fg
- -- verbose : SPACE, LATIN CAPITAL LETTER C, LATIN CAPITAL LETTER D, SPACE
+ A B↱cd↰E↱​↰Fg
+ -- verbose : LATIN SMALL LETTER C, LATIN SMALL LETTER D, ZERO WIDTH SPACE
With -Uu0v3:
1,1c1,1
- A ↱ ↰B↱CD↰E↱ ↰Fg
- -- verbose : SPACE (U+000020), LATIN CAPITAL LETTER C (U+000043), LATIN CAPITAL LETTER D (U+000044), SPACE (U+000020)
+ A B↱cd↰E↱​↰Fg
+ -- verbose : LATIN SMALL LETTER C (U+000063), LATIN SMALL LETTER D (U+000064), ZERO WIDTH SPACE (U+00200B)
With -Uu0v2 --ascii:
1,1c1,1
- A > <B>CD<E> <Fg
- -- verbose : SPACE, LATIN CAPITAL LETTER C, LATIN CAPITAL LETTER D, SPACE
+ A B>cd<E>​<Fg
+ -- verbose : LATIN SMALL LETTER C, LATIN SMALL LETTER D, ZERO WIDTH SPACE
the word "verbose" and the character markers will be displayed using the
C<verbose> color. The characters used for the markers can be defined in your
configuration file as C<chr_cml> (the character used as marker on the left)
and C<chr_cmr> (the character used as marker on the right).
=item --markers -m
Use markers under each changed character in change-chunks.
C<--markers> is especially useful if the terminal does not support colors, or
if you want to copy/paste the output to (ASCII) mail. See also C<--ascii>. The
markers will have the same color as added or deleted text.
This will look like (with unified diff):
5,5c5,5
-Sat Dec 18 07:08:33 1998,I.O.D.U.,,756194433,1442539
- ▼ ▼
+Sat Dec 18 07:00:33 1993,I.O.D.U.,,756194433,1442539
+ ▲ ▲
The characters used for the markers can be defined in your configuration file
as C<chr_old> (the character used as marker under removed characters) and
C<chr_new> (the character used as marker under added characters).
If C<--ellipsis> is also in effect and either the C<chr_eli> is longer than
one character or C<--verbose> level is over 2, this option is automatically
disabled.
=item --ascii -a
Use (colored) ASCII indicators instead of Unicode. The default indicators are
Unicode characters that stand out better. The markers will have the same color
as added or deleted text.
For the vertical markers (C<-m>) that would look like:
5,5c5,5
-Sat Dec 18 07:08:33 1998,I.O.D.U.,,756194433,1442539
- ^ ^
+Sat Dec 18 07:00:33 1993,I.O.D.U.,,756194433,1442539
+ ^ ^
For the positional indicators, I did consider using U+034e (COMBINING UPWARDS
ARROW BELOW), but as most terminals are probably unable to show it due to line
height changes, I did not pursue the idea.
=item --pink -p
Change the default C<red> for deleted text to the color closest to pink that
is supported by L<Term::ANSIColor>: C<magenta>.
=item --reverse -r
Reverse/invert the foreground and background for the colored indicators.
If the foreground color has C<bold>, it will be stripped from the new background
color.
=item --swap -s
Swap the colors for new and old.
=item --list-colors
List available colors and exit.
=item --no-colors
Disable all colors. Useful for redirecting the diff output to a file that is to
be included in documentation.
This is the default if the environment variable C<$NO_COLOR> has a true value or
if the environment variable C<$CLICOLOR> is set to a false value. If set,
C<$CLICOLOR_FORCE> will overrule the default of C<$NO_COLOR>.
=item --old=color
Define the foreground color for deleted text.
=item --new=color
Define the foreground color for added text.
=item --bg=color
Define the background color for changed text.
=item --index --idx -I
Prefix position indicators with an index.
[001] 5,5c5,5
-Sat Dec 18 07:08:33 1998,I.O.D.U.,,756194433,1442539
+Sat Dec 18 07:00:33 1993,I.O.D.U.,,756194433,1442539
If a positive number is passed (C<--index=4> or C<-I 4>), display just the
chunk with that index, using the C<verbose> color:
This is useful in combination with C<--verbose>.
=item --threshold=2 -t 2
Defines the number of lines a change block may differ before the fall-back of
horizontal diff to vertical diff.
If a chunk describes a change, and the number of lines in the original block
has fewer or more lines than the new block and that difference exceeds this
threshold, C<ccdiff> will fall-back to vertical diff.
=item --heuristics=n -h n
Defines the percentage of character-changes a change block may differ before
the fall-back of horizontal diff to vertical diff.
This percentage is calculated as C<(characters removed + characters added) /
(2 * characters unchanged)>.
=item --ellipsis=n -e n
Defines the number of characters to keep on each side of a horizontal-equal
segment. The default is C<0>, meaning do not compress.
If set to a positive number, and the length of a segment of equal characters
inside a horizontal diff is longer than twice this value, the middle part is
replaced with C<┈ U02508 \N{BOX DRAWINGS LIGHT QUADRUPLE DASH HORIZONTAL}>
(instead of … U02026, as HORIZONTAL ELLIPSIS does not stand out enough).
With C<-u0me3> that would be like
5,5c5,5
-Sat┈07:08:33┈ 1998,I.┈539
- ▼ ▼
+Sat┈07:00:33┈ 1993,I.┈539
+ ▲ ▲
With C<-u0e3 -v2> like
5,5c5,5
-Sat↤9↦07:0↱0↰:33 199↱3↰,I.↤23↦539
- -- verbose : DIGIT ZERO, DIGIT THREE
+Sat↤9↦07:0↱8↰:33 199↱8↰,I.↤23↦539
+ -- verbose : DIGIT EIGHT, DIGIT EIGHT
The text used for the replaced text can be defined in your configuration file
as C<chr_eli> and/or C<chr_eli_v>.
=item --ignore-case -i
Ignore case on comparison.
=item --ignore-all-space -w
Ignore all white-space changes. This will set all options C<-b>, C<-Z>, C<-E>,
and C<-B>.
=item --ignore-trailing-space -Z
Ignore changes in trailing white-space (tabs and spaces).
=item --ignore-ws|ignore-space-change -b
Ignore changes in horizontal white-space (tabs and spaces). This does not
include white-space changes that split non-white-space or remove white-space
between two non-white-space elements.
=item --ignore-tab-expansion -E
NYI
=item --ignore-blank-lines -B
B<Just Partly Implemented> (WIP)
=back
=head2 Configuration files
In order to be able to overrule the defaults set in C<ccdiff>, one can set
options specific for this login. The following option files are looked for
in this order:
- $HOME/ccdiff.rc
- $HOME/.ccdiffrc
- $HOME/.config/ccdiff
and evaluated in that order. Any options specified in a file later in that
chain will overwrite previously set options.
Option files are only read and evaluated if they are not empty and not writable
by others than the owner.
The syntax of the file is one option per line, where leading and trailing
white-space is ignored. If that line then starts with one of the options
listed below, followed by optional white-space followed by either an C<=> or
a C<:>, followed by optional white-space and the values, the value is assigned
to the option. The values C<no> and C<false> (case insensitive) are aliases
for C<0>. The values C<yes> and C<true> are aliases to C<-1> (C<-1> being a
true value).
Between parens is the corresponding command-line option.
=over 2
=item unified (-u)
If you prefer unified-diff over old-style diff by default, set this to the
desired number of context lines:
unified : 3
The default is undefined
=item markers (-m)
markers : false
Defines if markers should be used under changed characters. The default is to
use colors only. The C<-m> command line option will toggle the option when set
from a configuration file.
=item ascii (-a)
ascii : false
Defines whether ASCII markers are used instead of Unicode markers. The default
is to use Unicode markers.
=item reverse (-r)
reverse : false
Defines if changes are displayed as foreground-color over background-color
or background-color over foreground-color. The default is C<false>, so it will
color the changes with the appropriate color (C<new> or C<old>) over the
default background color.
=item swap (-s)
swap : false
Swap the colors for new and old.
=item new (--new)
new : green
Defines the color to be used for added text. The default is C<green>.
The color C<none> is also accepted and disables this color.
Any color accepted by L<Term::ANSIColor> is allowed. Any other color will
result in a warning. This option can include C<bold> either as prefix or
as suffix.
This option may also be specified as
new-color
new_color
new-colour
new_colour
=item old (--old)
old : red
Defines the color to be used for deleted text. The default is C<red>.
The color C<none> is also accepted and disables this color.
Any color accepted by L<Term::ANSIColor> is allowed. Any other color will
result in a warning. This option can include C<bold> either as prefix or
as suffix.
This option may also be specified as
old-color
old_color
old-colour
old_colour
=item bg (--bg)
bg : white
Defines the color to be used as background for changed text. The default is
C<white>.
The color C<none> is also accepted and disables this color.
Any color accepted by L<Term::ANSIColor> is allowed. Any other color will
result in a warning. The C<bold> attribute is not allowed.
This option may also be specified as
bg-color
bg_color
bg-colour
bg_colour
background
background-color
background_color
background-colour
background_colour
=item header (-H --header --HC=color --header-color=color)
header : 1
header : blue_on_white
Defines if a header is displayed above the diff. The default is C<1>. A
supported color is also accepted as value.
If the value is a valid supported color, it will show the header in that
color scheme. To disable the header set it to C<0> in the RC file or use
C<--no-header> as a command line argument.
=item verbose
verbose : cyan
Defines the color to be used for the verbose tag. The default is C<cyan>.
This color will only be used under C<--verbose>.
The color C<none> is also accepted and disables this color.
Any color accepted by L<Term::ANSIColor> is allowed. Any other color will
result in a warning.
This option may also be specified as
verbose-color
verbose_color
verbose-colour
verbose_colour
=item utf8 (-U)
utf8 : yes
Defines whether all I/O is to be interpreted as UTF-8. The default is C<no>.
This option may also be specified as
unicode
utf
utf-8
=item index (-I)
index : no
Defines if the position indication for a change chunk is prefixed with an
index number. The default is C<no>. The index is 1-based.
Without this option, the position indication would be like
5,5c5,5
19,19d18
42a42,42
with this option, it would be
[001] 5,5c5,5
[002] 19,19d18
[003] 42a42,42
When this option contains a positive integer, C<ccdiff> will only show the
diff chunk with that index.
=item emacs
emacs : no
If this option is yes/true and C<ccdiff> is called with just one single
argument, and that argument is an existing file, C<ccdiff> will act as if
called as
$ ccdiff file~ file
if file~ exists.
=item threshold (-t)
threshold : 2
Defines the number of lines a change block may differ before the fall-back of
horizontal diff to vertical diff.
=item heuristics (-h)
heuristics : 40
Defines the percentage of character-changes a change block may differ before
the fall-back of horizontal diff to vertical diff. The default is undefined,
meaning no fallback based on heuristics.
=item ellipsis (-e)
ellipsis : 0
Defines the number of characters to keep on each side of a horizontal-equal
segment. The default is C<0>, meaning to not compress. See also C<chr_eli>.
=item chr_old
chr_old : U+25BC
Defines the character used to indicate the position of removed text on the
line below the text when option C<-m> is in effect.
=item chr_new
chr_new : U+25B2
Defines the character used to indicate the position of added text on the
line below the text when option C<-m> is in effect.
=item chr_cml
chr_cml : U+21B1
Defines the character used to indicate the starting position of changed text
in a line when verbose level is 3 and up.
=item chr_cmr
chr_cmr : U+21B0
Defines the character used to indicate the ending position of changed text
in a line when verbose level is 3 and up.
=item chr_eli
chr_eli : U+2508
Defines the character used to indicate omitted text in large unchanged text
when C<--ellipsis>/C<-e> is in effect.
This character is not equally well visible on all terminals or in all fonts,
so you might want to change it to something that stands out better in your
environment. Possible suggestions:
… U+2026 HORIZONTAL ELLIPSIS
‴ U+2034 TRIPLE PRIME
‷ U+2037 REVERSED TRIPLE PRIME
↔ U+2194 LEFT RIGHT ARROW
↭ U+21ad LEFT RIGHT WAVE ARROW
↮ U+21ae LEFT RIGHT ARROW WITH STROKE
↹ U+21b9 LEFTWARDS ARROW TO BAR OVER RIGHTWARDS ARROW TO BAR
⇄ U+21c4 RIGHTWARDS ARROW OVER LEFTWARDS ARROW
⇆ U+21c6 LEFTWARDS ARROW OVER RIGHTWARDS ARROW
⇎ U+21ce LEFT RIGHT DOUBLE ARROW WITH STROKE
⇔ U+21d4 LEFT RIGHT DOUBLE ARROW
⇹ U+21f9 LEFT RIGHT ARROW WITH VERTICAL STROKE
⇼ U+21fc LEFT RIGHT ARROW WITH DOUBLE VERTICAL STROKE
⇿ U+21ff LEFT RIGHT OPEN-HEADED ARROW
≋ U+224b TRIPLE TILDE
┄ U+2504 BOX DRAWINGS LIGHT TRIPLE DASH HORIZONTAL
┅ U+2505 BOX DRAWINGS HEAVY TRIPLE DASH HORIZONTAL
┈ U+2508 BOX DRAWINGS LIGHT QUADRUPLE DASH HORIZONTAL
┉ U+2509 BOX DRAWINGS HEAVY QUADRUPLE DASH HORIZONTAL
⧻ U+29fb TRIPLE PLUS
⬌ U+2b0c LEFT RIGHT BLACK ARROW
=item chr_eli_v
chr_eli_v : U+21A4U+21A6
When using C<--ellipsis> with C<--verbose> level 2 or up, the single character
indicator will be replaced with this value. If it is 2 characters long, the
length of the compressed part is put between the two characters.
A suggested alternative might be U+21E4U+21E5
=item iwbZusePP
As L<Algorithm::Diff::XS> is fast but is not 100% drop-in compliant with
L<Algorithm::Diff> and options C<-i>, C<-w>, C<-b>, and C<-Z> are likely to
not work in the C<XS> version, this option allows automatic switching to
the slower version if any of these options is selected.
=back
=head1 Git integration
You can use ccdiff to show diffs in git. It may work like this:
$ git config --global diff.tool ccdiff
$ git config --global difftool.prompt false
$ git config --global difftool.ccdiff.cmd 'ccdiff --utf-8 -u -r $LOCAL $REMOTE'
$ git difftool SHA~..SHA
$ wget https://github.com/Tux/App-ccdiff/raw/master/Files/git-ccdiff \
-O ~/bin/git-ccdiff
$ perl -pi -e 's{/pro/bin/perl}{/usr/bin/env perl}' ~/bin/git-ccdiff
$ chmod 755 ~/bin/git-ccdiff
$ git ccdiff SHA
Of course you can use C<curl> instead of C<wget> and you can choose your own
(fixed) path to C<perl> instead of using C</usr/bin/env>.
From then on you can do
$ git ccdiff
$ git ccdiff 5c5a39f2
=head1 CAVEATS
Due to the implementation, where both sides of the comparison are completely
kept in memory, this tool might not be able to deal with (very) large datasets.
=head2 Speed
There are situations where L<Algorithm::Diff> takes considerably more time
compared to e.g. GNU diff. Installing L<Algorithm::Diff::XS> will make
C<ccdiff> a lot faster. C<ccdiff> will choose L<Algorithm::Diff::XS> if
available.
Note however that options like C<-i>, C<-w>, C<-b>, and C<-Z> are likely to
be a no-op in L<Algorithm::Diff::XS>, as that has not been implemented and
it is rather unlikely it will be. In that case, choose C<--dc=pp>. If you
always want this switch to be made automatically, set C<iwbZusePP = 1> in
one of your L</Configuration files>.
=head1 SEE ALSO
L<Algorithm::Diff::XS>, L<Algorithm::Diff>, L<Text::Diff>
=head1 AUTHOR
H.Merijn Brand
=head1 COPYRIGHT AND LICENSE
Copyright (C) 2018-2025 H.Merijn Brand. All rights reserved.
This library is free software; you can redistribute and/or modify it under
the same terms as The Artistic License 2.0.
=for elvis
:ex:se gw=75|color guide #ff0000:
=cut