#!/usr/bin/perl
Algorithm::Diff->
import
(
'diff'
);
my
$File_Length_Difference
= 0;
my
@Ed_Hunks
= ();
my
$usage
= <<
"ENDUSAGE"
;
Usage: $0 [{-c | -C lines -e | -f | -u | -U lines}] oldfile newfile
-c
do
a context diff
with
3 lines of context
-C
do
a context diff
with
'lines'
lines of context (implies -c)
-e create a script
for
the ed editor to change oldfile to newfile
-f like -e but in
reverse
order
-u
do
a unified diff
with
3 lines of context
-U
do
a unified diff
with
'lines'
lines of context (implies -u)
-
q report
only whether or not the files differ
By
default
it will
do
an
"old-style"
diff,
with
output like UNIX diff
ENDUSAGE
my
$Context_Lines
= 0;
my
$Diff_Type
=
"OLD"
;
my
(
$opt_c
,
$opt_u
,
$opt_e
,
$opt_f
,
$opt_q
);
while
(
$ARGV
[0] =~ /^-/) {
my
$opt
=
shift
;
last
if
$opt
eq
'--'
;
if
(
$opt
=~ /^-C(.*)/) {
$Context_Lines
= $1 ||
shift
;
$opt_c
= 1;
$Diff_Type
=
"CONTEXT"
;
}
elsif
(
$opt
=~ /^-c$/) {
$Context_Lines
= 3;
$opt_c
= 1;
$Diff_Type
=
"CONTEXT"
;
}
elsif
(
$opt
=~ /^-e$/) {
$opt_e
= 1;
$Diff_Type
=
"ED"
;
}
elsif
(
$opt
=~ /^-f$/) {
$opt_f
= 1;
$Diff_Type
=
"REVERSE_ED"
;
}
elsif
(
$opt
=~ /^-U(.*)$/) {
$Context_Lines
= $1 ||
shift
;
$opt_u
= 1;
$Diff_Type
=
"UNIFIED"
;
}
elsif
(
$opt
=~ /^-u$/) {
$Context_Lines
= 3;
$opt_u
= 1;
$Diff_Type
=
"UNIFIED"
;
}
elsif
(
$opt
=~ /^-
q$/) {
$
Context_Lines = 0;
$opt_q
= 1;
$opt_e
= 1;
$Diff_Type
=
"ED"
;
}
else
{
$opt
=~ s/^-//;
bag(
"Illegal option -- $opt"
);
}
}
if
(
$opt_q
and
grep
(
$_
,(
$opt_c
,
$opt_f
,
$opt_u
)) > 1) {
bag(
"Combining -q with other options is nonsensical"
);
}
if
(
grep
(
$_
,(
$opt_c
,
$opt_e
,
$opt_f
,
$opt_u
)) > 1) {
bag(
"Only one of -c, -u, -f, -e are allowed"
);
}
bag(
$usage
)
unless
@ARGV
== 2;
my
(
$file1
,
$file2
) =
@ARGV
;
my
(
$char1
,
$char2
);
if
(
$Diff_Type
eq
"CONTEXT"
) {
$char1
=
'*'
x 3;
$char2
=
'-'
x 3;
}
elsif
(
$Diff_Type
eq
"UNIFIED"
) {
$char1
=
'-'
x 3;
$char2
=
'+'
x 3;
}
open
(F1,
'<'
,
$file1
) or bag(
"Couldn't open $file1: $!"
);
open
(F2,
'<'
,
$file2
) or bag(
"Couldn't open $file2: $!"
);
my
(
@f1
,
@f2
);
chomp
(
@f1
= <F1>);
close
F1;
chomp
(
@f2
= <F2>);
close
F2;
my
$diffs
= diff(\
@f1
, \
@f2
);
exit
0
unless
@$diffs
;
if
(
$opt_q
and
@$diffs
) {
print
"Files $file1 and $file2 differ\n"
;
exit
1;
}
if
(
$Diff_Type
=~ /UNIFIED|CONTEXT/) {
my
@st
=
stat
(
$file1
);
my
$MTIME
= 9;
print
"$char1 $file1\t"
,
scalar
localtime
(
$st
[
$MTIME
]),
"\n"
;
@st
=
stat
(
$file2
);
print
"$char2 $file2\t"
,
scalar
localtime
(
$st
[
$MTIME
]),
"\n"
;
}
my
(
$hunk
,
$oldhunk
);
foreach
my
$piece
(
@$diffs
) {
$hunk
= Hunk->new(
$piece
,
$Context_Lines
);
next
unless
$oldhunk
;
if
(
$Context_Lines
&&
$hunk
->does_overlap(
$oldhunk
)) {
$hunk
->prepend_hunk(
$oldhunk
);
}
else
{
$oldhunk
->output_diff(\
@f1
, \
@f2
,
$Diff_Type
);
}
}
continue
{
$oldhunk
=
$hunk
;
}
$oldhunk
->output_diff(\
@f1
, \
@f2
,
$Diff_Type
);
map
{
$_
->output_ed_diff(\
@f1
, \
@f2
,
$Diff_Type
)}
@Ed_Hunks
if
@Ed_Hunks
;
exit
1;
sub
bag {
my
$msg
=
shift
;
$msg
.=
"\n"
;
warn
$msg
;
exit
2;
}
{
package
Hunk;
sub
new {
my
(
$class
,
$piece
,
$context_items
) =
@_
;
my
$block
= Block->new(
$piece
);
my
$before_diff
=
$File_Length_Difference
;
my
$after_diff
=
$before_diff
+
$block
->{
"length_diff"
};
$File_Length_Difference
+=
$block
->{
"length_diff"
};
my
@remove_array
=
$block
->remove;
my
@insert_array
=
$block
->insert;
my
(
$a1
,
$a2
,
$b1
,
$b2
,
$start1
,
$start2
,
$end1
,
$end2
);
$a1
=
@remove_array
?
$remove_array
[0 ]->{
"item_no"
} : -1;
$a2
=
@remove_array
?
$remove_array
[-1]->{
"item_no"
} : -1;
$b1
=
@insert_array
?
$insert_array
[0 ]->{
"item_no"
} : -1;
$b2
=
@insert_array
?
$insert_array
[-1]->{
"item_no"
} : -1;
$start1
=
$a1
== -1 ?
$b1
-
$before_diff
:
$a1
;
$end1
=
$a2
== -1 ?
$b2
-
$after_diff
:
$a2
;
$start2
=
$b1
== -1 ?
$a1
+
$before_diff
:
$b1
;
$end2
=
$b2
== -1 ?
$a2
+
$after_diff
:
$b2
;
my
$hunk
= {
"start1"
=>
$start1
,
"start2"
=>
$start2
,
"end1"
=>
$end1
,
"end2"
=>
$end2
,
"blocks"
=> [
$block
],
};
bless
$hunk
,
$class
;
$hunk
->flag_context(
$context_items
);
return
$hunk
;
}
sub
flag_context {
my
(
$hunk
,
$context_items
) =
@_
;
return
unless
$context_items
;
my
$start1
=
$hunk
->{
"start1"
};
my
$num_added
=
$context_items
>
$start1
?
$start1
:
$context_items
;
$hunk
->{
"start1"
} -=
$num_added
;
$hunk
->{
"start2"
} -=
$num_added
;
my
$end1
=
$hunk
->{
"end1"
};
$num_added
= (
$end1
+
$context_items
>
$#f1
) ?
$#f1
-
$end1
:
$context_items
;
$hunk
->{
"end1"
} +=
$num_added
;
$hunk
->{
"end2"
} +=
$num_added
;
}
sub
does_overlap {
my
(
$hunk
,
$oldhunk
) =
@_
;
return
""
unless
$oldhunk
;
return
(
$hunk
->{
"start1"
} -
$oldhunk
->{
"end1"
} <= 1 ||
$hunk
->{
"start2"
} -
$oldhunk
->{
"end2"
} <= 1);
}
sub
prepend_hunk {
my
(
$hunk
,
$oldhunk
) =
@_
;
$hunk
->{
"start1"
} =
$oldhunk
->{
"start1"
};
$hunk
->{
"start2"
} =
$oldhunk
->{
"start2"
};
unshift
(@{
$hunk
->{
"blocks"
}}, @{
$oldhunk
->{
"blocks"
}});
}
sub
output_diff {
my
$diff_type
=
$_
[-1];
my
%funchash
= (
"OLD"
=> \
&output_old_diff
,
"CONTEXT"
=> \
&output_context_diff
,
"ED"
=> \
&store_ed_diff
,
"REVERSE_ED"
=> \
&output_ed_diff
,
"UNIFIED"
=> \
&output_unified_diff
,
);
if
(
exists
$funchash
{
$diff_type
}) {
&{
$funchash
{
$diff_type
}}(
@_
);
}
else
{
die
"unknown diff type $diff_type"
}
}
sub
output_old_diff {
my
(
$hunk
,
$fileref1
,
$fileref2
) =
@_
;
my
%op_hash
= (
'+'
=>
'a'
,
'-'
=>
'd'
,
'!'
=>
'c'
);
my
@blocklist
= @{
$hunk
->{
"blocks"
}};
warn
(
"Expecting one block in an old diff hunk!"
)
if
scalar
@blocklist
!= 1;
my
$block
=
$blocklist
[0];
my
$op
=
$block
->op;
my
$range1
=
$hunk
->context_range(1);
my
$range2
=
$hunk
->context_range(2);
my
$action
=
$op_hash
{
$op
} ||
warn
"unknown op $op"
;
print
"$range1$action$range2\n"
;
if
(
$block
->remove) {
my
@outlist
=
@$fileref1
[
$hunk
->{
"start1"
}..
$hunk
->{
"end1"
}];
map
{
$_
=
"< $_\n"
}
@outlist
;
print
@outlist
;
}
print
"---\n"
if
$op
eq
'!'
;
if
(
$block
->insert) {
my
@outlist
=
@$fileref2
[
$hunk
->{
"start2"
}..
$hunk
->{
"end2"
}];
map
{
$_
=
"> $_\n"
}
@outlist
;
print
@outlist
;
}
}
sub
output_unified_diff {
my
(
$hunk
,
$fileref1
,
$fileref2
) =
@_
;
my
@blocklist
;
my
$range1
=
$hunk
->unified_range(1);
my
$range2
=
$hunk
->unified_range(2);
print
"@@ -$range1 +$range2 @@\n"
;
my
$low
=
$hunk
->{
"start1"
};
my
$hi
=
$hunk
->{
"end1"
};
my
(
$num_added
,
$num_removed
) = (0,0);
my
@outlist
=
@$fileref1
[
$low
..
$hi
];
map
{s/^/ /}
@outlist
;
foreach
my
$block
(@{
$hunk
->{
"blocks"
}}) {
foreach
my
$item
(
$block
->remove) {
my
$op
=
$item
->{
"sign"
};
my
$offset
=
$item
->{
"item_no"
} -
$low
+
$num_added
;
$outlist
[
$offset
] =~ s/^ /
$op
/;
$num_removed
++;
}
foreach
my
$item
(
$block
->insert) {
my
$op
=
$item
->{
"sign"
};
my
$i
=
$item
->{
"item_no"
};
my
$offset
=
$i
-
$hunk
->{
"start2"
} +
$num_removed
;
splice
(
@outlist
,
$offset
,0,
"$op$$fileref2[$i]"
);
$num_added
++;
}
}
map
{s/$/\n/}
@outlist
;
print
@outlist
;
}
sub
output_context_diff {
my
(
$hunk
,
$fileref1
,
$fileref2
) =
@_
;
my
@blocklist
;
print
"***************\n"
;
my
$range1
=
$hunk
->context_range(1);
my
$range2
=
$hunk
->context_range(2);
print
"*** $range1 ****\n"
;
my
$low
=
$hunk
->{
"start1"
};
my
$hi
=
$hunk
->{
"end1"
};
if
(
@blocklist
=
grep
{
$_
->remove} @{
$hunk
->{
"blocks"
}}) {
my
@outlist
=
@$fileref1
[
$low
..
$hi
];
map
{s/^/ /}
@outlist
;
foreach
my
$block
(
@blocklist
) {
my
$op
=
$block
->op;
foreach
my
$item
(
$block
->remove) {
$outlist
[
$item
->{
"item_no"
} -
$low
] =~ s/^ /
$op
/;
}
}
map
{s/$/\n/}
@outlist
;
print
@outlist
;
}
print
"--- $range2 ----\n"
;
$low
=
$hunk
->{
"start2"
};
$hi
=
$hunk
->{
"end2"
};
if
(
@blocklist
=
grep
{
$_
->insert} @{
$hunk
->{
"blocks"
}}) {
my
@outlist
=
@$fileref2
[
$low
..
$hi
];
map
{s/^/ /}
@outlist
;
foreach
my
$block
(
@blocklist
) {
my
$op
=
$block
->op;
foreach
my
$item
(
$block
->insert) {
$outlist
[
$item
->{
"item_no"
} -
$low
] =~ s/^ /
$op
/;
}
}
map
{s/$/\n/}
@outlist
;
print
@outlist
;
}
}
sub
store_ed_diff {
my
$hunk
=
shift
;
unshift
@Ed_Hunks
,
$hunk
;
}
sub
output_ed_diff {
my
$diff_type
=
$_
[-1];
my
(
$hunk
,
$fileref1
,
$fileref2
) =
@_
;
my
%op_hash
= (
'+'
=>
'a'
,
'-'
=>
'd'
,
'!'
=>
'c'
);
my
@blocklist
= @{
$hunk
->{
"blocks"
}};
warn
(
"Expecting one block in an ed diff hunk!"
)
if
scalar
@blocklist
!= 1;
my
$block
=
$blocklist
[0];
my
$op
=
$block
->op;
my
$range1
=
$hunk
->context_range(1);
$range1
=~ s/,/ /
if
$diff_type
eq
"REVERSE_ED"
;
my
$action
=
$op_hash
{
$op
} ||
warn
"unknown op $op"
;
print
(
$diff_type
eq
"ED"
?
"$range1$action\n"
:
"$action$range1\n"
);
if
(
$block
->insert) {
my
@outlist
=
@$fileref2
[
$hunk
->{
"start2"
}..
$hunk
->{
"end2"
}];
map
{s/$/\n/}
@outlist
;
print
@outlist
;
print
".\n"
;
}
}
sub
context_range {
my
(
$hunk
,
$flag
) =
@_
;
my
(
$start
,
$end
) = (
$hunk
->{
"start$flag"
},
$hunk
->{
"end$flag"
});
$start
++;
$end
++;
my
$range
= (
$start
<
$end
) ?
"$start,$end"
:
$end
;
return
$range
;
}
sub
unified_range {
my
(
$hunk
,
$flag
) =
@_
;
my
(
$start
,
$end
) = (
$hunk
->{
"start$flag"
},
$hunk
->{
"end$flag"
});
$start
++;
$end
++;
my
$length
=
$end
-
$start
+ 1;
my
$first
=
$length
< 2 ?
$end
:
$start
;
my
$range
=
$length
== 1 ?
$first
:
"$first,$length"
;
return
$range
;
}
}
{
package
Block;
sub
new {
my
(
$class
,
$chunk
) =
@_
;
my
@changes
= ();
foreach
my
$item
(
@$chunk
) {
my
(
$sign
,
$item_no
,
$text
) =
@$item
;
my
$hashref
= {
"sign"
=>
$sign
,
"item_no"
=>
$item_no
};
push
@changes
,
$hashref
;
}
my
$block
= {
"changes"
=> \
@changes
};
bless
$block
,
$class
;
$block
->{
"length_diff"
} =
$block
->insert -
$block
->remove;
return
$block
;
}
sub
op {
my
$block
=
shift
;
my
$insert
=
$block
->insert;
my
$remove
=
$block
->remove;
$remove
&&
$insert
and
return
'!'
;
$remove
and
return
'-'
;
$insert
and
return
'+'
;
warn
"unknown block type"
;
return
'^'
;
}
sub
remove {
return
grep
{
$_
->{
"sign"
} eq
'-'
} @{
shift
->{
"changes"
}}; }
sub
insert {
return
grep
{
$_
->{
"sign"
} eq
'+'
} @{
shift
->{
"changes"
}}; }
}
package
Algorithm::Diff;
BEGIN {
$Algorithm::Diff::VERSION
=
'0.57'
;
%Algorithm::Diff::EXPORT_OK
= (
LCS
=> 1,
diff
=> 1,
traverse_sequences
=> 1,
);
}
sub
import
{
no
strict;
my
$package
=
shift
;
my
$caller
=
caller
;
for
my
$func
(
@_
) {
unless
($ {
$package
.
'::EXPORT_OK'
}{
$func
}) {
Carp::croak(
"$package does not export function `$func'; aborting"
);
}
*{
"$ {caller}::$func"
} = \&{
"$ {package}::$func"
};
}
1;
}
sub
LCS_matrix {
my
@x
;
my
$a
;
my
$b
;
$a
=
shift
or usage();
$b
=
shift
or usage();
(
ref
$a
eq
'ARRAY'
) or usage();
(
ref
$b
eq
'ARRAY'
) or usage();
my
$eq
=
shift
;
my
(
$al
,
$bl
);
$al
=
@$a
;
$bl
=
@$b
;
my
(
$i
,
$j
);
$x
[0] = [(0) x (
$bl
+1)];
for
(
$i
=1;
$i
<=
$al
;
$i
++) {
my
$r
=
$x
[
$i
] = [];
$r
->[0] = 0;
for
(
$j
=1;
$j
<=
$bl
;
$j
++) {
if
(
defined
$eq
?
$eq
->(
$a
->[-
$i
],
$b
->[-
$j
])
:
$a
->[-
$i
] eq
$b
->[-
$j
]
) {
$r
->[
$j
] = 1 +
$x
[
$i
-1][
$j
-1];
}
else
{
my
$pi
=
$x
[
$i
][
$j
-1];
my
$pj
=
$x
[
$i
-1][
$j
];
$r
->[
$j
] = (
$pi
>
$pj
?
$pi
:
$pj
);
}
}
}
\
@x
;
}
sub
traverse_sequences {
my
$dispatcher
=
shift
;
my
$a
=
shift
;
my
$b
=
shift
;
my
$equal
=
shift
;
my
$x
= LCS_matrix(
$a
,
$b
,
$equal
);
my
(
$al
,
$bl
) = (
scalar
(
@$x
)-1,
scalar
(@{
$x
->[0]})-1);
my
(
$ap
,
$bp
) = (
$al
,
$bl
);
my
$dispf
;
while
(1) {
$dispf
=
undef
;
my
(
$ai
,
$bi
) = (
$al
-
$ap
,
$bl
-
$bp
);
if
(
$ap
== 0) {
$dispf
=
$dispatcher
->{A_FINISHED} ||
$dispatcher
->{DISCARD_B};
$bp
--;
}
elsif
(
$bp
== 0) {
$dispf
=
$dispatcher
->{B_FINISHED} ||
$dispatcher
->{DISCARD_A};
$ap
--;
}
elsif
(
defined
(
$equal
)
?
$equal
->(
$a
->[
$ai
],
$b
->[
$bi
])
:
$a
->[
$ai
] eq
$b
->[
$bi
]
) {
$dispf
=
$dispatcher
->{MATCH};
$ap
--;
$bp
--;
}
else
{
if
(
$x
->[
$ap
][
$bp
] ==
$x
->[
$ap
-1][
$bp
] + 1) {
$dispf
=
$dispatcher
->{DISCARD_B};
$bp
--;
}
else
{
$dispf
=
$dispatcher
->{DISCARD_A};
$ap
--;
}
}
$dispf
->(
$ai
,
$bi
,
@_
)
if
defined
$dispf
;
return
1
if
$ap
== 0 &&
$bp
== 0;
}
}
sub
LCS {
my
$lcs
= [];
my
(
$a
,
$b
);
my
$functions
= {
MATCH
=>
sub
{
push
@$lcs
,
$a
->[
$_
[0]]} };
traverse_sequences(
$functions
,
@_
);
wantarray
?
@$lcs
:
$lcs
;
}
sub
diff {
my
(
$a
,
$b
) =
@_
;
my
@cur_diff
= ();
my
@diffs
= ();
my
$functions
=
{
DISCARD_A
=>
sub
{
push
@cur_diff
, [
'-'
,
$_
[0],
$a
->[
$_
[0]]]},
DISCARD_B
=>
sub
{
push
@cur_diff
, [
'+'
,
$_
[1],
$b
->[
$_
[1]]]},
MATCH
=>
sub
{
push
@diffs
, [
@cur_diff
]
if
@cur_diff
;
@cur_diff
= ()
},
};
traverse_sequences(
$functions
,
@_
);
push
@diffs
, \
@cur_diff
if
@cur_diff
;
wantarray
?
@diffs
: \
@diffs
;
}
sub
usage {
Carp::croak(
"Usage: LCS([...], [...]); aborting"
);
}
1;