#!/usr/bin/env perl
no
if
$] >= 5.018,
warnings
=>
'experimental::smartmatch'
;
my
$argv_signature
=
join
' '
,
@ARGV
;
my
$o
= School::Code::Compare::Options->new(
@ARGV
);
my
@dir
=
defined
$o
->{dir} ? @{
$o
->{dir}} : ();
my
$file
=
$o
->{file};
my
$lang
=
$o
->{in};
my
$output_format
=
$o
->{out};
my
$to_file
=
$o
->{persist};
my
$verbose
=
$o
->{verbose};
my
$algo
=
$o
->{charset};
my
$do_prompt
= !
$o
->{yes};
my
$hide_skipped
= !
$o
->{all};
my
$mime_match
=
$o
->{mime};
my
$sort
=
$o
->{
sort
};
my
$split
=
$o
->{
split
};
my
$basedir
=
$o
->{basedir};
chop
$basedir
if
(
defined
$basedir
and
$basedir
=~ /\/$/);
if
(
$algo
!~ /^visibles$|^numsignes$|^signes$/) {
say
"charset not supported"
;
exit
1;
}
if
(
$lang
!~ /hashy|python|perl|bash|slashy|php|js|cpp|cs|c|java|html|xml|txt/) {
say
"lang not supported"
;
exit
1;
}
my
$magic
;
if
(
$mime_match
) {
if
(module_installed
'File::LibMagic'
) {
load File::LibMagic;
$magic
= File::LibMagic->new();
}
else
{
say
'Option --mime needs the module File::LibMagic installed'
;
exit
1;
}
}
my
@FILE_LIST
= ();
if
(
defined
$file
) {
@FILE_LIST
= read_file(
$file
,
binmode
=>
':utf8'
);
}
elsif
(
@dir
) {
@FILE_LIST
= File::Find::Rule->file()->in(
@dir
);
}
else
{
@FILE_LIST
= <STDIN>;
}
say
scalar
@FILE_LIST
.
' files to compare, aborting...'
and
exit
1
if
(
@FILE_LIST
<= 1);
my
$comparison_count
= 0;
for
(
my
$i
=0;
$i
<
@FILE_LIST
- 1;
$i
++) {
for
(
my
$j
=
$i
+1;
$j
<
@FILE_LIST
;
$j
++) {
$comparison_count
++;
}
}
close
STDIN;
open
STDIN,
"<"
,
"/dev/tty"
;
if
(
$do_prompt
) {
my
$answer
= prompt(
"$comparison_count comparisons needed, continue? [Y/n]"
, {
output
=>
*STDERR
});
exit
0
if
(
$answer
=~ /n/);
}
close
STDIN;
say
STDERR
'reading and preparing files...'
;
my
@files
= ();
foreach
my
$filepath
(
@FILE_LIST
) {
chomp
(
$filepath
);
chop
(
$filepath
)
if
(
$filepath
=~ m/\r$/);
my
@content
;
if
(
$o
->{
split
}) {
local
$/ =
$o
->{
split
};
@content
= read_file(
$filepath
,
binmode
=>
':utf8'
) ;
}
else
{
@content
= read_file(
$filepath
,
binmode
=>
':utf8'
) ;
}
my
$mimetype
=
''
;
my
$mini_data
=
''
;
if
(
$mime_match
) {
$mini_data
.=
$content
[0]
if
(
defined
$content
[0]);
$mini_data
.=
$content
[1]
if
(
defined
$content
[1]);
$mini_data
.=
$content
[2]
if
(
defined
$content
[2]);
$mimetype
=
$magic
->info_from_string(
$mini_data
)->{mime_type};
}
my
$charset
= School::Code::Compare::Charset->new();
$charset
->set_language(
$lang
);
my
$filtered
;
$filtered
=
$charset
->get_visibles (\
@content
)
if
(
$algo
eq
'visibles'
);
$filtered
=
$charset
->get_numsignes(\
@content
)
if
(
$algo
eq
'numsignes'
);
$filtered
=
$charset
->get_signes (\
@content
)
if
(
$algo
eq
'signes'
);
$filtered
=
$charset
->sort_by_lines(
$filtered
)
if
(
$sort
);
my
$info
= {};
$info
->{
"code_$algo"
} =
join
''
, @{
$filtered
};
$info
->{path} =
$filepath
;
$info
->{mime_type} =
$mimetype
if
(
$mimetype
);
push
@files
,
$info
;
}
my
$now
= DateTime->now;
my
$comparer
= School::Code::Compare->new()
->set_max_relative_difference(1.4)
->set_min_char_total (20)
->set_max_relative_distance(0.6);
my
%info
= (
visibles
=>
"All visible chars. Whitespace removed."
,
numsignes
=>
"Words and numbers ignored in meaning, but not in position. Whitespace removed"
,
signes
=>
"Only special chars. Whitespace, letters, numbers and underscore removed."
,
);
print
STDERR
"working on $algo... "
;
my
@result
= ();
my
$judge
= School::Code::Compare::Judge->new();
my
$skip_report
=
''
;
my
$skip_count
= 0;
my
$lift_count
= 0;
for
(
my
$i
=0;
$i
<
@files
- 1;
$i
++) {
for
(
my
$j
=
$i
+1;
$j
<
@files
;
$j
++) {
my
$comparison
= {};
my
$do_comparison
= 1;
if
(
$basedir
) {
my
$path1
=
$files
[
$i
]->{path};
my
$path2
=
$files
[
$j
]->{path};
chop
$path1
if
(
$path1
=~ /\/$/);
chop
$path2
if
(
$path2
=~ /\/$/);
my
$project1
=
''
;
if
(
$path1
=~
qr!^$basedir/([^/]+)/!
){
$project1
= $1
}
my
$project2
=
''
;
if
(
$path2
=~
qr!^$basedir/([^/]+)/!
){
$project2
= $1
}
if
(
$project1
eq
$project2
) {
$comparison
= {
distance
=>
undef
,
ratio
=>
undef
,
length1
=>
undef
,
length2
=>
undef
,
delta_length
=>
undef
,
comment
=>
"skipped: same project: $project1"
,
};
$do_comparison
= 0;
}
}
if
(
$mime_match
and
$files
[
$i
]->{mime_type}
ne
$files
[
$j
]->{mime_type}) {
$comparison
= {
distance
=>
undef
,
ratio
=>
undef
,
length1
=>
undef
,
length2
=>
undef
,
delta_length
=>
undef
,
comment
=>
'skipped: different mime:'
.
$files
[
$i
]->{mime_type}
.
' ; '
.
$files
[
$j
]->{mime_type},
};
$do_comparison
= 0;
}
if
(
$do_comparison
) {
$comparison
=
$comparer
->measure(
$files
[
$i
]->{
"code_$algo"
},
$files
[
$j
]->{
"code_$algo"
}
);
}
$do_comparison
= 1;
if
(
$verbose
) {
say
STDERR
''
;
say
STDERR
"---comparison $algo $i;$j---"
;
say
STDERR
'path1: '
.
$files
[
$i
]->{path};
say
STDERR
'path2: '
.
$files
[
$j
]->{path};
say
STDERR
'mime1:'
.
$files
[
$i
]->{mime_type}
if
defined
$files
[
$i
]->{mime_type};
say
STDERR
'mime2:'
.
$files
[
$j
]->{mime_type}
if
defined
$files
[
$j
]->{mime_type};
say
STDERR
'data1: '
.
$files
[
$i
]->{
"code_$algo"
}
if
defined
$files
[
$i
]->{
"code_$algo"
};
say
STDERR
'data2: '
.
$files
[
$j
]->{
"code_$algo"
}
if
defined
$files
[
$j
]->{
"code_$algo"
};
say
STDERR
'distance: '
.
$comparison
->{distance}
if
defined
$comparison
->{distance};
say
STDERR
'length1: '
.
$comparison
->{length1}
if
defined
$comparison
->{length1};
say
STDERR
'length2: '
.
$comparison
->{length2}
if
defined
$comparison
->{length2};
say
STDERR
'similarity: '
.
$comparison
->{ratio}
if
defined
$comparison
->{ratio};
say
STDERR
'comment: '
.
$comparison
->{comment};
}
if
(
$comparison
->{comment} =~ /^skipped/ ) {
$skip_count
++;
next
if
(
$hide_skipped
);
}
else
{
$lift_count
++;
}
$comparison
->{file1} =
$files
[
$i
]->{path};
$comparison
->{file2} =
$files
[
$j
]->{path};
$judge
->look(
$comparison
);
push
@result
,
$comparison
;
}
}
say
STDERR
"\tdone"
;
print
STDERR
"rendering..."
;
my
$format
=
'CSV'
;
for
(
$output_format
) {
$format
=
'CSV'
when
/^csv/;
$format
=
'HTML'
when
/^html/;
$format
=
'TAB'
when
/^tab/;
}
my
$filename
=
undef
;
if
(
$to_file
) {
$filename
=
'comparison_'
.
$now
->ymd() .
'_'
.
$now
->hms(
'-'
) .
'_'
.
$algo
.
'.'
.
lc
$format
;
}
my
$version
=
defined
$School::Code::Compare::VERSION
?
$School::Code::Compare::VERSION
:
'unknown'
;
my
$signature
=
'Created at '
.
$now
->ymd() .
' '
.
$now
->hms(
'-'
) .
' '
.
"with '$0 $argv_signature' in '$version' version."
;
$skip_report
=
"From a total of $comparison_count possible comparisons,"
.
" $skip_count where skipped and $lift_count actually compared."
;
$skip_report
.=
$hide_skipped
?
' Skipped results are not shown.'
:
' All results listed.'
;
my
$out
= School::Code::Compare::Out->new();
$out
->set_name(
$filename
)
->set_format(
$format
)
->set_lines(\
@result
)
->set_title(
$algo
)
->set_description(
$info
{
$algo
})
->set_signature(
$signature
)
->set_endreport(
$skip_report
)
;
$out
->
write
();
if
(
defined
$filename
) {
say
STDERR
"\tdone. See $filename"
;
}
else
{
say
STDERR
"\tdone."
;
}