class App::Licensecheck;
format
=>
sub
{
join
' '
,
$_
->[0] || (),
$_
->[1] || () }
};
threshold_after
=> 5,
format
=>
sub
{
join
' '
,
$_
->[0] || (),
$_
->[1] || () },
},
'copyright'
=> {
-as
=>
'copyright_optimistic'
};
$PerlIO::encoding::fallback
= Encode::FB_CROAK;
no
if
( $] >= 5.034 ),
warnings
=>
"experimental::try"
;
field
$log
;
field
$path
;
field
$naming
:param =
undef
;
field
$top_lines
:param //= 60;
field
$end_bytes
:param //= 5000;
field
$encoding
:param =
undef
;
field
$fh
;
field
$content
:param =
undef
;
field
$tail_content
;
field
$offset
;
field
$license
;
field
$copyrights
;
ADJUST {
$log
= Log::Any->get_logger;
if
(
defined
$naming
) {
croak
$log
->fatal(
'parameter "naming" must be a String::License::Naming object'
)
unless
defined
blessed(
$naming
)
and
$naming
->isa(
'String::License::Naming'
);
}
else
{
$naming
= String::License::Naming::SPDX->new;
}
if
(
$encoding
and not
ref
(
$encoding
) eq
'OBJECT'
) {
$encoding
= find_encoding(
$encoding
);
}
}
method parse
{
(
$path
) =
@_
;
$path
= Path::Tiny::path(
$path
);
try
{
return
$self
->parse_file;
}
catch
(
$e
) {
if
(
$encoding
and
$e
=~ /does not
map
to Unicode/ ) {
$log
->debugf(
'decoding error: %s'
,
$e
);
$log
->warnf(
'failed decoding file %s as %s, will try iso-8859-1'
,
$path
,
$encoding
->name
);
try
{
$encoding
= find_encoding(
'iso-8859-1'
);
return
$self
->parse_file;
}
catch
(
$e
) {
if
(
$e
=~ /does not
map
to Unicode/ ) {
$log
->debugf(
'decoding error: %s'
,
$e
);
$log
->warnf(
'failed decoding file %s as iso-8859-1, will try raw'
,
$path
);
$encoding
=
undef
;
return
$self
->parse_file;
}
else
{
die
$log
->fatalf(
'unknown error: %s'
,
$e
);
}
}
}
else
{
die
$log
->fatalf(
'unknown error: %s'
,
$e
);
}
}
}
method parse_file
{
$content
=
undef
;
$license
=
undef
;
$copyrights
=
undef
;
if
(
$top_lines
== 0 ) {
my
$licensed
= String::License->new(
string
=>
$self
->content_extracleaned,
naming
=>
$naming
,
);
$license
=
$licensed
->as_text;
$copyrights
= copyright(
$self
->content_cleaned );
}
else
{
my
$licensed
= String::License->new(
string
=>
$self
->content_extracleaned,
naming
=>
$naming
,
);
$license
=
$licensed
->as_text;
$copyrights
= copyright_optimistic(
$self
->content_cleaned );
if
(
$offset
and not
$copyrights
and
$license
eq
'UNKNOWN'
) {
$tail_content
=
undef
;
my
$licensed
= String::License->new(
string
=>
$self
->content_extracleaned,
naming
=>
$naming
,
);
$license
=
$licensed
->as_text;
$copyrights
= copyright_optimistic(
$self
->content_cleaned );
}
$fh
->
close
;
}
return
(
$license
,
$copyrights
);
}
method content
{
if
(
$top_lines
== 0 ) {
return
$content
if
defined
(
$content
);
if
( not
defined
(
$encoding
) ) {
$log
->debugf(
'reading whole file %s as raw bytes'
,
$path
);
$content
=
$path
->slurp_raw;
}
else
{
my
$id
=
$encoding
->name;
$log
->debugf(
'decoding whole file %s as %s'
,
$path
,
$id
);
$content
=
$path
->slurp( {
binmode
=>
":encoding($id)"
} );
}
$log
->trace(
"----- content -----\n$content----- end content -----"
)
if
$log
->is_trace;
}
elsif
( not
defined
(
$license
) or not
defined
(
$copyrights
) ) {
return
$content
if
defined
(
$content
);
$content
=
''
;
if
( not
defined
(
$encoding
) ) {
$log
->debugf(
'reading part(s) of file %s as raw bytes'
,
$path
);
$fh
=
$path
->openr_raw;
}
else
{
my
$id
=
$encoding
->name;
$log
->debugf(
'decoding part(s) of file %s as %s'
,
$path
,
$id
);
$fh
=
$path
->openr(
":encoding($id)"
);
}
while
(
my
$line
=
$fh
->getline ) {
last
if
(
$fh
->input_line_number >
$top_lines
);
$content
.=
$line
;
}
$log
->trace(
"----- header -----\n$content----- end header -----"
)
if
$log
->is_trace;
if
(
$end_bytes
) {
my
$position
=
$fh
->
tell
;
my
$filesize
=
$path
->
stat
->size;
if
(
$position
>=
$filesize
-
$end_bytes
) {
if
(
$position
<
$filesize
) {
$log
->debugf(
'tail offset set to %s (end of header)'
,
$position
);
$offset
=
$position
;
}
elsif
(
$position
=
$filesize
) {
$log
->debug(
'header end matches file size'
);
$offset
= 0;
}
else
{
$log
->error(
'header end beyond file size'
);
$offset
= 0;
}
}
elsif
(
$position
> 0 ) {
$offset
=
$filesize
-
$end_bytes
;
$log
->debugf(
'tail offset set to %s'
,
$offset
);
}
elsif
(
$position
< 0 ) {
$log
->error(
'header end could not be resolved'
);
$offset
= 0;
}
else
{
$log
->error(
'header end oddly at beginning of file'
);
$offset
= 0;
}
}
}
elsif
(
$offset
) {
return
$content
if
defined
(
$tail_content
);
$tail_content
=
''
;
$fh
->
seek
(
$offset
, SEEK_SET );
$tail_content
.=
join
(
''
,
$fh
->getlines );
$log
->trace(
"----- tail -----\n$tail_content----- end tail -----"
)
if
$log
->is_trace;
$content
=
$tail_content
;
}
else
{
$log
->errorf(
'tail offset not usable: %s'
,
$offset
);
return
''
;
}
local
$_
=
$content
or
return
''
;
my
@matches
= m/^[ \t]*([^a-zA-Z0-9\s]{1,3})[ \t]+\S/mg;
if
(
@matches
>= 4 ) {
my
$comment_re
=
qr/^[ \t]*[\Q$matches[0]\E]{1,3}[ \t]*/
m;
s/
$comment_re
//g;
}
my
@wordmatches
= m/^[ \t]*(dnl|REM|COMMENT)[ \t]+\S/mg;
if
(
@wordmatches
>= 4 ) {
my
$comment_re
=
qr/^[ \t]*\Q$wordmatches[0]\E[ \t]*/
m;
s/
$comment_re
//g;
}
s/[ \t]*[*
s/^[cC]$//gm;
s/^[cC] //gm;
s
s/\s*\\n\s*/ /g;
$content
=
$_
;
return
$content
;
}
my
$html_xml_tags_re
=
qr/<\/
?(?:p|br|
ref
)(?:\s[^>]*)?>/i;
method content_cleaned
{
local
$_
=
$self
->content or
return
''
;
s/
$html_xml_tags_re
//g;
s/\xcb\x97|\xe2\x80[\x90-\x95|\xe2\x81\x83|\xe2\x88\x92|\xef\x89\xa3|\xef\xbc\x8d]|[&](?:ndash|mdash|horbar|minus|[
s/\x58\xa9|\xc2\xa9|\xe2\x92\x9e|\xe2\x92\xb8|\xe2\x93\x92|\xf0\x9f\x84\x92|\xf0\x9f\x84\xab|\xf0\x9f\x85\x92|[&](?:copy|[
s/\\//gm;
return
$_
;
}
method content_extracleaned
{
local
$_
=
$self
->content or
return
''
;
s/-\r?\n//g;
s/
$html_xml_tags_re
//g;
tr
/\t\r\n/ /;
tr
% A-Za-z.,:@;0-9\(\)/-%
%cd
;
tr
/ //s;
return
$_
;
}
1;