#!perl
$| = 1;
my
%ignore
;
while
(
my
$line
= <main::DATA> ) {
chomp
$line
;
next
if
$line
=~ /^
next
unless
$line
;
$ignore
{
$line
} = 1;
}
my
$ua
= LWP::UserAgent->new(
ssl_opts
=> {
verify_hostname
=> 0 });
$ua
->timeout(58);
$ua
->env_proxy;
my
@filenames
=
@ARGV
;
@filenames
=
sort
grep
{
$_
!~ /^\.git/ } File::Find::Rule->new->file->in(
'.'
)
unless
@filenames
;
my
$total_bytes
= sum
map
{-s}
@filenames
;
my
$extract_progress
= Term::ProgressBar::Simple->new(
{
count
=>
$total_bytes
,
name
=>
'Extracting URIs'
,
}
);
my
%uris
;
foreach
my
$filename
(
@filenames
) {
next
if
$filename
=~ /uris\.txt/;
next
if
$filename
=~ /check_uris/;
next
if
$filename
=~ /\.patch$/;
next
if
$filename
=~
'cpan/Pod-Simple/t/perlfaqo?\.pod'
;
next
if
$filename
=~ /checkURL\.pl$/;
my
$contents
= File::Slurp::read_file(
$filename
);
my
@uris
= URI::Find::Simple::list_uris(
$contents
);
foreach
my
$uri
(
@uris
) {
next
unless
$uri
=~ /^(http|ftp)/;
next
if
$ignore
{
$uri
};
next
if
$uri
=~ m{^https?://rt.perl.org/(?:rt3/)?Ticket/Display.html?id=\d+$};
next
if
$uri
=~ m{^https?://(?:www\.)?github\.com/[pP]erl/perl5/issues/\d+$};
next
if
$uri
=~ m{^https?://rt.cpan.org/Public/Bug/Display.html?id=\d+$};
next
push
@{
$uris
{
$uri
} },
$filename
;
}
$extract_progress
+= -s
$filename
;
}
my
$bw
= Parallel::Fork::BossWorkerAsync->new(
work_handler
=> \
&work_alarmed
,
global_timeout
=> 120,
worker_count
=> 20,
);
foreach
my
$uri
(
keys
%uris
) {
my
@filenames
= @{
$uris
{
$uri
} };
$bw
->add_work( {
uri
=>
$uri
,
filenames
=> \
@filenames
} );
}
undef
$extract_progress
;
my
$fetch_progress
= Term::ProgressBar::Simple->new(
{
count
=>
scalar
(
keys
%uris
),
name
=>
'Fetching URIs'
,
}
);
my
%filenames
;
while
(
$bw
->pending() ) {
my
$response
=
$bw
->get_result();
my
$uri
=
$response
->{uri};
my
@filenames
= @{
$response
->{filenames} };
my
$is_success
=
$response
->{is_success};
my
$message
=
$response
->{message};
unless
(
$is_success
) {
foreach
my
$filename
(
@filenames
) {
push
@{
$filenames
{
$filename
} },
{
uri
=>
$uri
,
message
=>
$message
};
}
}
$fetch_progress
++;
}
$bw
->shut_down();
my
$fh
= IO::File->new(
'> uris.txt'
);
foreach
my
$filename
(
sort
keys
%filenames
) {
$fh
->
say
(
"* $filename"
);
my
@bits
= @{
$filenames
{
$filename
} };
foreach
my
$bit
(
@bits
) {
my
$uri
=
$bit
->{uri};
my
$message
=
$bit
->{message};
$fh
->
say
(
" $uri"
);
$fh
->
say
(
" $message"
);
}
}
$fh
->
close
;
say
'Finished, see uris.txt'
;
sub
work_alarmed {
my
$conf
=
shift
;
eval
{
local
$SIG
{ALRM} =
sub
{
die
"alarm\n"
};
alarm
60;
$conf
= work(
$conf
);
alarm
0;
};
if
($@) {
$conf
->{is_success} = 0;
$conf
->{message} =
'Timed out'
;
}
return
$conf
;
}
sub
work {
my
$conf
=
shift
;
my
$uri
=
$conf
->{uri};
my
@filenames
= @{
$conf
->{filenames} };
if
(
$uri
=~ /^http/ ) {
my
$uri_without_fragment
= URI->new(
$uri
);
my
$fragment
=
$uri_without_fragment
->fragment(
undef
);
my
$response
=
$ua
->head(
$uri_without_fragment
);
$conf
->{is_success} =
$response
->is_success;
$conf
->{message} =
$response
->status_line;
return
$conf
;
}
else
{
my
$uri_object
= URI->new(
$uri
);
my
$host
=
$uri_object
->host;
my
$path
=
$uri_object
->path;
my
(
$volume
,
$directories
,
$filename
)
= File::Spec->splitpath(
$path
);
my
$ftp
= Net::FTP->new(
$host
,
Passive
=> 1,
Timeout
=> 60 );
unless
(
$ftp
) {
$conf
->{is_succcess} = 0;
$conf
->{message} =
"Can not connect to $host: $@"
;
return
$conf
;
}
my
$can_login
=
$ftp
->login(
"anonymous"
,
'-anonymous@'
);
unless
(
$can_login
) {
$conf
->{is_success} = 0;
$conf
->{message} =
"Can not login "
,
$ftp
->message;
return
$conf
;
}
my
$can_binary
=
$ftp
->binary();
unless
(
$can_binary
) {
$conf
->{is_success} = 0;
$conf
->{message} =
"Can not binary "
,
$ftp
->message;
return
$conf
;
}
my
$can_cwd
=
$ftp
->cwd(
$directories
);
unless
(
$can_cwd
) {
$conf
->{is_success} = 0;
$conf
->{message} =
"Can not cwd to $directories "
,
$ftp
->message;
return
$conf
;
}
if
(
$filename
) {
my
$can_size
=
$ftp
->size(
$filename
);
unless
(
$can_size
) {
$conf
->{is_success} = 0;
$conf
->{message}
=
"Can not size $filename in $directories"
,
$ftp
->message;
return
$conf
;
}
}
else
{
my
(
$can_dir
) =
$ftp
->dir;
unless
(
$can_dir
) {
my
(
$can_ls
) =
$ftp
->ls;
unless
(
$can_ls
) {
$conf
->{is_success} = 0;
$conf
->{message}
=
"Can not dir or ls in $directories "
,
$ftp
->message;
return
$conf
;
}
}
}
$conf
->{is_success} = 1;
return
$conf
;
}
}