diff --git a/bin/external.pl b/bin/external.pl new file mode 100644 index 0000000..a767add --- /dev/null +++ b/bin/external.pl @@ -0,0 +1,120 @@ +use strict; +use warnings; +use v5.36; + +use Email::Sender::Simple (); +use Email::Simple (); +use Getopt::Long; +use MetaCPAN::Logger qw< :log :dlog >; + +use MetaCPAN::ES; +use MetaCPAN::External::Cygwin qw< run_cygwin >; +use MetaCPAN::External::Debian qw< run_debian >; + +# with( +# 'MetaCPAN::Script::Role::External::Cygwin', +# 'MetaCPAN::Script::Role::External::Debian', +# ); + +# args +my ( $email_to, $external_source ); +GetOptions( + "email_to=s" => \$email_to, + "external_source=s" => \$external_source, +); + +die "wrong external source: $external\n" + unless $external_source + and grep { $_ eq $external_source } qw< cygwin debian >; + +# setup +my $es = MetaCPAN::ES->new( type => "author" ); + +my $ret; + +$ret = run_cygwin() if $external_source eq 'cygwin'; +$ret = run_debian() if $external_source eq 'debian'; + +my $email_body = $ret->{errors_email_body}; +if ( $email_to and $email_body ) { + my $email = Email::Simple->create( + header => [ + 'Content-Type' => 'text/plain; charset=utf-8', + To => $email_to, + From => 'noreply@metacpan.org', + Subject => "Package mapping failures report for $external_source", + 'MIME-Version' => '1.0', + ], + body => $email_body, + ); + Email::Sender::Simple->send($email); + + log_debug { "Sending email to " . $email_to . ":" }; + log_debug {"Email body:"}; + log_debug {$email_body}; +} + +my $scroll = $es->scroll( + type => 'distribution', + scroll => '10m', + body => { + query => { + exists => { field => "external_package." . $external_source } + } + }, +); + +my @to_remove; + +while ( my $s = $scroll->next ) { + my $name = $s->{_source}{name}; + next unless $name; + + if ( exists $dist->{$name} ) { + delete $dist->{$name} + if $dist->{$name} eq + $s->{_source}{external_package}{$external_source}; + } + else { + push @to_remove => $name; + } +} + +my $bulk = $es->bulk( type => 'distribution' ); + +for my $d ( keys %{$dist} ) { + log_debug {"[$external_source] adding $d"}; + $bulk->update( { + id => $d, + doc => +{ + 'external_package' => { + $external_source => $dist->{$d} + } + }, + doc_as_upsert => 1, + } ); +} + +for my $d (@to_remove) { + log_debug {"[$external_source] removing $d"}; + $bulk->update( { + id => $d, + doc => +{ + 'external_package' => { + $external_source => undef + } + } + } ); +} + +$bulk->flush; + +1; + +=pod + +=head1 SYNOPSIS + + # bin/external.pl --external_source SOURCE --email_to EMAIL + +=cut diff --git a/lib/MetaCPAN/External/Cygwin.pm b/lib/MetaCPAN/External/Cygwin.pm new file mode 100644 index 0000000..c7b7f2b --- /dev/null +++ b/lib/MetaCPAN/External/Cygwin.pm @@ -0,0 +1,65 @@ +package MetaCPAN::External::Cygwin; + +use List::Util qw< shuffle >; +use MetaCPAN::Logger qw< :log :dlog >; + +use MetaCPAN::Ingest qw< ua >; + +use Sub::Exporter -setup => { + exports => [ qw< + run_cygwin + > ] +}; + +sub run_cygwin () { + my $ret = {}; + + my $ua = ua(); + my $mirrors = get_mirrors($ua); + + my @mirrors = @{ $mirrors }; + my $timeout = $ua->timeout(10); + + MIRROR: { + my $mirror = shift @mirrors or die "Ran out of mirrors"; + log_debug {"Trying mirror: $mirror"}; + my $res = $ua->get( $mirror . 'x86_64/setup.ini' ); + redo MIRROR unless $res->is_success; + + my @packages = split /^\@ /m, $res->decoded_content; + shift @packages; # drop headers + + log_debug { sprintf "Got %d cygwin packages", scalar @packages }; + + for my $desc (@packages) { + next if substr( $desc, 0, 5 ) ne 'perl-'; + my ( $pkg, %attr ) = map s/\A"|"\z//gr, map s/ \z//r, + map s/\n+/ /gr, split /^([a-z]+): /m, $desc; + $attr{category} = [ split / /, $attr{category} ]; + next if grep /^(Debug|_obsolete)$/, @{ $attr{category} }; + $ret->{dist}{ $pkg =~ s/^perl-//r } = $pkg; + } + } + $ua->timeout($timeout); + + log_debug { + sprintf "Found %d cygwin-CPAN packages", + scalar keys %{ $ret->{dist} } + }; + + return $ret; +} + +sub _get_mirrors ( $ua ) { + log_debug {"Fetching mirror list"}; + my $res = $ua->get('https://cygwin.com/mirrors.lst'); + die "Failed to fetch mirror list: " . $res->status_line + unless $res->is_success; + my @mirrors = shuffle map +( split /;/ )[0], split /\n/, + $res->decoded_content; + + log_debug { sprintf "Got %d mirrors", scalar @mirrors }; + return \@mirrors; +} + +1; diff --git a/lib/MetaCPAN/External/Debian.pm b/lib/MetaCPAN/External/Debian.pm new file mode 100644 index 0000000..e580b83 --- /dev/null +++ b/lib/MetaCPAN/External/Debian.pm @@ -0,0 +1,112 @@ +package MetaCPAN::External::Debian; + +use strict; +use warnings; +use v5.36; + +use CPAN::DistnameInfo (); +use DBI (); + +use MetaCPAN::ES; + +use Sub::Exporter -setup => { + exports => [ qw< + run_debian + > ] +}; + +sub run_debian () { + my $ret = {}; + + my $host_regex = _get_host_regex(); + + # connect to the database + my $dbh = DBI->connect( "dbi:Pg:host=udd-mirror.debian.net;dbname=udd", + 'udd-mirror', 'udd-mirror' ); + + # special cases + my %skip = ( 'libbssolv-perl' => 1 ); + + # multiple queries are needed + my @sql = ( + + # packages with upstream identified as CPAN + q{select u.source, u.upstream_url from upstream_metadata um join upstream u on um.source = u.source where um.key='Archive' and um.value='CPAN'}, + + # packages which upstream URL pointing to CPAN + qq{select source, upstream_url from upstream where upstream_url ~ '${\$host_regex}'}, + ); + + my @failures; + + for my $sql (@sql) { + my $sth = $dbh->prepare($sql); + $sth->execute(); + + # map Debian source package to CPAN distro + while ( my ( $source, $url ) = $sth->fetchrow ) { + next if $skip{$source}; + if ( my $dist = dist_for_debian( $source, $url ) ) { + $ret->{dist}{$dist} = $source; + } + else { + push @failures => [ $source, $url ]; + } + } + } + + if (@failures) { + my $ret->{errors_email_body} = join "\n" => + map { sprintf "%s %s", $_->[0], $_->[1] // '' } @failures; + } + + return $ret; +} + +sub dist_for_debian ( $source, $url ) { + my %alias = ( + 'datapager' => 'data-pager', + 'html-format' => 'html-formatter', + ); + + my $dist = CPAN::DistnameInfo->new($url); + if ( $dist->dist ) { + return $dist->dist; + } + elsif ( $source =~ /^lib(.*)-perl$/ ) { + my $es = MetaCPAN::ES->new( type => 'release' ); + my $res = $es->scroll( + body => { + query => { + term => { 'distribution.lowercase' => $alias{$1} // $1 } + }, + sort => [ { 'date' => 'desc' } ], + } + )->next; + + return $res->{_source}{distribution} + if $res; + } + + return; +} + +sub _get_host_regex () { + my @cpan_hosts = qw< + backpan.cpan.org + backpan.perl.org + cpan.metacpan.org + cpan.noris.de + cpan.org + cpan.perl.org + search.cpan.org + www.cpan.org + www.perl.com + >; + + return + '^(https?|ftp)://(' + . join( '|', map {s/\./\\./r} @cpan_hosts ) . ')/'; +} + +1;