Skip to content

Commit

Permalink
Merge pull request #62 from metacpan/mickey/es_syntax
Browse files (browse the repository at this point in the history)
More ES syntax updates
  • Loading branch information
mickeyn authored Dec 16, 2024
2 parents 8cc107e + 526f702 commit e5ec140
Show file tree
Hide file tree
Showing 12 changed files with 169 additions and 145 deletions.
32 changes: 19 additions & 13 deletions bin/backpan.pl
Original file line number Diff line number Diff line change
Expand Up @@ -38,14 +38,17 @@ ()
log_info {"find_releases"};

my $scroll = $es_release->scroll(
fields => [qw< author archive name >],
body => get_release_query(),
body => {
%{ get_release_query() },
size => 500,
_source => [qw< author archive name >],
},
);

while ( my $release = $scroll->next ) {
my $author = $release->{fields}{author}[0];
my $archive = $release->{fields}{archive}[0];
my $name = $release->{fields}{name}[0];
my $author = $release->{_source}{author};
my $archive = $release->{_source}{archive};
my $name = $release->{_source}{name};
next unless $name; # bypass some broken releases

$release_status{$author}{$name} = [
Expand All @@ -64,8 +67,10 @@ ()
unless ($undo) {
return +{
query => {
not => { term => { status => 'backpan' } }
}
bool => {
must_not => [ { term => { status => 'backpan' } }, ],
},
},
};
}

Expand Down Expand Up @@ -118,23 +123,24 @@ ( $author, $author_releases )

my $scroll_file = $es_file->scroll(
scroll => '5m',
fields => [qw< release >],
body => {
query => {
bool => {
must => [
{ term => { author => $author } },
{ terms => { release => $author_releases } }
]
}
}
{ terms => { release => $author_releases } },
],
},
},
size => 500,
_source => [qw< release >],
},
);

$bulk{file} ||= $es_file->bulk( timeout => '5m' );

while ( my $file = $scroll_file->next ) {
my $release = $file->{fields}{release}[0];
my $release = $file->{_source}{release};
$bulk{file}->update( {
id => $file->{_id},
doc => {
Expand Down
11 changes: 7 additions & 4 deletions bin/backup.pl
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
use Try::Tiny qw< catch try >;

use MetaCPAN::ES;
use MetaCPAN::Ingest qw< home >;
use MetaCPAN::Ingest qw< home true >;

# config

Expand Down Expand Up @@ -82,7 +82,7 @@ ()
$bulk_store{$key} ||= $es->bulk( max_count => $batch_size );
my $bulk = $bulk_store{$key};

my $parent = $raw->{fields}{_parent};
my $parent = $raw->{_parent};

if ( $raw->{_type} eq 'author' ) {

Expand Down Expand Up @@ -169,9 +169,12 @@ sub run_backup {
( $type ? ( type => $type ) : () )
);
my $scroll = $es->scroll(
size => $size,
fields => [qw< _parent _source >],
scroll => '1m',
body => {
_source => true,
size => $size,
sort => '_doc',
},
);

log_info { 'Backing up ', $scroll->total, ' documents' };
Expand Down
66 changes: 41 additions & 25 deletions bin/check.pl
Original file line number Diff line number Diff line change
Expand Up @@ -38,11 +38,7 @@

# look up this module in ElasticSearch and see what we have on it
my $results = $es_file->search(
size => 100, # shouldn't get more than this
fields => [
qw< name release author distribution version authorized indexed maturity date >
],
query => {
query => {
bool => {
must => [
{ term => { 'module.name' => $pkg } },
Expand All @@ -51,22 +47,38 @@
],
},
},
size => 100, # shouldn't get more than this
_source => [ qw<
name
release
author
distribution
version
authorized
indexed
maturity
date
> ],

);
my @files = @{ $results->{hits}{hits} };

# now find the first latest releases for these files
foreach my $file (@files) {
my $release_results = $es_release->search(
size => 1,
fields => [qw< name status authorized version id date >],
query => {
query => {
bool => {
must => [
{ term => { name => $file->{fields}{release} } },
{
term =>
{ name => $file->{_source}{release} }
},
{ term => { status => 'latest' } },
],
},
},
size => 1,
_source => [qw< name status authorized version id date >],
);

push @releases, $release_results->{hits}{hits}[0]
Expand All @@ -78,16 +90,20 @@
if ( !@releases ) {
foreach my $file (@files) {
my $release_results = $es_release->search(
size => 1,
fields =>
[qw< name status authorized version id date >],
query => {
query => {
bool => {
must => [
{ term => { name => $file->{fields}{release} } },
{
term => {
name => $file->{_source}{release}
}
},
],
},
},
size => 1,
_source =>
[qw< name status authorized version id date >],
);

push @releases, @{ $release_results->{hits}{hits} };
Expand All @@ -97,22 +113,22 @@
# if we found the releases tell them about it
if (@releases) {
if ( @releases == 1
and $releases[0]->{fields}{status} eq 'latest' )
and $releases[0]->{_source}{status} eq 'latest' )
{
log_info {
"Found latest release $releases[0]->{fields}{name} for $pkg"
"Found latest release $releases[0]->{_source}{name} for $pkg"
}
unless $errors_only;
}
else {
log_error {"Could not find latest release for $pkg"};
foreach my $rel (@releases) {
log_warn {" Found release $rel->{fields}{name}"};
log_warn {" STATUS : $rel->{fields}{status}"};
log_warn {" Found release $rel->{_source}{name}"};
log_warn {" STATUS : $rel->{_source}{status}"};
log_warn {
" AUTORIZED : $rel->{fields}{authorized}"
" AUTORIZED : $rel->{_source}{authorized}"
};
log_warn {" DATE : $rel->{fields}{date}"};
log_warn {" DATE : $rel->{_source}{date}"};
}

$error_count++;
Expand All @@ -123,13 +139,13 @@
"Module $pkg doesn't have any releases in ElasticSearch!"
};
foreach my $file (@files) {
log_warn {" Found file $file->{fields}{name}"};
log_warn {" RELEASE : $file->{fields}{release}"};
log_warn {" AUTHOR : $file->{fields}{author}"};
log_warn {" Found file $file->{_source}{name}"};
log_warn {" RELEASE : $file->{_source}{release}"};
log_warn {" AUTHOR : $file->{_source}{author}"};
log_warn {
" AUTHORIZED : $file->{fields}{authorized}"
" AUTHORIZED : $file->{_source}{authorized}"
};
log_warn {" DATE : $file->{fields}{date}"};
log_warn {" DATE : $file->{_source}{date}"};
}
$error_count++;
}
Expand Down
16 changes: 8 additions & 8 deletions bin/checksum.pl
Original file line number Diff line number Diff line change
Expand Up @@ -32,11 +32,11 @@
not => {
exists => {
field => "checksum_md5"
}
}
}
},
},
},
_source => [qw< id name download_url >],
},
fields => [qw< id name download_url >],
);

log_warn { "Found " . $scroll->total . " releases" };
Expand All @@ -50,11 +50,11 @@
last;
}

log_info { "Adding checksums for " . $p->{fields}{name}[0] };
log_info { "Adding checksums for " . $p->{_source}{name} };

if ( my $download_url = $p->{fields}{download_url} ) {
if ( my $download_url = $p->{_source}{download_url} ) {
my $file
= cpan_dir . "/authors" . $p->{fields}{download_url}[0]
= cpan_dir . "/authors" . $p->{_source}{download_url}
=~ s/^.*authors//r;
my $checksum_md5 = digest_file_hex( $file, 'MD5' );
my $checksum_sha256 = digest_file_hex( $file, 'SHA-256' );
Expand All @@ -75,7 +75,7 @@
}
}
else {
log_info { $p->{fields}{name}[0] . " is missing a download_url" };
log_info { $p->{_source}{name} . " is missing a download_url" };
}
}

Expand Down
31 changes: 14 additions & 17 deletions bin/cve.pl
Original file line number Diff line number Diff line change
Expand Up @@ -134,32 +134,29 @@

if (@filters) {
my $query = {
query => {
bool => {
must => [
{ term => { distribution => $dist } }, @filters,
]
}
},
bool => {
must =>
[ { term => { distribution => $dist } }, @filters, ]
}
};

my $releases = $es->search(
index => 'cpan',
type => 'release',
body => $query,
fields => [ "version", "name", "author", ],
size => 2000,
index => 'cpan',
type => 'release',
body => {
query => $query,
_source => [qw< version name author >],
size => 2000,
},
);

if ( $releases->{hits}{total} ) {
## no critic (ControlStructures::ProhibitMutatingListFunctions)
@matches = map { $_->[0] }
sort { $a->[1] <=> $b->[1] }
map {
my %fields = %{ $_->{fields} };
ref $_ and $_ = $_->[0] for values %fields;
[ \%fields, numify_version( $fields{version} ) ];
} @{ $releases->{hits}{hits} };
map { [ $_->{_source},
numify_version( $_->{_source}{version} ) ] }
@{ $releases->{hits}{hits} };
}
else {
log_debug {
Expand Down
Loading

0 comments on commit e5ec140

Please sign in to comment.