Skip to content

Commit

Permalink
Merge pull request #1170 from metacpan/haarg/author-data-from-cpan-only
Browse files Browse the repository at this point in the history
Only use author json files from CPAN, not BackPAN
  • Loading branch information
mickeyn authored Feb 25, 2024
2 parents 5e04dad + f215c93 commit b1625ce
Show file tree
Hide file tree
Showing 8 changed files with 64 additions and 49 deletions.
2 changes: 2 additions & 0 deletions cpanfile
Original file line number Diff line number Diff line change
Expand Up @@ -57,11 +57,13 @@ requires 'EV';
requires 'Exporter', '5.74';
requires 'ExtUtils::HasCompiler';
requires 'File::Basename';
requires 'File::Copy';
requires 'File::Find';
requires 'File::Find::Rule';
requires 'File::Find::Rule::Perl';
requires 'File::Spec';
requires 'File::Spec::Functions';
requires 'File::pushd';
requires 'File::stat';
requires 'File::Temp';
requires 'FindBin';
Expand Down
26 changes: 26 additions & 0 deletions lib/MetaCPAN/Role/Script.pm
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,14 @@ has cpan => (
'Location of a local CPAN mirror, looks for $ENV{MINICPAN} and ~/CPAN',
);

has cpan_file_map => (
is => 'ro',
isa => HashRef,
lazy => 1,
builder => '_build_cpan_file_map',
traits => ['NoGetopt'],
);

has die_on_error => (
is => 'ro',
isa => Bool,
Expand Down Expand Up @@ -225,6 +233,24 @@ sub _build_cpan {

}

sub _build_cpan_file_map {
my $self = shift;
my $ls = $self->cpan->child(qw(indices find-ls.gz));
unless ( -e $ls ) {
die "File $ls does not exist";
}
log_info {"Reading $ls"};
my $cpan = {};
open my $fh, "<:gzip", $ls;
while (<$fh>) {
my $path = ( split(/\s+/) )[-1];
next unless ( $path =~ /^authors\/id\/\w+\/\w+\/(\w+)\/(.*)$/ );
$cpan->{$1}{$2} = 1;
}
close $fh;
return $cpan;
}

sub _build_quarantine {
my $path = "$ENV{HOME}/QUARANTINE";
if ( !-d $path ) {
Expand Down
4 changes: 4 additions & 0 deletions lib/MetaCPAN/Script/Author.pm
Original file line number Diff line number Diff line change
Expand Up @@ -260,9 +260,13 @@ sub author_config {
return undef
unless $dir->is_dir;

my $author_cpan_files = $self->cpan_file_map->{$pauseid}
or return undef;

# Get the most recent version
my ($file) = map $_->[0], sort { $a->[1] <=> $b->[1] }
map [ $_ => $_->stat->mtime ],
grep $author_cpan_files->{ $_->basename },
$dir->children(qr/\Aauthor-.*\.json\z/);

return undef
Expand Down
28 changes: 1 addition & 27 deletions lib/MetaCPAN/Script/Backpan.pm
Original file line number Diff line number Diff line change
Expand Up @@ -31,13 +31,6 @@ has files_only => (
documentation => 'only update the "file" index',
);

has _cpan_files_list => (
is => 'ro',
isa => HashRef,
lazy => 1,
builder => '_build_cpan_files_list',
);

has _release_status => (
is => 'ro',
isa => HashRef,
Expand All @@ -50,25 +43,6 @@ has _bulk => (
default => sub { +{} },
);

sub _build_cpan_files_list {
my $self = shift;
my $ls = $self->cpan->child(qw(indices find-ls.gz));
unless ( -e $ls ) {
log_error {"File $ls does not exist"};
exit;
}
log_info {"Reading $ls"};
my $cpan = {};
open my $fh, "<:gzip", $ls;
while (<$fh>) {
my $path = ( split(/\s+/) )[-1];
next unless ( $path =~ /^authors\/id\/\w+\/\w+\/(\w+)\/(.*)$/ );
$cpan->{$1}{$2} = 1;
}
close $fh;
return $cpan;
}

sub run {
my $self = shift;

Expand Down Expand Up @@ -106,7 +80,7 @@ sub build_release_status_map {
$self->_release_status->{$author}{$name} = [
(
$self->undo
or exists $self->_cpan_files_list->{$author}{$archive}
or exists $self->cpan_file_map->{$author}{$archive}
)
? 'cpan'
: 'backpan',
Expand Down
23 changes: 2 additions & 21 deletions lib/MetaCPAN/Script/Release.pm
Original file line number Diff line number Diff line change
Expand Up @@ -187,7 +187,7 @@ sub run {
my @module_to_purge_dists = map { CPAN::DistnameInfo->new($_) } @files;

$self->index;
$self->_cpan_files_list if ( $self->detect_backpan );
$self->cpan_file_map if ( $self->detect_backpan );
$self->perms;
my @pid;

Expand Down Expand Up @@ -388,29 +388,10 @@ sub import_archive {
$self->update_release_contirbutors($contrib_data);
}

sub _build_cpan_files_list {
my $self = shift;
my $ls = $self->cpan->child(qw(indices find-ls.gz));
unless ( -e $ls ) {
log_error {"File $ls does not exist"};
exit;
}
log_info {"Reading $ls"};
my $cpan = {};
open my $fh, "<:gzip", $ls;
while (<$fh>) {
my $path = ( split(/\s+/) )[-1];
next unless ( $path =~ /^authors\/id\/\w+\/\w+\/(.*)$/ );
$cpan->{$1} = 1;
}
close $fh;
return $cpan;
}

sub detect_status {
my ( $self, $author, $archive ) = @_;
return $self->status unless ( $self->detect_backpan );
if ( $self->_cpan_files_list->{ join( '/', $author, $archive ) } ) {
if ( $self->cpan_file_map->{$author}{$archive} ) {
return 'cpan';
}
else {
Expand Down
3 changes: 3 additions & 0 deletions t/00_setup.t
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ use MetaCPAN::TestHelpers qw(
fakecpan_dir
get_config
tmp_dir
write_find_ls
);
use MetaCPAN::TestServer ();
use Module::Faker 0.015 (); # Generates META.json.
Expand Down Expand Up @@ -94,6 +95,8 @@ $src_dir->child('bugs.tsv')->copy( $fakecpan_dir->child('bugs.tsv') );
$src_dir->child('mirrors.json')
->copy( $fakecpan_dir->child(qw(indices mirrors.json)) );

write_find_ls($fakecpan_dir);

$server->index_permissions;
$server->index_packages;
$server->index_releases;
Expand Down
3 changes: 2 additions & 1 deletion t/lib/MetaCPAN/DarkPAN.pm
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ package MetaCPAN::DarkPAN;
use MetaCPAN::Moose;

use CPAN::Repository::Perms;
use MetaCPAN::TestHelpers qw( get_config );
use MetaCPAN::TestHelpers qw( get_config write_find_ls );
use MetaCPAN::Types::TypeTiny qw( Path );
use MetaCPAN::Util qw( author_dir );
use OrePAN2::Indexer;
Expand Down Expand Up @@ -67,6 +67,7 @@ sub run {
);
$orepan->make_index( no_compress => 1, );
$self->_write_06perms;
write_find_ls( $self->base_dir );
}

sub _write_06perms {
Expand Down
24 changes: 24 additions & 0 deletions t/lib/MetaCPAN/TestHelpers.pm
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@ package # no_index
MetaCPAN::TestHelpers;

use Cpanel::JSON::XS;
use File::Copy qw( copy );
use File::pushd qw( pushd );
use FindBin;
use Git::Helpers qw( checkout_root );
use MetaCPAN::Script::Runner;
Expand All @@ -30,6 +32,7 @@ our @EXPORT = qw(
test_release
tmp_dir
try
write_find_ls
);

=head1 EXPORTS
Expand Down Expand Up @@ -145,4 +148,25 @@ sub test_cache_headers {
) if exists $conf->{surrogate_control};
}

sub write_find_ls {
my $cpan_dir = shift;

my $indices = $cpan_dir->child('indices');
$indices->mkpath;

my $find_ls = $indices->child('find-ls.gz')->openw(':gzip');

my $chdir = pushd($cpan_dir);

open my $fh, '-|', 'find', 'authors', '-ls'
or die "can't run find: $!";

copy $fh, $find_ls;

close $fh;
close $find_ls;

return;
}

1;

0 comments on commit b1625ce

Please sign in to comment.