ljr/livejournal/bin/upgrading/migrate-userprop.pl

#!/usr/bin/perl
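#
# migrate-userprop.pl
#
# Moves the rows for a global user property out of the userprop table and
# onto each user's cluster (userproplite2), then updates userproplist to
# mark the property as clustered.  Currently only knows how to handle the
# 'external_foaf_url' property.
#
# Usage:
#   $LJHOME/bin/upgrading/migrate-userprop.pl external_foaf_url
#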
use strict;
use lib "$ENV{'LJHOME'}/cgi-bin";
require 'ljlib.pl';

# so output happens quickly
$| = 1;

# make sure we got a parameter
my $propname = shift;
die "ERROR: no property specified\n" unless $propname;

# verify it's a valid property
my $prop = LJ::get_prop('user', $propname)
    or die "ERROR: unknown property '$propname'\n";

# see if we know how to handle this parameter
if ($propname eq 'external_foaf_url' && $prop->{cldversion} == 0) {
    # this one is simple; we're moving this one to the clusters 1000 users at a time
    print "Beginning initial data migration...\n";
    cluster_property($prop);

    # update the property to be clustered and no longer indexed
    print "Updating property data in userproplist...\n";
    update_property($prop, { indexed => 0, cldversion => 4 });

    # strongly recommend a restart
    print "\n";
    print "* " x 38 . "\n";
    print "WARNING: It is recommended that you restart your web nodes now to cause the\n";
    print "         updated property to start to take effect. Please press enter when\n";
    print "         this is done.\n";
    print "* " x 38 . "\n";
    readline STDIN;

    # now let's hope they restarted and let's migrate anybody who is still stuck
    print "Beginning final data migration...\n";
    cluster_property($prop);

    # done
    print "Finished migrating external_foaf_url property.\n";
} else {
    # don't know how to handle it
    die "ERROR: don't know how to handle '$propname' (has it already been handled?)\n";
}

##############################################################################
### helper subs
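
# cluster_property: moves rows for the given property from the global
# userprop table onto each user's cluster (userproplite2), working in
# batches of up to 1000 rows and deleting each migrated batch from the
# global table as it goes.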
sub cluster_property {
    my $prop = shift;

    # some state tracking information
    my (%dbcms);    # ( clusterid => dbcm )
    my (%to_write); # ( clusterid => [ [ userid, value ], [ userid, value ], ... ] )

    # note: livejournal has only about 7500 external_foaf_urls... those should
    # be moved in less time than any database handle will time out, so I've made
    # the decision not to worry about handle timeouts right now. all other sites
    # are probably no more than this size, so it should be fine for everybody.

    # setup our flushing sub that we'll need later
    my $flush = sub {
        my $cid = shift;

        # get the ref from to_write etc
        my $aref = $to_write{$cid};
        delete $to_write{$cid};

        # get handle to database if needed
        my $dbcm = $dbcms{$cid} || ($dbcms{$cid} = LJ::get_cluster_master($cid));

        # notice that we're flushing data
        print "\tflushing " . scalar(@$aref) . " items to cluster $cid...";

        # now construct SQL
        my $repstr = join(', ', map { "($_->[0], $prop->{upropid}, " .
                                      $dbcm->quote($_->[1]) . ")" } @$aref);
        $dbcm->do("REPLACE INTO userproplite2 (userid, upropid, value) VALUES $repstr");
        die "ERROR: database: " . $dbcm->errstr . "\n" if $dbcm->err;

        # done, status update
        print "flushed\n";
    };

    # start our main loop
    while (1) {
        # data storage for each loop
        my (%users, %values); # ( userid => user object or value )

        # clear our handles
        $LJ::DBIRole->flush_cache();

        # get main database handle
        my $dbh = LJ::get_db_writer();

        # select up to 1000 userid:value tuples
        print "Getting values...";
        my $vals = $dbh->selectall_arrayref
            ('SELECT userid, value FROM userprop WHERE upropid = ? LIMIT 1000',
             undef, $prop->{upropid});
        die "ERROR: database: " . $dbh->errstr . "\n" if $dbh->err;
        print "got " . scalar(@$vals) . " values.\n";

        # short circuit if we have 0
        return 1 if scalar @$vals == 0;

        # get the userids to load
        my @to_load;
        foreach my $row (@$vals) {
            my ($userid, $value) = @$row;
            $values{$userid} = $value;
            push @to_load, $userid;
        }

        # now load the users in one big grab
        print "Loading users...";
        LJ::load_userids_multiple([ map { $_ => \$users{$_} } @to_load ]);
        print "loaded.\n";

        # now push data onto the cluster lists
        while (my ($userid, $value) = each %values) {
            my $cid = $users{$userid}->{clusterid};

            # clusterid 0 means the user is expunged or somesuch, so we
            # don't want to migrate their settings anywhere and should
            # just delete it.
            next unless $cid;

            # now push this onto the to_write array
            $to_write{$cid} ||= [];
            push @{$to_write{$cid}}, [ $userid, $value ];

            # now, flush this list if it's large (100 or more)
            $flush->($cid) if scalar @{$to_write{$cid}} >= 100;
        }

        # now flush everything that's left
        $flush->($_) foreach keys %to_write;

        # now delete from the global for items that we've written
        print "Deleting " . scalar(keys %values) . " items from global...";
        my $instr = join(',', map { $_ + 0 } keys %values);
        $dbh->do("DELETE FROM userprop WHERE upropid = $prop->{upropid} AND userid IN ($instr)");
        die "ERROR: database: " . $dbh->errstr . "\n" if $dbh->err;
        print "deleted.\n";

        # last if we had less than 1000 this time
        last if scalar @$vals < 1000;
    }
}
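
# update_property: applies the given column => value pairs to this
# property's row in userproplist.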
sub update_property {
    my ($prop, $sets) = @_;
    die "ERROR: nothing to set\n" unless %$sets;

    # now make the updates they want
    my $dbh = LJ::get_db_writer();
    my $updstr = join(', ', map { "$_ = " . $dbh->quote($sets->{$_}) } keys %$sets);
    $dbh->do("UPDATE userproplist SET $updstr WHERE upropid = $prop->{upropid}");
    die "ERROR: database: " . $dbh->errstr . "\n" if $dbh->err;
}