ljr/livejournal/bin/upgrading/d4d5-global.pl

321 lines
10 KiB
Perl
Raw Normal View History

2019-02-05 21:49:12 +00:00
#!/usr/bin/perl
#
use strict;
$| = 1;
require "$ENV{'LJHOME'}/cgi-bin/ljlib.pl";
my $BLOCK_MOVE = 5000; # user rows to get at a time before moving
my $BLOCK_INSERT = 25; # rows to insert at a time when moving users
my $BLOCK_UPDATE = 1000; # users to update at a time if they had no data to move
# used for keeping stats notes
my %stats = (); # { 'action' => { 'pass' => { 'stat' => 'value' } } }
# rows come order by user, keep last username so we can avoid dups
my $lastuser;
# what pass are we on?
my $pass;
my $get_db_writer = sub {
return LJ::get_dbh({raw=>1}, "master");
};
my $get_db_slow = sub {
return LJ::get_dbh({raw=>1}, "slow", "master");
};
my $get_cluster_master = sub {
my $cid = shift;
return LJ::get_dbh({raw=>1}, "cluster$cid");
};
# percentage complete
my $status = sub {
my ($ct, $tot, $units, $pass, $user) = @_;
my $len = length($tot);
my $passtxt = $pass ? "[Pass: $pass] " : '';
my $usertxt = $user ? " Moving user: $user" : '';
return sprintf(" $passtxt\[%6.2f%%: %${len}d/%${len}d $units]$usertxt\n",
($ct / $tot) * 100, $ct, $tot);
};
my $header = sub {
my $size = 50;
return "\n" .
("#" x $size) . "\n" .
"# $_[0] " . (" " x ($size - length($_[0]) - 4)) . "#\n" .
("#" x $size) . "\n\n";
};
# mover function
my $move_user = sub {
my $user = shift;
# if the current user is the same as the last,
# we have a duplicate so skip it
return 0 if $user eq $lastuser;
$lastuser = $user;
my $u = LJ::load_user($user);
return 0 unless $u->{'dversion'} == 4;
# update user count for this pass
$stats{'move'}->{$pass}->{'user_ct'}++;
# print status
print $status->($stats{'move'}->{$pass}->{'ct'},
$stats{'move'}->{$pass}->{'total'}, "rows", $pass, $user);
# ignore expunged users
if ($u->{'statusvis'} eq "X") {
LJ::update_user($u, { 'dversion' => 5 })
or die "error updating dversion";
$u->{'dversion'} = 5; # update local copy in memory
return 1;
}
# get a handle for every user to revalidate our connection?
my $dbh = $get_db_writer->()
or die "Can't connect to global master";
my $dbslo = $get_db_slow->()
or die "Can't connect to global slow master";
my $dbcm = $get_cluster_master->($u->{'clusterid'})
or die "Can't connect to cluster master ($u->{'clusterid'})";
# be careful, we're moving data
foreach my $db ($dbh, $dbslo, $dbcm) {
$db->do("SET wait_timeout=28800");
$db->{'RaiseError'} = 1;
}
my @map = (['style' => 's1style',
qw(styleid userid styledes type formatdata is_public
is_embedded is_colorfree opt_cache has_ads lastupdate) ],
['overrides' => 's1overrides',
qw(userid override) ]
);
# in moving, s1stylecache gets special cased because its
# name hasn't changed, only location. so if there is only
# one cluster, then there's no point in physically moving it
if (@LJ::CLUSTERS > 1) {
push @map, ['s1stylecache' => 's1stylecache',
qw(styleid cleandate type opt_cache vars_stor vars_cleanver) ];
}
# user 'system' is a special case. if we encounter this user we'll swap $dbcm
# to secretly be a $dbh. because the 'system' user uses the clustered tables
# on the global dbs, the queries will still work, we just need to misdirect them
$dbcm = $dbh if $u->{'user'} eq 'system';
# styleids to delete since s1stylemap isn't keyed on user
my @delete_styleids = ();
# update tables
foreach my $tableinf (@map) {
# find src and dest table names
my $src_table = shift @$tableinf;
my $dest_table = shift @$tableinf;
# if this is the style table, replace into stylemap here,
# so that if this process is killed, style and stylemap
# won't get too out of sync
my $do_stylemap = $src_table eq 'style' && $dest_table eq 's1style';
# find what columns this table has
my @cols = @$tableinf;
my $cols = join(",", @cols);
my $bind_row = "(" . join(",", map { "?" } @cols) . ")";
my (@bind, @vals);
my (@map_bind, @map_vals);
# flush rows to destination table
my $flush = sub {
return unless @bind;
# insert data
my $bind = join(",", @bind);
$dbcm->do("REPLACE INTO $dest_table ($cols) VALUES $bind", undef, @vals);
# insert new styles into s1stylemap
if ($do_stylemap) {
my $map_bind = join(",", @map_bind);
$dbh->do("REPLACE INTO s1stylemap (styleid, userid) VALUES $map_bind",
undef, @map_vals);
}
# reset values
@bind = ();
@vals = ();
};
# s1stylecache is the only table keyed on styleid, not user
my $where = "user=" . $dbh->quote($u->{'user'});
if ($src_table eq "s1stylecache") {
my $ids = $dbh->selectcol_arrayref("SELECT styleid FROM style WHERE user=?",
undef, $u->{'user'});
my $ids_in = join(",", map { $dbh->quote($_) } @$ids) || "0";
$where = "styleid IN ($ids_in)";
}
# select from source table and build data for insert
my $sth = $dbh->prepare("SELECT * FROM $src_table WHERE $where");
$sth->execute();
while (my $row = $sth->fetchrow_hashref) {
# so that when we look for userid later, it'll be there
$row->{'userid'} = $u->{'userid'};
# build data for insert
push @bind, $bind_row;
push @vals, $row->{$_} foreach @cols;
# special case: insert new s1styles into s1stylemap
if ($do_stylemap) {
push @map_bind, "(?,?)";
push @map_vals, ($row->{'styleid'}, $u->{'userid'});
push @delete_styleids, $row->{'styleid'};
}
# increment the count for this pass, style or overrides
$stats{'move'}->{$pass}->{'ct'}++
unless $src_table eq 's1stylecache';
# flush if we've reached our insert limit
$flush->() if @bind > $BLOCK_INSERT;
}
$flush->();
}
# haven't died yet? everything is still going okay
# update dversion
LJ::update_user($u, { 'dversion' => 5 })
or die "error updating dversion";
$u->{'dversion'} = 5; # update local copy in memory
return 1;
};
my $dbh = $get_db_writer->();
die "Could not connect to global master" unless $dbh;
$dbh->{'RaiseError'} = 1;
my $dbslo = $get_db_slow->();
die "Could not connect to global slow master" unless $dbslo;
$dbslo->{'RaiseError'} = 1;
my $ts = $dbslo->selectrow_hashref("SHOW TABLE STATUS LIKE 'overrides'");
if ($ts->{'Type'} eq 'ISAM') {
die "This script isn't efficient with ISAM tables. Please convert to MyISAM with:\n" .
" mysql> ALTER TABLE overrides TYPE=MyISAM;\n\n" .
"Then re-run this script.\n";
}
print $header->("Moving user data");
# first pass should get everything, second pass will
# get changes made during the first
foreach my $p (1..2) {
# this is strange. perl bug?
$pass = $p;
# get totals from overrides and style so we can do a percentage bar
# there will be overlaps in this when users have both styles and
# overrides, but we'll fix those up as we go
foreach (qw(style overrides)) {
$stats{'move'}->{$pass}->{'total'} += $dbslo->selectrow_array("SELECT COUNT(*) FROM $_");
}
print "Processing $stats{'move'}->{$p}->{'total'} total rows\n";
# 2 passes, so we catch people with styles & overrides,
# styles w/o overrides, and overrides w/o styles
foreach my $table (qw(style overrides)) {
$lastuser = '';
# loop until we have no more users to convert
my $ct;
do {
# get blocks of $BLOCK_MOVE users at a time
my $sth = $dbslo->prepare("SELECT user FROM $table WHERE user>? " .
"ORDER BY user LIMIT $BLOCK_MOVE");
$sth->execute($lastuser);
$ct = 0;
while (my $user = $sth->fetchrow_array) {
$move_user->($user);
$ct++;
}
} while $ct;
}
print $stats{'move'}->{$p}->{'user_ct'}+0 . " users moved\n\n";
}
# now we're confident that all users have had their data moved if
# necessary, so we can just unconditionally change dversions.
print $header->("Updating remaining users");
$stats{'update'}->{'total'} = $dbslo->selectrow_array("SELECT COUNT(*) FROM user WHERE dversion=4");
print "Converting $stats{'update'}->{'total'} users\n";
# now update dversions for users who had no data to move
if ($stats{'update'}->{'total'}) {
my $get_users = sub {
my $sth = $dbslo->prepare("SELECT userid, user FROM user " .
"WHERE dversion=4 LIMIT $BLOCK_UPDATE");
$sth->execute();
my @rows; # [ userid, user ]
while ( my ($userid, $user) = $sth->fetchrow_array) {
push @rows, [ $userid, $user ];
}
return @rows;
};
while (my @rows = $get_users->()) {
# update database
my $bind = join(",", map { "?" } @rows);
my @vals = map { $_->[0] } @rows;
$dbh->do("UPDATE user SET dversion=5 WHERE userid IN ($bind)",
undef, @vals);
# update memcache
foreach (@rows) {
LJ::MemCache::delete([$_->[0], "userid:" . $_->[0]]);
LJ::MemCache::delete([$_->[1], "user:" . $_->[1]]);
}
$stats{'update'}->{'ct'} += @rows;
print $status->($stats{'update'}->{'ct'}, $stats{'update'}->{'total'}, "users");
}
}
my $zeropad = sub {
return sprintf("%d", $_[0]);
};
# calculate total move stats
foreach (1..2) {
$stats{'move'}->{'total_ct'} += $stats{'move'}->{$_}->{'ct'};
$stats{'move'}->{'total_user_ct'} += $stats{'move'}->{$_}->{'user_ct'};
}
print $header->("Dversion 4->5 conversion completed");
print " Rows moved: " . $zeropad->($stats{'move'}->{'total_ct'}) . "\n";
print " Users moved: " . $zeropad->($stats{'move'}->{'total_user_ct'}) . "\n";
print "Users updated: " . $zeropad->($stats{'update'}->{'ct'}) . "\n\n";