#!/usr/bin/perl
#
# Phone post migrater: moves phone posts from the old blob storage into
# MogileFS.
#
# The basic theory is that we iterate over all clusters, find all phone posts
# that aren't in MogileFS right now (phonepostentry.location is 'blob' or
# NULL), and put them there.  Each migrated row is then flagged with
# location='mogile'.
#
# Note on --dry-run: it suppresses every database write made by this script
# (row purges and location updates).  Files are still read from the blob
# server and uploaded to MogileFS, exactly as in a real run.

use strict;
use lib "$ENV{LJHOME}/cgi-bin";
require 'ljlib.pl';
use LJ::Blob;
use LJ::User;
use Getopt::Long;
use IPC::Open3;
use Digest::MD5;

# command line options; these file-level lexicals are read by the subs below
my ($one, $besteffort, $dryrun, $user, $verify, $verbose, $clusters, $purge);
my $rv = GetOptions(
    "best-effort" => \$besteffort,
    "one"         => \$one,
    "dry-run"     => \$dryrun,
    "user=s"      => \$user,
    "verify"      => \$verify,
    "verbose"     => \$verbose,
    "purge-old"   => \$purge,
    "clusters=s"  => \$clusters,
);
die usage() unless $rv;

# make sure the MogileFS configuration looks usable
# NOTE(review): the text between the usage message and "{phoneposts}" was lost
# in the copy of the script this was restored from.  The check below (hash key
# and message wording) is a reconstruction -- confirm against the canonical
# script / ljconfig layout.
die "Please define a 'phoneposts' class in your \%LJ::MOGILEFS_CONFIG\n"
    unless defined $LJ::MOGILEFS_CONFIG{classes}->{phoneposts};
die "Unable to find MogileFS object (\%LJ::MOGILEFS_CONFIG not setup?)\n"
    unless $LJ::MogileFS;

# setup stderr if we're in best effort mode: unbuffered, so that the failure
# records written there appear immediately
if ($besteffort) {
    my $oldfd = select(STDERR);
    $| = 1;
    select($oldfd);
}

# operation modes
if ($user) {
    # move a single user
    my $u = LJ::load_user($user);
    die "No such user: $user\n" unless $u;
    handle_userid($u->{userid}, $u->{clusterid});

} else {
    # parse the clusters: either "X" or "X-Y", defaulting to every cluster
    my @clusters;
    if ($clusters) {
        if ($clusters =~ /^(\d+)(?:-(\d+))?$/) {
            my ($min, $max) = map { $_ + 0 } ($1, $2 || $1);
            push @clusters, $min .. $max;
        } else {
            die "Error: --clusters argument not of right format.\n";
        }
    } else {
        @clusters = @LJ::CLUSTERS;
    }

    # now iterate over the clusters to pick
    my $ctotal = scalar(@clusters);
    my $ccount = 0;
    foreach my $cid (sort { $a <=> $b } @clusters) {
        # status report
        $ccount++;
        print "\nChecking cluster $cid...\n\n";

        # get a handle
        my $dbcm = get_db_handle($cid);

        # get all userids that still have un-migrated phone posts here
        print "Getting userids...\n";
        my $limit = $one ? 'LIMIT 1' : '';
        my $userids = $dbcm->selectcol_arrayref
            ("SELECT DISTINCT userid FROM phonepostentry " .
             "WHERE (location='blob' OR location IS NULL) $limit");
        my $total = scalar(@$userids);

        # iterate over userids
        my $count = 0;
        print "Beginning iteration over userids...\n";
        foreach my $userid (@$userids) {
            # progress prefix: percent of this cluster's users, cluster N of M
            my $extra = sprintf("[%6.2f%%, %d of %d] ",
                                ++$count / $total * 100, $ccount, $ctotal);
            handle_userid($userid, $cid, $extra);
        }

        # don't hit up more clusters
        last if $one;
    }
}
print "\n";
print "Updater terminating.\n";

#############################################################################
### helper subs down here

# Build the usage message shown when option parsing fails.  The wording is
# derived from the option spec above and from what each flag does in this
# script.
sub usage {
    return join("\n",
        "This script supports the following command line arguments:",
        "",
        "    --clusters=X[-Y]",
        "        Only handle the given cluster, or the clusters in the given",
        "        range.  Defaults to all clusters.",
        "",
        "    --user=username",
        "        Only move the phone posts of this one user.",
        "",
        "    --one",
        "        Only handle one user on the first cluster, then stop.",
        "",
        "    --verify",
        "        Read each file back from MogileFS and compare it with the",
        "        original before updating the database.",
        "",
        "    --verbose",
        "        Print detailed progress information.",
        "",
        "    --dry-run",
        "        Do not make any database changes.  Files are still uploaded",
        "        to MogileFS.",
        "",
        "    --best-effort",
        "        Log failures to STDERR and keep going instead of dying.",
        "",
        "    --purge-old",
        "        Delete phonepostentry rows left behind on a cluster the user",
        "        no longer lives on.",
        "",
    ) . "\n";
}

# Take a userid and move their phone posts.
#
#   $userid - user whose phone posts should be migrated
#   $cid    - cluster on which the phonepostentry rows were found
#   $extra  - optional progress prefix for the status line
#
# Dies if the user cannot be loaded, or on any failure while moving a phone
# post unless --best-effort is on (see move_phonepost).
sub handle_userid {
    my ($userid, $cid, $extra) = @_;
    $extra = '' unless defined $extra;    # not supplied in --user mode

    # load user to move and do some sanity checks; on failure, restart the
    # request and try once more
    # NOTE(review): presumably end_request/start_request reset per-request
    # caches so the second load is fresh -- confirm in ljlib.
    my $u = LJ::load_userid($userid);
    unless ($u) {
        LJ::end_request();
        LJ::start_request();
        $u = LJ::load_userid($userid);
    }
    die "ERROR: Unable to load userid $userid\n" unless $u;

    # if they're expunged, they might have data somewhere if they were
    # copy-moved from A to B, then expunged on B.  now we're on A and
    # need to delete it ourselves (if purge-old is on)
    if ($u->{clusterid} == 0 && $u->{statusvis} eq "X") {
        purge_old_rows($cid, $u->{userid}) if $purge;
        return;
    }

    # get a handle to the cluster the user lives on now
    my $dbcm = get_db_handle($u->{clusterid});

    # print that we're doing this user
    print "$extra$u->{user}($u->{userid})\n";

    # if a user has been moved to another cluster, but the source data from
    # phonepostentry wasn't deleted, we need to ignore the user or purge
    # their data
    if ($u->{clusterid} != $cid) {
        return unless $purge;

        # verify they have some rows on the new side before deleting the
        # copy on the old side
        my $count = $dbcm->selectrow_array
            ("SELECT COUNT(*) FROM phonepostentry WHERE userid = ?",
             undef, $u->{userid});
        return unless $count;

        purge_old_rows($cid, $u->{userid});

        # nothing else to do here
        return;
    }

    # get all their phone posts that aren't in mogile already
    my $rows = $dbcm->selectall_arrayref
        ("SELECT filetype, blobid FROM phonepostentry WHERE userid = ? " .
         "AND (location = 'blob' OR location IS NULL)",
         undef, $u->{userid});
    return unless @$rows;

    # now we have a userid and blobids, move each file
    foreach my $row (@$rows) {
        my ($filetype, $blobid) = @$row;
        move_phonepost($u, $dbcm, $filetype, $blobid);
    }
    return;
}

# Delete a user's phonepostentry rows from cluster $cid.  In --dry-run mode
# nothing is deleted; the need is only reported (when --verbose is on).
sub purge_old_rows {
    my ($cid, $userid) = @_;

    # fetched even for a dry run, so an unreachable cluster is still reported
    my $to_purge_dbcm = get_db_handle($cid);

    if ($dryrun) {
        print "\tnotice: need to delete phonepostentry rows.\n\n" if $verbose;
        return;
    }

    my $ct = $to_purge_dbcm->do("DELETE FROM phonepostentry WHERE userid = ?",
                                undef, $userid);

    # DBI returns "0E0" when no rows matched; %d prints that as a plain 0
    printf "\tnotice: purged %d old rows.\n\n", $ct if $verbose;
    return;
}

# Move a single phone post from the blob server into MogileFS and, unless
# --dry-run is on, flag its row as location='mogile'.
#
#   $u        - user object owning the phone post
#   $dbcm     - handle to the user's cluster
#   $filetype - numeric file type from phonepostentry (0/1/2)
#   $blobid   - blob id of the phone post
#
# On failure this dies, or with --best-effort writes a one-line record to
# STDERR and returns, leaving the row untouched.
sub move_phonepost {
    my ($u, $dbcm, $filetype, $blobid) = @_;

    print "\tstarting move for blobid $blobid\n" if $verbose;

    # get the data from the blob server
    my $format = { 0 => 'mp3', 1 => 'ogg', 2 => 'wav' }->{$filetype};
    my $data = LJ::Blob::get($u, "phonepost", $format, $blobid);
    my $len = length($data);

    # no data: if userblob has no record of this blob at all, the phone post
    # simply has no file, so flag it as such instead of failing
    if (! $len) {
        my $has_blob = $dbcm->selectrow_array
            ("SELECT COUNT(*) FROM userblob WHERE " .
             "journalid=? AND domain=? AND blobid=?",
             undef, $u->{userid}, LJ::get_blob_domainid("phonepost"), $blobid);
        if (! $has_blob) {
            if ($dryrun) {
                print "\tnote: need to change phonepost entry location to 'none'\n\n"
                    if $verbose;
            } else {
                $dbcm->do("UPDATE phonepostentry SET location='none' " .
                          "WHERE userid=? AND blobid=?",
                          undef, $u->{userid}, $blobid);
                print "\tnote: changed phonepost entry location to 'none'\n\n"
                    if $verbose;
            }
            return;
        }
    }

    # userblob knows the blob, yet we got nothing back
    if ($besteffort && !$len) {
        print STDERR "empty_phonepost userid=$u->{userid} blobid=$blobid\n";
        print "\twarning: empty phonepost.\n\n" if $verbose;
        return;
    }
    die "Error: data from blob empty ($u->{user}, 'phonepost', $format, $blobid)\n"
        unless $len;

    # get filehandle to Mogile and put the file there
    my $key = "pp:$u->{userid}:$blobid";
    print "\tdata length = $len bytes, uploading to MogileFS...\n" if $verbose;
    my $fh = $LJ::MogileFS->new_file($key, 'phoneposts');
    if ($besteffort && !$fh) {
        print STDERR "new_file_failed userid=$u->{userid} blobid=$blobid\n";
        print "\twarning: failed in call to new_file\n\n" if $verbose;
        return;
    }
    die "Unable to get filehandle to save file to MogileFS\n" unless $fh;

    # now save the file and close the handle
    $fh->print($data);
    my $closed = $fh->close;
    if ($besteffort && !$closed) {
        print STDERR "close_failed userid=$u->{userid} blobid=$blobid reason=$@\n";
        print "\twarning: failed in call to close: $@\n\n" if $verbose;
        return;
    }
    die "Unable to save file to MogileFS: $@\n" unless $closed;

    # extra verification: read the file back and compare byte for byte
    if ($verify) {
        my $data2 = $LJ::MogileFS->get_file_data($key);
        my $eq = ($data2 && $$data2 eq $data) ? 1 : 0;
        if ($besteffort && !$eq) {
            print STDERR "verify_failed userid=$u->{userid} blobid=$blobid\n";
            print "\twarning: verify failed; phone post not updated\n\n" if $verbose;
            return;
        }
        die "\tERROR: phone post NOT stored successfully, content mismatch\n"
            unless $eq;
        print "\tverified length = " . length($$data2) . " bytes...\n" if $verbose;
    }

    # done moving this phone post
    unless ($dryrun) {
        print "\tupdating database for this phone post...\n" if $verbose;
        $dbcm->do("UPDATE phonepostentry SET location = 'mogile' " .
                  "WHERE userid = ? AND blobid = ?",
                  undef, $u->{userid}, $blobid);
    }

    # get the paths so the user can verify if they want
    if ($verbose) {
        my @paths = $LJ::MogileFS->get_paths($key, 1);
        print "\tverify mogile path: $_\n" foreach @paths;
        print "\tphone post update complete.\n\n";
    }
    return;
}

# A sub to get a cluster handle and set it up for our use: a raw master
# handle with a long session wait_timeout and RaiseError turned on.
# Dies (after writing a record to STDERR) if no handle is available.
sub get_db_handle {
    my $cid = shift;

    my $dbcm = LJ::get_cluster_master({ raw => 1 }, $cid);
    unless ($dbcm) {
        print STDERR "handle_unavailable clusterid=$cid\n";
        die "ERROR: unable to get raw handle to cluster $cid\n";
    }

    # RaiseError is not on yet, so check the handle's error state by hand
    # NOTE(review): 28800 is presumably seconds (8 hours), to keep the
    # connection alive across long uploads -- confirm.
    eval {
        $dbcm->do("SET wait_timeout = 28800");
        die $dbcm->errstr if $dbcm->err;
    };
    die "Couldn't set wait_timeout on $cid: $@\n" if $@;

    $dbcm->{'RaiseError'} = 1;
    return $dbcm;
}