1150 lines
34 KiB
Perl
Executable File
1150 lines
34 KiB
Perl
Executable File
#!/usr/bin/perl
|
|
##############################################################################
|
|
|
|
=head1 NAME
|
|
|
|
ljubackup - Per-user backup to mogilefs backend
|
|
|
|
=head1 SYNOPSIS
|
|
|
|
$ ljubackup OPTIONS USERNAME
|
|
$ ljubackup --unlock
|
|
|
|
=head2 OPTIONS
|
|
|
|
=over 4
|
|
|
|
=item -h, --help
|
|
|
|
Output a help message and exit.
|
|
|
|
=item -d, --debug
|
|
|
|
Output debugging information in addition to normal progress messages.
|
|
|
|
=item -m, --max=<count>
|
|
|
|
Back up at most I<count> users before terminating.
|
|
|
|
=item -t, --test
|
|
|
|
Just test the backup code, don't actually insert the backup db.
|
|
|
|
=item -v, --verbose
|
|
|
|
Output verbose progress information.
|
|
|
|
=item -T, --threads=<count>
|
|
|
|
Specify how many threads (subprocesses) to start with for the backup. Settings in
|
|
C<$ENV{LJHOME}/var/userbackup-workers> are overridden by this setting.
|
|
|
|
=item -u,--unlock
|
|
|
|
Run a query against the backup agent's "in-progress" table, confirming agents
|
|
listed there are still active (and removing the entries of those that are not)
before starting backups.
|
|
|
|
=head1 REQUIRES
|
|
|
|
I<Token requires line>
|
|
|
|
=head1 DESCRIPTION
|
|
|
|
This is a command-line tool which does mass user-backup operations. It drives
|
|
multiple invocations of the ljbackup.pl program for users listed in the
|
|
C<backupdirty> table.
|
|
|
|
=head1 AUTHOR
|
|
|
|
Michael Granger E<lt>ged@FaerieMUD.orgE<gt>
|
|
|
|
Copyright (c) 2003, 2004 Danga Interactive. All rights reserved.
|
|
|
|
=cut
|
|
|
|
##############################################################################
|
|
package ljubackup;
|
|
use strict;
|
|
use warnings qw{all};
|
|
|
|
|
|
###############################################################################
|
|
### I N I T I A L I Z A T I O N
|
|
###############################################################################
|
|
BEGIN {
|
|
|
|
# Turn STDOUT buffering off
|
|
$| = 1;
|
|
|
|
# Versioning stuff and custom includes
|
|
use vars qw{$VERSION $RCSID $AUTOLOAD};
|
|
$VERSION = do { my @r = (q$Revision: 1.1 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r };
|
|
$RCSID = q$Id: ljubackup.pl,v 1.1 2004/08/20 23:51:15 deveiant Exp $;
|
|
|
|
# Define some constants
|
|
use constant TRUE => 1;
|
|
use constant FALSE => 0;
|
|
|
|
use lib ( "$ENV{LJHOME}/cgi-bin" );
|
|
|
|
# Modules
|
|
use Getopt::Long qw{GetOptions};
|
|
use Pod::Usage qw{pod2usage};
|
|
use IO::File qw{};
|
|
use Fcntl qw{O_RDONLY};
|
|
use Digest::MD5 qw{md5_base64};
|
|
use Data::Dumper qw{};
|
|
use IO::Socket qw{};
|
|
use Sys::Hostname qw{hostname};
|
|
use Time::HiRes qw{gettimeofday};
|
|
use LJ::User qw{};
|
|
|
|
# LiveJournal functions
|
|
require "ljconfig.pl";
|
|
|
|
# Turn on option bundling (-vid)
|
|
Getopt::Long::Configure( "bundling" );
|
|
|
|
$Data::Dumper::Terse = 1;
|
|
$Data::Dumper::Indent = 0;
|
|
}
|
|
|
|
sub backupUsers ($$$$$$);
|
|
sub parseCommand ($);
|
|
sub parseCluster ($);
|
|
sub cleanup ();
|
|
sub unlockStaleUsers ();
|
|
sub startDaemon ();
|
|
sub daemonRoutine ($$);
|
|
sub abort (@);
|
|
|
|
|
|
###############################################################################
|
|
### C O N F I G U R A T I O N G L O B A L S
|
|
###############################################################################
|
|
# Debugging (-d) and verbose (-v) option flags. Both start out off and are
# set from the command line.
our $Debug       = FALSE;
our $VerboseFlag = FALSE;

# The PID of the distributed lock daemon (unset until the daemon is forked)
our $DaemonPid = undef;

# The path to the file that controls the number of running threads.
our $BackupAgentWorkersFile = "$ENV{LJHOME}/var/userbackup-workers";

# The number of days to use as the threshold for activity if the "+active"
# flag is given.
our $ActiveDaysDefault = 30;
|
|
|
|
|
|
|
|
### Main body
### Parse the command line, make sure a MogileFS configuration is available
### and open a handle on its 'userbackup' domain, optionally run an unlock
### cycle, start the distributed lock daemon, and then back up dirty users.
MAIN: {
    my (
        $helpFlag,              # User requested help?
        $testingMode,           # Test-only mode
        $max,                   # Max users to back up
        $threads,               # The number of threads to use while backing up
        $unlockMode,            # Run an unlock cycle?
        $instanceId,            # The unique instance id of this invocation
        $daemonIp,              # The IP the daemon is listening on
        $daemonPort,            # The ephemeral port the daemon is listening on
        $mogfs,                 # MogileFS handle for 'userbackup' domain
    );

    GetOptions(
        'd|debug+'      => \$Debug,
        'v|verbose'     => \$VerboseFlag,
        'h|help'        => \$helpFlag,
        'm|max=i'       => \$max,
        't|test'        => \$testingMode,
        'T|threads=i'   => \$threads,
        'u|unlock'      => \$unlockMode,
    ) or abortWithUsage();

    # If the -h flag was given, just show the usage and quit
    helpMode() and exit if $helpFlag;
    verboseMsg( "Starting up." );

    DBI->trace( $Debug - 1 ) if $Debug >= 2;

    # Nothing can be backed up without a MogileFS configuration. The hash is
    # tested directly: 'defined %hash' is a fatal error on modern perls.
    abort( "Requires MogileFS configuration. Check your ljconfig.pl." )
        unless %LJ::MOGILEFS_CONFIG;

    # Open a handle on the 'userbackup' domain of the configured MogileFS
    $MogileFS::DEBUG = $Debug;
    my %mogconfig = ( %LJ::MOGILEFS_CONFIG, domain => 'userbackup' );
    $mogfs = MogileFS->new( %mogconfig )
        or abort( "Couldn't create a MogileFS handle." );

    unlockStaleUsers() if $unlockMode;
    ( $instanceId, $daemonIp, $daemonPort ) = startDaemon();

    # Set signal handlers
    $SIG{HUP}  = sub { abort( "Caught SIGHUP." ) };
    $SIG{INT}  = sub { abort( "Interrupted." ) };
    $SIG{TERM} = sub { abort( "Terminated." ) };

    # Now run the backup. backupUsers() unpacks a seventh argument (the -m
    # limit), but its prototype only admits six, so the call deliberately
    # uses the '&' form to bypass prototype checking and hand the limit over.
    &backupUsers( $mogfs, $testingMode, $threads,
                  $instanceId, $daemonIp, $daemonPort, $max );

    # Run any needed cleanup functions
    cleanup();
}
|
|
|
|
|
|
### FUNCTION: cleanup()
### Clean up any children that are still running: send SIGTERM to the
### distributed lock daemon if one has been started. Returns the value of
### kill(), or nothing if there is no daemon to signal.
sub cleanup () {

    # Never signal an unset pid: it would numify to 0, and kill() with a pid
    # of 0 signals every process in the caller's process group.
    return unless $DaemonPid;

    kill 'TERM', $DaemonPid;
}
|
|
|
|
|
|
#####################################################################
|
|
### D A E M O N ( M U L T I - M O V E R ) F U N C T I O N S
|
|
#####################################################################
|
|
|
|
### FUNCTION: unlockStaleUsers()
### Traverse the clustermove_inprogress table, confirming that each entry
### belongs to an active process, removing those that don't. An entry is
### considered live if a connection to the recorded host/port succeeds and the
### peer answers with the entry's 'moverinstance' id. Users whose entry is
### removed are made read-write again. Aborts the program on database errors;
### returns 1 otherwise.
sub unlockStaleUsers () {
    my (
        $sql,                   # SQL query source
        $dbh,                   # Database handle (writer)
        $selsth,                # SELECT statement handle
        $delsth,                # DELETE statement handle
        $row,                   # Selected row hashref
        $sock,                  # Query socket
        %cachedReply,           # Cached replies: {$instance => $bool} (running or not)
    );

    verboseMsg( "Cleaning up the in-progress table." );

    $sql = q{
        SELECT *
        FROM clustermove_inprogress
    };

    # Get a select cursor for the in-progress table
    $dbh = LJ::get_db_writer() or abort( "Couldn't fetch a database handle." );
    $selsth = $dbh->prepare( $sql )
        or abort( "prepare: $sql: ", $dbh->errstr );
    $selsth->execute or abort( "execute: $sql: ", $selsth->errstr );

    # Get a deletion cursor for it too.
    $sql = q{
        DELETE FROM clustermove_inprogress
        WHERE userid = ?
    };
    $delsth = $dbh->prepare( $sql )
        or abort( "prepare: $sql: ", $dbh->errstr );
    $delsth->{ShowErrorStatement} = 1;

    # Fetch each record, connecting to the given host/port for each backup
    # agent and deleting entries for those found not to be running.
    while (( $row = $selsth->fetchrow_hashref )) {
        my ( $host, $port, $instance, $userid ) =
            @{$row}{'moverhost','moverport','moverinstance','userid'};

        # 'moverhost' holds the address as an integer; turn it back into a
        # dotted quad, most significant byte first.
        my $ip = join '.',
            reverse map { ($host >> $_ * 8) & 0xff } 0..3;
        my $lockedAt = scalar localtime( $row->{locktime} );

        # If the instance hasn't been contacted yet, do so now
        if ( !exists $cachedReply{$instance} ) {
            debugMsg( "Contacting process at %s:%d (%s) for user %d",
                      $ip, $port, $instance, $userid );

            # If the connection succeeds and replies with the correct
            # response, then the entry's okay
            if (( $sock = IO::Socket::INET->new("$ip:$port") )) {
                my $reply = $sock->getline;
                $sock->close;

                # A peer that closes without sending anything yields undef;
                # treat that as a non-matching (empty) reply.
                $reply = '' unless defined $reply;
                chomp( $reply );

                debugMsg( "Got reply '%s' from process at %s:%d", $reply, $ip, $port );
                $cachedReply{$instance} = ($instance eq $reply ? 1 : 0);
            }

            # Connection error
            else {
                debugMsg( "Couldn't open a socket to $ip:$port: $!" );
                $cachedReply{$instance} = '';
            }
        }

        # If the cached value indicates it's an invalid record, delete it.
        if ( !$cachedReply{$instance} ) {
            debugMsg( "Removing stale lock set by %s:%d on %s for uid %d",
                      $ip, $port, $lockedAt, $userid );

            # NOTE(review): this dies with a method call on an undefined value
            # if the lookup does not return a user object -- confirm whether
            # LJ::User->lookup can fail for ids found in this table.
            my $user = LJ::User->lookup( $userid );
            $user->make_readwrite;
            $delsth->execute( $userid )
                or abort( "execute: $userid: ", $delsth->errstr );
        } else {
            debugMsg( "Keeping lock set by %s:%d on %s for uid %d",
                      $ip, $port, $lockedAt, $userid );
        }
    }

    $delsth->finish;
    $selsth->finish;

    return 1;
}
|
|
|
|
|
|
### FUNCTION: startDaemon()
### Start a daemon process on an ephemeral port to support distributed
### moves. This function returns a list which consists of an I<instanceId>, the
### ip address of the listener, and the port of the listener. The I<instanceId>,
### which is a 22-character-long (e.g., MD5 hash in base64) string which
### uniquely identifies this instance, should be used in the 'moverinstance'
### field of the 'clustermove_inprogress' table when locking users for
### backup. When anything connects to the opened port, the daemon writes its
### I<instanceId> to the socket and shuts the socket down. This function also
### sets $DaemonPid to the process id of the forked child. Aborts the program
### if the listener socket can't be opened or the daemon can't be forked.
sub startDaemon () {
    my (
        $seed,                  # The source string for the instance id
        $id,                    # The instance id
        $lsock,                 # Locking socket
        $host,                  # Hostname to listen on
        $ip,                    # The ip of the listener socket
        $port,                  # The port number the listener socket is listening to
    );

    verboseMsg( "Starting distributed lock daemon." );

    # Create the "instance id" from the pid, the current time and the hostname
    $seed = join( ':', $$, (gettimeofday), hostname );
    $id = md5_base64( $seed );

    # Create the listener socket. No LocalPort is given, so the kernel
    # chooses an ephemeral port.
    $host = hostname();
    $lsock = IO::Socket::INET->new(
        Listen      => 4,
        LocalAddr   => $host,
        Reuse       => 1 )      # SO_REUSEADDR
        or abort( "Could not open listener socket: $!" );

    $ip = sprintf '%vd', $lsock->sockaddr;
    $port = $lsock->sockport;

    # fork() returns undef on failure. That must not be mistaken for the
    # child's 0, or the parent would turn itself into the daemon and then exit.
    $DaemonPid = fork;
    abort( "Could not fork the lock daemon: $!" ) unless defined $DaemonPid;

    # Parent: the listener now belongs to the daemon
    if ( $DaemonPid ) {
        debugMsg( "Started daemon (%d) at %s:%d with id = '%s'",
                  $DaemonPid, $ip, $port, $id );
        $lsock->close;
    }

    # Child: drop the inherited database connections and serve the instance id
    else {
        LJ::disconnect_dbs();
        daemonRoutine( $lsock, $id );
        exit;
    }

    return ( $id, $ip, $port );
}
|
|
|
|
|
|
### FUNCTION: daemonRoutine( $socket, $instanceId )
### Serve connections on the listening I<socket>: every client that connects is
### sent the I<instanceId> and then has its connection shut down in both
### directions. Returns once accept() stops yielding connections.
sub daemonRoutine ($$) {
    my ( $listener, $id ) = @_;

    for (;;) {
        my $client = $listener->accept or last;
        $client->print( $id );
        $client->shutdown( 2 );
    }
}
|
|
|
|
|
|
#####################################################################
|
|
### M O V E R F U N C T I O N S
|
|
#####################################################################
|
|
|
|
### FUNCTION: backupUsers( $mogfs, $testingMode, $maxThreads, $id, $ip, $port[, $max] )
### Create a BackupAgent configured with the given MogileFS handle, testing
### flag, thread limit and lock-daemon coordinates (instance I<id>, I<ip> and
### I<port>), run it, and return the number of users it reports. I<max>, if
### received, becomes the agent's user limit; the prototype only admits six
### arguments, so it arrives only when the call bypasses prototype checking.
sub backupUsers ($$$$$$) {
    my ( $mogfs, $testingMode, $maxThreads, $id, $ip, $port, $max ) = @_;

    my $agent = BackupAgent->new(
        mogilefs        => $mogfs,
        chunksize       => 500,
        maxUsers        => $max,
        debugFunction   => \&debugMsg,
        messageFunction => \&verboseMsg,
        testingMode     => $testingMode,
        maxThreads      => $maxThreads,
        instanceId      => $id,
        lockIp          => $ip,
        lockPort        => $port,
    );

    # Announce the run, do it, and report the tally
    my $modeNote = $testingMode ? " (testing mode)" : "";
    message( 'Backing up users%s: %s', $modeNote, $agent->desc );

    my $count = $agent->start;
    message( 'Done with %s: %d users.', $agent->desc, $count );

    return $count;
}
|
|
|
|
|
|
### Kill the daemon process if it's defined and alive. END blocks are also run
### by every fork()ed child that exit()s or die()s, and those children inherit
### $DaemonPid, so the pid of the process that compiled the script is recorded
### and only that process is allowed to signal the daemon. Without the check
### the first backup child to finish would take the lock daemon down with it.
my $EndBlockOwnerPid;
BEGIN { $EndBlockOwnerPid = $$ }

END {
    if ( $DaemonPid && $$ == $EndBlockOwnerPid ) {
        kill 'TERM', $DaemonPid;
    }
}
|
|
|
|
|
|
|
|
|
|
#####################################################################
|
|
### U T I L I T Y F U N C T I O N S
|
|
#####################################################################
|
|
|
|
### FUNCTION: helpMode()
### Print the usage message (pod2usage at verbosity 1) with an exit value of 0.
sub helpMode {
    my %usageArgs = (
        -verbose => 1,
        -exitval => 0,
    );

    pod2usage( %usageArgs );
}
|
|
|
|
|
|
### FUNCTION: abortWithUsage( @messages )
### Show the usage message with an exit value of 1. The I<messages>, if any,
### are concatenated and passed along as the message to display with it.
sub abortWithUsage {
    my $msg = join '', @_;

    my @usageArgs = ( -verbose => 1, -exitval => 1 );
    push @usageArgs, ( -message => "$msg" ) if $msg;

    pod2usage( @usageArgs );
}
|
|
|
|
|
|
### FUNCTION: message( $format, @args )
### Format I<args> according to the printf-style I<format> and print the result
### to STDERR, followed by a newline.
sub message {
    my $format = shift;
    printf STDERR "$format\n", @_;
}
|
|
|
|
|
|
### FUNCTION: verboseMsg( $format, @args )
### Hand the arguments to message() if verbose output is turned on; do nothing
### otherwise.
sub verboseMsg {
    if ( $VerboseFlag ) {
        return message( @_ );
    }

    return;
}
|
|
|
|
|
|
### FUNCTION: error( @messages )
### Concatenate the given I<messages> and print them to STDERR as an error
### line. With no arguments, the placeholder '[Mark]' is printed instead.
sub error {
    my $message = '[Mark]';
    $message = join( '', @_ ) if @_;

    print STDERR "ERROR >>> $message <<<\n";
}
|
|
|
|
|
|
### FUNCTION: debugMsg( $format, @args )
### If debugging mode is activated, format I<args> according to the
### sprintf-style I<format> and print the result to STDERR with a 'DEBUG> '
### prefix. Arguments that are references are dumped with Data::Dumper first.
sub debugMsg {
    return unless $Debug;

    my ( $format, @rawArgs ) = @_;
    chomp( $format );

    # Stringify references so they show up as data rather than addresses
    my @args;
    foreach my $arg ( @rawArgs ) {
        push @args, ref $arg
            ? Data::Dumper->Dumpxs([$arg])
            : $arg;
    }

    my $message = sprintf( $format, @args );
    print STDERR "DEBUG> $message\n";
}
|
|
|
|
|
|
### FUNCTION: abort( @messages )
### Concatenate the specified I<messages> (or use "unknown error" if there are
### none), print them to STDERR as 'Aborted: <message>.' and exit with a status
### of 1. Never returns.
sub abort (@) {
    my $msg = @_ ? join( '', @_ ) : "unknown error";

    # A period is appended below. Strip trailing periods and whitespace first,
    # since many callers pass complete sentences, which would otherwise come
    # out as "Aborted: Interrupted..".
    $msg =~ s/[.\s]+\z//;
    print STDERR "Aborted: $msg.\n\n";

    exit 1;
}
|
|
|
|
|
|
#####################################################################
|
|
### B A C K U P A G E N T C L A S S
|
|
#####################################################################
|
|
package BackupAgent;
|
|
|
|
BEGIN {
|
|
# LiveJournal functions
|
|
require "$ENV{'LJHOME'}/cgi-bin/ljlib.pl";
|
|
|
|
use vars qw{$AUTOLOAD};
|
|
|
|
use Carp qw{confess croak};
|
|
use Time::HiRes qw{usleep};
|
|
use POSIX qw{:sys_wait_h};
|
|
}
|
|
|
|
|
|
### METHOD: new( %args )
### Create a new BackupAgent object configured with the given I<args>. Each
### argument overrides the default of the attribute with the same name.
### Attributes are only reachable through the autoloaded accessors if their key
### exists in the object, so every attribute the class reads through an
### accessor needs a default here.
sub new {
    my $class = shift;
    my %args = @_;

    my $self = bless {
        mogilefs            => undef,

        chunksize           => 500,
        maxUsers            => 0,
        maxThreads          => 0,
        agentWorkersMtime   => 0,

        userThreads         => {},
        activeThreads       => {},
        fakeMovedUsers      => {},

        debugFunction       => undef,
        messageFunction     => undef,
        debugMode           => 0,

        # start() and reapChildren() consult this flag; without a default the
        # accessor only exists when the caller happens to pass the argument.
        testingMode         => 0,

        instanceId          => undef,
        lockIp              => undef,
        lockPort            => undef,

        _signals            => {},
        _haltFlag           => 0,
        _shutdownFlag       => 0,
        _lastStat           => 0,

        %args,
    }, $class;

    return $self;
}
|
|
|
|
|
|
### METHOD: desc()
### Return a one-line description of the agent, which includes its chunk size.
sub desc {
    my $self = shift or confess "Cannot be called as a function";

    my $chunksize = $self->{chunksize};
    return sprintf( 'Backup Agent (chunksize: %d)', $chunksize );
}
|
|
|
|
|
|
### METHOD: debugMsg( @args )
### Pass the specified I<args> on to the callback in the agent's
### 'debugFunction' attribute. Does nothing if no callback is set.
sub debugMsg {
    my $self = shift or confess "Cannot be called as a function";

    my $callback = $self->{debugFunction} or return;
    $callback->( @_ );
}
|
|
|
|
|
|
### METHOD: message( @args )
### Pass the specified I<args> on to the callback in the agent's
### 'messageFunction' attribute. Does nothing if no callback is set.
sub message {
    my $self = shift or confess "Cannot be called as a function";

    my $callback = $self->{messageFunction} or return;
    $callback->( @_ );
}
|
|
|
|
|
|
### METHOD: start()
### Start backing up users. Dirty users are fetched (and locked in the
### in-progress table) 'chunksize' at a time, wrapped in thread objects (which
### makes them read-only), and forked off while keeping the number of running
### children at or below the current thread limit. The run ends when no dirty
### users are left, when the 'maxUsers' attribute (if non-zero) is reached, or
### when a signal asks for it: HUP and INT request a shutdown that lets running
### children finish, TERM requests a halt. Users that were locked but never
### backed up are unlocked before returning. Returns the number of backups
### that were started.
sub start {
    my $self = shift or confess "Cannot be called as a function";

    my (
        $maxUsers,              # Upper bound on the number of users for this run
        $count,                 # Number of backups started so far
        $scale,                 # Number of users to queue in the current pass
        $maxThreads,            # Current limit on concurrently running children
        $oldMax,                # Previous limit, kept to report changes
        $chunksize,             # Number of dirty users to fetch at a time
        @users,                 # Fetched (in-progress-locked) users not yet queued
        @queue,                 # Users of the current pass not yet wrapped in a thread
        @threads,               # Thread objects of the current pass
        $pid,                   # Pid of the most recently started child
    );

    $maxUsers = $self->maxUsers || 1e+33;
    $maxThreads = $oldMax = $self->{maxThreads};
    $chunksize = $self->chunksize;
    $chunksize = $maxUsers if $maxUsers < $chunksize;
    $count = 0;
    $self->setSignalHandlers;

    # Iterate over all dirty users, $chunksize at a time.
    USER: while ( !$self->{_haltFlag} && !$self->{_shutdownFlag} && $count < $maxUsers )
    {
        # Re-read the thread config each time
        $maxThreads = $self->getMaxThreads( $maxThreads );
        $self->debugMsg( "User loop: max threads: $maxThreads" );

        # Advise the user if the thread count changes.
        if ( defined $oldMax && $maxThreads != $oldMax ) {
            $self->message( "Set thread count to %d (was %d)", $maxThreads, $oldMax );
            $oldMax = $maxThreads;
        }

        # No need to do any of the rest if there's no threads to run 'em.
        unless ( $maxThreads ) {
            $self->message( "Idling (threads = 0)." );
            $self->reapChildren;
            sleep 10;
            next USER;
        }

        # Fetch users if the buffer isn't already populated
        unless ( @users ) {
            @users = $self->getDirtyUsers( $chunksize )
                or last USER;
            $self->message( "Fetched %d dirty users.", scalar @users );
        }

        # Splice off some users to prepare for backup. Never splice off more
        # than the maximum number of users for this run
        $scale = $maxThreads * 3;
        $scale = ($maxUsers - $count) if ($count + $scale) > $maxUsers;
        @queue = splice( @users, 0, $scale );

        # Now wrap a thread object around each user in the queue, which also
        # locks each one. Users are shifted off the queue as they are wrapped,
        # so an interrupted pass leaves exactly the unwrapped ones behind.
        @threads = ();
        while ( @queue ) {
            last USER if $self->{_haltFlag} || $self->{_shutdownFlag};

            my $user = shift @queue;
            $self->debugMsg( "Creating a thread for user '%s'", $user->user );

            # Create a agent thread (sets the user's read-only bit).
            my $thread = BackupAgent::Thread->new( $self->{mogilefs}, $user );
            $self->{userThreads}{$user->userid} = $thread;
            push @threads, $thread;
        }

        # Wait for the read-only bit to sink in
        $self->debugMsg( "Waiting for read-only bit to sink in." );
        sleep 3;

        # Iterate over the thread objects, forking each one off as the
        # number of active ones falls below the maximum allowed.
        THREAD: foreach my $thread ( @threads ) {
            last USER if $self->{_haltFlag};

            # Wait until more threads can be started
            until ( keys %{$self->{activeThreads}} < $maxThreads ) {
                last USER if $self->{_haltFlag} || $self->{_shutdownFlag};
                $self->reapChildren;

                # usleep() takes microseconds: pause for half a second
                # between polls instead of spinning.
                usleep 500_000;
            }

            # Mark the user as "in progress" by setting the destination
            # cluster field. :FIXME: This is obviously stupid to disconnect
            # and reconnect every time, but since the handle is b0rked after
            # the ->run() below fork()s, this is necessary for it to work.

            # :FIXME: Is this necessary? We obviously don't have a
            # destination cluster...

            #LJ::disconnect_dbs();
            #$dbh = LJ::get_db_writer()
            #   or die "Couldn't fetch a writer.";
            #$dbh->do(q{
            #   UPDATE clustermove_inprogress SET dstclust = ? WHERE userid = ?
            #}, undef, 0, $thread->userid )
            #   or die "Failed to update lock: ", $dbh->errstr;

            # Run the thread
            $count++;
            $thread->testingMode( $self->testingMode );
            $self->message( "Backing up user '%s' (#%d) count: %d",
                            $thread->user->user, $thread->user->userid, $count );
            $pid = $thread->run;
            $self->{activeThreads}{ $pid } = $thread;
            $self->reapChildren;
        }

        $self->reapChildren;
    }

    if ( $self->{_haltFlag} ) { $self->message( ">>> Halted by signal <<<" ) }
    elsif ( $self->{_shutdownFlag} ) { $self->message( ">>> Shutdown by signal <<<" ) }
    else { $self->debugMsg( "Done with thread loop." ); }

    $self->restoreSignalHandlers;

    # Handle threads that are still running
    if ( %{$self->{activeThreads}} ) {

        # Let children finish unless the process is being forcefully shut down.
        unless ( $self->{_haltFlag} ) {
            foreach ( 1..10 ) {
                last unless %{$self->{activeThreads}};
                $self->message( "Waiting for %d remaining children to finish.",
                                scalar keys %{$self->{activeThreads}} );

                $self->reapChildren;
                sleep 1;
            }
        }

        # Kill off any remaining children if there are any, escalating the
        # signal every two seconds
        foreach my $signal ( 'TERM', 'QUIT', 'KILL' ) {
            last unless %{$self->{activeThreads}};

            $self->message( "Sending SIG%s to remaining %d threads.",
                            $signal, scalar keys %{$self->{activeThreads}} );
            foreach my $pid ( keys %{$self->{activeThreads}} ) {
                kill $signal, $pid if exists $self->{activeThreads}{ $pid };
            }

            $self->reapChildren;
        } continue {
            sleep 2;
        };
    }

    # Unlock any users that didn't get moved
    if ( %{$self->{userThreads}} ) {
        $self->message( "Unlocking %d remaining users.",
                        scalar keys %{$self->{userThreads}} );

        foreach my $thread ( values %{$self->{userThreads}} ) {
            $thread->unlock;
            LJ::disconnect_dbs();
            my $dbh = LJ::get_db_writer() or die "Couldn't get a db_writer.";
            $dbh->do( "DELETE FROM clustermove_inprogress WHERE userid = ?",
                      undef, $thread->user->userid )
                or die "Failed to delete user ", $thread->user->userid,
                    " from the in-progress table: ", $dbh->errstr;
        }
    }

    # Users that were fetched but never wrapped in a thread were not made
    # read-only, but getDirtyUsers() did insert an in-progress row for each of
    # them. Release those rows (only the ones set by this instance) so the
    # users don't stay locked until somebody runs an unlock cycle.
    if (( my @unstarted = ( @queue, @users ) )) {
        $self->message( "Releasing %d users that were never started.",
                        scalar @unstarted );

        LJ::disconnect_dbs();
        my $dbh = LJ::get_db_writer() or die "Couldn't get a db_writer.";
        foreach my $user ( @unstarted ) {
            $dbh->do( "DELETE FROM clustermove_inprogress"
                      . " WHERE userid = ? AND moverinstance = ?",
                      undef, $user->userid, $self->{instanceId} )
                or $self->message( "Failed to delete user %d from the in-progress table: %s",
                                   $user->userid, $dbh->errstr );
        }
    }

    return $count;
}
|
|
|
|
|
|
### METHOD: reapChildren()
### Collect any child processes that have died, without blocking. For each
### reaped backup child the corresponding thread object is dropped from the
### active-thread and user => thread tables, the user is unlocked, and its row
### is deleted from the in-progress table. Users backed up in testing mode are
### remembered in 'fakeMovedUsers' so they aren't selected again. Returns the
### number of backup processes reaped. Dies if the database can't be reached
### or the in-progress row can't be deleted.
sub reapChildren {
    my $self = shift or confess "Cannot be called as a function";
    my $count = 0;

    while ((my $pid = waitpid( -1, WNOHANG )) > 0) {

        # Grab the wait status right away; the calls below may overwrite $?
        my $status = $?;

        # The lock daemon is also a child of this process, but not a thread
        next if defined $DaemonPid && $pid == $DaemonPid;

        # A pid without a thread object has nothing to unlock; skipping it
        # avoids calling methods on an undefined value.
        my $thread = delete $self->{activeThreads}{ $pid };
        unless ( $thread ) {
            $self->debugMsg( "Reaped unknown child %d (exit: %d).", $pid, $status );
            next;
        }

        # In testing mode the user stays in the source table, so remember it
        # to keep it from being fetched again.
        my $userid = $thread->user->userid;
        $self->{fakeMovedUsers}{$userid} = 1 if $thread->testingMode;
        delete $self->{userThreads}{$userid};

        $thread->unlock;
        LJ::disconnect_dbs();
        my $dbh = LJ::get_db_writer() or die "Couldn't get a db_writer.";
        $dbh->do( "DELETE FROM clustermove_inprogress WHERE userid = ?",
                  undef, $userid )
            or die "Failed to delete user ", $userid,
                " from the in-progress table: ", $dbh->errstr;

        $self->debugMsg( "Reaped child %d (uid: %d, exit: %d). %d process/es remain.",
                         $pid, $userid, $status,
                         scalar keys %{$self->{activeThreads}} );
        $count++;
    }

    return $count;
}
|
|
|
|
|
|
### METHOD: getDirtyUsers( [$limit] )
### Return user objects for users that need backup. Up to I<limit> (default:
### 500) userids are read from the 'backupdirty' table, oldest mark first.
### Users this agent already has a thread for, or has already handled in
### testing mode, are skipped. Every user returned has been locked by inserting
### a row carrying this agent's lock ip, lock port and instance id into the
### in-progress table; users that fail to load or to lock are left out. Dies if
### the database can't be reached or the queries can't be prepared or run.
sub getDirtyUsers {
    my $self = shift or confess "Cannot be called as a function";
    my $limit = shift || 500;

    my (
        $sql,                   # SQL query string
        $dbh,                   # Database handle
        $ipsth,                 # INSERT cursor for the in-progress table
        $selsth,                # User-selection cursor
        $iip,                   # Integer IP for insertion into the in-progress table
        @userids,               # Ids of users to back up
        @candidates,            # Looked-up user objects, parallel to @userids
        @users,                 # User objects to back up
    );

    # :FIXME: This is the only way I can make this query work. If I don't do
    # this, I get "MySQL has gone away" on the second query, despite calling
    # disconnect_dbs() in the thread's start() method immediately after the
    # fork(), too. Perhaps I'll revisit this after hacking on DBI::Role for a
    # bit.
    LJ::disconnect_dbs();
    $dbh = LJ::get_db_writer() or die "failed to get_db_writer()";

    $sql = q{
        INSERT INTO clustermove_inprogress
            ( userid, locktime, moverhost, moverport, moverinstance )
        VALUES
            ( ?, ?, ?, ?, ? )
    };
    $ipsth = $dbh->prepare( $sql ) or die "prepare: ", $dbh->errstr;

    # Select the users that have been dirty the longest
    $sql = sprintf q{
        SELECT userid
        FROM backupdirty
        ORDER BY marktime ASC
        LIMIT %d
    }, $limit;

    # Prepare the selection cursor and execute it
    $selsth = $dbh->prepare( $sql ) or die "prepare: ", $dbh->errstr;
    $self->debugMsg( "Running user-select query '%s'", $sql );
    $selsth->execute or die "execute: ", $selsth->errstr;

    # The lock ip goes into the table as an integer in network byte order
    $iip = unpack( 'N', pack('C4', split( /\./, $self->lockIp )) );

    # Fetch userids. fetchall_arrayref() returns ONE reference to an array of
    # rows, so it has to be dereferenced to get at every row; mapping over the
    # reference itself would only ever yield the first row's userid.
    @userids = grep {
        !exists $self->{userThreads}{$_}
            && !exists $self->{fakeMovedUsers}{$_}
    } map {
        $_->[0]
    } @{ $selsth->fetchall_arrayref([0]) || [] };

    # If there are potential users to look up, do so
    if ( @userids ) {
        @candidates = LJ::User->lookup( @userids );

        foreach my $i ( 0 .. $#candidates ) {
            my $user = $candidates[$i];

            # If the user record didn't load, leave it out
            unless ( defined $user ) {
                $self->debugMsg( "Failed lookup for user $userids[$i]" );
                next;
            }

            # If the user loaded, but won't lock for some reason, leave it out
            unless ( $ipsth->execute($user->userid, time, $iip,
                                     $self->lockPort, $self->instanceId) )
            {
                $self->debugMsg( "Failed lock for user %s: %s", $user->user, $ipsth->errstr );
                next;
            }

            $self->debugMsg( "Selected user: %s", $user->user );
            push @users, $user;
        }
    }

    $ipsth->finish;
    return @users;
}
|
|
|
|
|
|
### METHOD: getMaxThreads( [$current] )
### Return the maximum number of threads to run. If the workers file is
### readable, and either no I<current> value is given or the file has been
### modified since it was last looked at, the limit is read from the first
### line of the file; a line that doesn't start with a non-negative integer
### counts as 0. Otherwise the I<current> value is returned unchanged, or 1 if
### there is none.
sub getMaxThreads {
    my $self = shift or confess "Cannot be called as a function";
    my $maxThreads = shift;

    if ( -r $BackupAgentWorkersFile ) {

        # '_' is the stat buffer filled by the -r test above
        my $fileMtime = (stat _)[9];
        $self->{agentWorkersMtime} ||= $fileMtime;
        my $mtime = $self->{agentWorkersMtime};

        if ( !defined $maxThreads || $fileMtime > $mtime ) {
            $self->{agentWorkersMtime} = $fileMtime;
            $self->message( "(Re)-reading $BackupAgentWorkersFile:\n\t%s < %s",
                            scalar localtime($mtime),
                            scalar localtime($self->{agentWorkersMtime}) );

            # Read the process limit from the file. If it can't be opened the
            # current value is kept.
            if ( open my $ifh, '<', $BackupAgentWorkersFile ) {
                my $line = <$ifh>;
                close $ifh;

                # An empty file yields undef. Only a leading run of digits is
                # accepted, so garbage (and negative numbers, which would keep
                # start() waiting forever for a free thread slot) means 0.
                $line = '' unless defined $line;
                $maxThreads = $line =~ /\A\s*(\d+)/ ? int($1) : 0;
            }
        }
    }

    $maxThreads = 1 if !defined $maxThreads;
    return $maxThreads;
}
|
|
|
|
|
|
### METHOD: setSignalHandlers()
### Save the current HUP, INT and TERM handlers in the object and replace them
### with handlers that raise one of the object's flags: HUP and INT set the
### shutdown flag, TERM sets the halt flag. Returns 1.
sub setSignalHandlers {
    my $self = shift or confess "Cannot be called as a function";

    $self->debugMsg( "Installing new signal handlers." );

    # Pairs of signal name and the flag its handler raises
    my @flagFor = (
        [ HUP  => '_shutdownFlag' ],
        [ INT  => '_shutdownFlag' ],
        [ TERM => '_haltFlag' ],
    );

    foreach my $pair ( @flagFor ) {
        my ( $signal, $flag ) = @$pair;

        $self->{_signals}{$signal} = $SIG{$signal};
        $SIG{$signal} = sub { $self->{$flag} = 1 };
    }

    return 1;
}
|
|
|
|
|
|
### METHOD: restoreSignalHandlers()
### Put back every signal handler recorded in the object's table of saved
### handlers (the one setSignalHandlers() fills in). Returns 1.
sub restoreSignalHandlers {
    my $self = shift or confess "Cannot be called as a function";

    $self->debugMsg( "Restoring initial signal handlers." );

    my $saved = $self->{_signals};
    $SIG{$_} = $saved->{$_} for keys %{$saved};

    return 1;
}
|
|
|
|
|
|
### (PROXY) METHOD: AUTOLOAD( @args )
### Proxy method that builds attribute accessors on demand. When the method
### called on an instance is named after a key that exists in the object, an
### accessor (get, or set when given an argument) is installed under that name
### and the call is redirected to it. Any other call is passed on to the
### parent class's method of the same name.
sub AUTOLOAD {
    my $self = shift or croak "Cannot be called as a function";

    my $name = $AUTOLOAD;
    $name =~ s{.*::}{};

    # Not an attribute of an instance: try to delegate to our parent's
    # version of the method
    unless ( ref $self && exists $self->{$name} ) {
        my $parentMethod = "SUPER::$name";
        return $self->$parentMethod( @_ );
    }

    # Define an accessor for this attribute
    my $accessor = sub : lvalue {
        my $closureSelf = shift or croak "Can't be used as a function.";
        $closureSelf->{$name} = shift if @_;
        return $closureSelf->{$name};
    };

    # Install the new method in the symbol table, so later calls don't come
    # through here again
    {
        no strict 'refs';
        *{$AUTOLOAD} = $accessor;
    }

    # Now jump to the new method after sticking the self-ref back onto the
    # stack
    unshift @_, $self;
    goto &$accessor;
}
|
|
|
|
|
|
### DESTRUCTOR: Restore the saved signal handlers when the agent object goes
### away.
sub DESTROY {
    my ( $agent ) = @_;
    $agent->restoreSignalHandlers;
}
|
|
|
|
|
|
|
|
#####################################################################
|
|
### M O V E R T H R E A D C L A S S
|
|
#####################################################################
|
|
package BackupAgent::Thread;
|
|
|
|
BEGIN {
|
|
# LiveJournal functions
|
|
require "$ENV{'LJHOME'}/cgi-bin/ljlib.pl";
|
|
|
|
use vars qw{$AUTOLOAD};
|
|
use Carp qw{croak confess};
|
|
use IO::File qw{};
|
|
use Fcntl qw{O_RDONLY};
|
|
}
|
|
|
|
|
|
### METHOD: new( $mogilefs, $user )
### Create an agent thread object that will back up the specified I<user> to
### the given I<mogilefs> filestore. The user is made read-only right away, and
### the new object starts out marked as locked, with no pid and with testing
### mode off.
sub new {
    my ( $class, $mogfs, $user ) = @_;

    # Lock the user
    $user->make_readonly;

    my %state = (
        mogilefs    => $mogfs,
        user        => $user,
        pid         => undef,
        testingMode => 0,
        locked      => 1,
    );

    return bless \%state, $class;
}
|
|
|
|
|
|
### METHOD: run()
### Fork a child process that backs up this thread's user, and return the
### child's pid to the caller. The child runs the ljbackup.pl backend: in
### testing mode with the --dump option, otherwise into a temporary file which
### is then copied into MogileFS under the key 'userbackup:<userid>', after
### which the file is removed and the user is marked clean. The child exits
### with 0 on success and dies on any failure. Croaks in the parent if the
### child can't be forked.
sub run {
    my $self = shift or confess "Cannot be called as a function";

    # fork() returns undef on failure. That must not be mistaken for the
    # child's 0, or the parent itself would run the backup below and exit.
    my $pid = fork;
    croak "Couldn't fork a backup process: $!" unless defined $pid;
    $self->{pid} = $pid;

    # Child: do the backup and exit
    unless ( $pid ) {

        # The parent's TERM handler only raises a flag in an agent object.
        # In the child nobody looks at that flag, so go back to the default
        # action to let the parent's SIGTERM actually stop this process.
        $SIG{TERM} = 'DEFAULT';

        LJ::disconnect_dbs();

        my (
            $user,              # The user being backed up
            $backfile,          # Name of the temporary backup file
            $mogilekey,         # Key to store the backup under
            $ifh,               # Read handle on the backup file
            $ofh,               # Write handle on the MogileFS file
            $buf,               # Copy buffer
        );

        $user = $self->{user};
        $backfile = sprintf( "ljubackup.%s.%d.%d",
                             $user->user,
                             $user->userid,
                             $$ );
        $mogilekey = sprintf( 'userbackup:%d', $user->userid );

        # In testing mode, run the program with the --dump option
        if ( $self->{testingMode} ) {
            system( "$ENV{LJHOME}/bin/ljbackup.pl", "--dump", $user->user ) == 0
                or die "ljbackup.pl failed: $?";
        }

        # In regular mode, dump the user to a dbm file and then stick that in
        # MogileFS.
        else {
            system( "$ENV{LJHOME}/bin/ljbackup.pl", "--file=$backfile",
                    $user->user ) == 0 or die "ljbackup.pl failed: $?";

            # Open the dbm and a new Mogile handle
            $ifh = IO::File->new( $backfile, O_RDONLY )
                or die "open: $backfile: $!";
            $ofh = $self->{mogilefs}->new_file( $mogilekey, 'normal' )
                or die "MogileFS::new_file: ", $self->{mogilefs}->errstr;

            # Copy the data over, 4k at a time
            until ( $ifh->eof ) {
                my $bytes = $ifh->read( $buf, 4096 );

                if ( $bytes ) {
                    $ofh->print( $buf );
                } elsif ( $!{EAGAIN} ) {
                    next;
                } else {
                    die "read: $backfile: $!";
                }
            }

            # Make sure it's uploaded correctly
            $ofh->close or die "error saving file to mogile: $@";
            unlink $backfile;
            $user->mark_clean;
        }

        exit 0;
    }

    return $self->{pid};
}
|
|
|
|
|
|
### METHOD: unlock()
### Make the user this thread corresponds to read-write again, announcing it
### on STDERR, if the thread still holds its lock. Does nothing for a thread
### that has already been unlocked. Always returns 1.
sub unlock {
    my $self = shift;

    return 1 unless $self->{locked};

    my $user = $self->{user};
    print STDERR "Unlocking user ", $user->userid, ".\n";
    $user->make_readwrite;
    $self->{locked} = 0;

    return 1;
}
|
|
|
|
|
|
### Deliberately empty destructor: with DESTROY defined, object destruction is
### not dispatched to AUTOLOAD, which croaks for any name that is not an
### attribute of the object.
sub DESTROY {}
|
|
|
|
|
|
### (PROXY) METHOD: AUTOLOAD( @args )
### Proxy method that builds attribute accessors on demand. When the method
### called on an instance is named after a key that exists in the object, an
### accessor (get, or set when given an argument) is installed under that name
### and the call is redirected to it. Any other call croaks with the usual
### "Can't locate object method" message; there is no delegation to a parent
### class.
sub AUTOLOAD {
    my $self = shift or croak "Cannot be called as a function";

    my $name = $AUTOLOAD;
    $name =~ s{.*::}{};

    # Not an attribute of an instance: complain the way perl itself would
    unless ( ref $self && exists $self->{$name} ) {
        croak sprintf q{Can't locate object method "%s" via package "%s"}, $name, ref $self;
    }

    # Define an accessor for this attribute
    my $accessor = sub : lvalue {
        my $closureSelf = shift or croak "Can't be used as a function.";
        $closureSelf->{$name} = shift if @_;
        return $closureSelf->{$name};
    };

    # Install the new method in the symbol table, so later calls don't come
    # through here again
    {
        no strict 'refs';
        *{$AUTOLOAD} = $accessor;
    }

    # Now jump to the new method after sticking the self-ref back onto the
    # stack
    unshift @_, $self;
    goto &$accessor;
}
|
|
|
|
|