init
This commit is contained in:
86
livejournal/bin/maint/bday.pl
Executable file
86
livejournal/bin/maint/bday.pl
Executable file
@@ -0,0 +1,86 @@
|
||||
#!/usr/bin/perl
|
||||
#
|
||||
|
||||
use strict;
|
||||
use vars qw(%maint);
|
||||
|
||||
$maint{'bdaymail'} = sub
|
||||
{
|
||||
my $dbr = LJ::get_db_reader();
|
||||
my $sth;
|
||||
|
||||
# get everybody whose birthday is today.
|
||||
$sth = $dbr->prepare("SELECT u.userid, u.user, u.name, u.email ".
|
||||
"FROM user u, userusage uu WHERE u.userid=uu.userid AND ".
|
||||
"u.bdate IS NOT NULL AND u.status='A' AND ".
|
||||
"u.statusvis='V' AND u.bdate <> '0000-00-00' AND ".
|
||||
"MONTH(NOW())=MONTH(u.bdate) AND DAYOFMONTH(NOW())=DAYOFMONTH(u.bdate) ".
|
||||
"AND uu.timeupdate > DATE_SUB(NOW(), INTERVAL 1 MONTH) AND ".
|
||||
"u.journaltype='P'");
|
||||
$sth->execute;
|
||||
my @bdays; push @bdays, $_ while ($_ = $sth->fetchrow_hashref);
|
||||
$sth->finish;
|
||||
|
||||
# go through each birthday person and tell them happy birthday.
|
||||
foreach my $bu (@bdays)
|
||||
{
|
||||
my ($user, $userid, $name, $email) = map { $bu->{$_} } qw(user userid name email);
|
||||
print "$user ($userid) .. $name .. $email\n";
|
||||
|
||||
LJ::send_mail({
|
||||
'to' => $email,
|
||||
'toname' => $name,
|
||||
'subject' => "Happy Birthday!",
|
||||
'from' => $LJ::ADMIN_EMAIL,
|
||||
'fromname' => $LJ::SITENAME,
|
||||
'body' => ("Happy Birthday $name!!\n\n".
|
||||
"According to our records, today is your birthday... everybody here ".
|
||||
"at $LJ::SITENAME would like to wish you a happy birthday!\n\n".
|
||||
"If you have any interesting birthday stories to share, do let us know! ".
|
||||
"Or better, email them to us and also update your LiveJournal with them. ".
|
||||
":) And if you have any questions/comments about the service in general, ".
|
||||
"let us know too... we're real people, not a huge corporation, so we read ".
|
||||
"and try to reply to all email.\n\n".
|
||||
"Anyway... the point of this email was originally just HAPPY BIRTHDAY!\n\n".
|
||||
"--\n$LJ::SITENAME\n$LJ::SITEROOT/\n"),
|
||||
});
|
||||
|
||||
# and now, tell people that list them as friends.
|
||||
$sth = $dbr->prepare("SELECT u.user, u.name, u.email ".
|
||||
"FROM user u, friends f, userprop up, userproplist upl, userusage uu ".
|
||||
"WHERE f.friendid=$userid AND f.userid=u.userid AND ".
|
||||
"up.userid=u.userid AND upl.upropid=up.upropid AND uu.userid=u.userid AND ".
|
||||
"upl.name='opt_bdaymail' AND up.value='1' AND ".
|
||||
"u.journaltype='P' AND u.status='A' AND u.statusvis='V' AND ".
|
||||
"uu.timeupdate>DATE_SUB(NOW(), INTERVAL 1 MONTH) AND ".
|
||||
"u.userid <> $userid");
|
||||
$sth->execute;
|
||||
if ($dbr->err) { die $dbr->errstr; }
|
||||
my @friendof; push @friendof, $_ while ($_ = $sth->fetchrow_hashref);
|
||||
|
||||
# possesive es
|
||||
my $s = ($name =~ /s$/i) ? "'" : "'s";
|
||||
|
||||
foreach my $fu (@friendof)
|
||||
{
|
||||
my ($fuser, $fname, $femail) = map { $fu->{$_} } qw(user name email);
|
||||
print " mail $fuser about $user...\n";
|
||||
|
||||
LJ::send_mail({
|
||||
'to' => $femail,
|
||||
'toname' => $fname,
|
||||
'subject' => "Birthday Reminder!",
|
||||
'from' => $LJ::ADMIN_EMAIL,
|
||||
'fromname' => $LJ::SITENAME,
|
||||
'body' => ("This is a reminder that today is $name$s birthday ".
|
||||
"(LiveJournal user: $user). You have $name listed as ".
|
||||
"a friend in your LiveJournal, so we thought this ".
|
||||
"reminder would be useful.".
|
||||
"\n\n--\n$LJ::SITENAME\n$LJ::SITEROOT/\n"),
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
};
|
||||
|
||||
1;
|
||||
361
livejournal/bin/maint/captcha.pl
Executable file
361
livejournal/bin/maint/captcha.pl
Executable file
@@ -0,0 +1,361 @@
|
||||
#!/usr/bin/perl
|
||||
|
||||
use strict;
|
||||
use vars qw(%maint %maintinfo);
|
||||
|
||||
use LJ::Captcha qw{};
|
||||
use LJ::Blob qw{};
|
||||
use File::Temp qw{tempdir};
|
||||
use File::Path qw{rmtree};
|
||||
use File::Spec qw{};
|
||||
|
||||
our ( $FakeUserId, $ClusterId, $Digits, $DigitCount,
|
||||
$ExpireThresUser, $ExpireThresNoUser );
|
||||
|
||||
# Data for code-generation
|
||||
$Digits = "abcdefghkmnpqrstuvwzyz23456789";
|
||||
$DigitCount = length( $Digits );
|
||||
|
||||
# Maximum age of answered captchas. this is just
|
||||
# for double-click protection.
|
||||
$ExpireThresUser = 2 * 60; # two minutes
|
||||
|
||||
# 24 hours for captchas which were given out but not answered.
|
||||
# (they might leave their browser window open or something)
|
||||
$ExpireThresNoUser = 24 * 3600; # 1 day
|
||||
|
||||
|
||||
#####################################################################
|
||||
### F U N C T I O N S
|
||||
#####################################################################
|
||||
|
||||
### Read a file in as a scalar and return it
|
||||
sub readfile ($) {
|
||||
my ( $filename ) = @_;
|
||||
open my $fh, "<$filename" or die "open: $filename: $!";
|
||||
local $/ = undef;
|
||||
my $data = <$fh>;
|
||||
|
||||
return $data;
|
||||
}
|
||||
|
||||
### Generate an n-character challenge code
|
||||
sub gencode ($) {
|
||||
my ( $digits ) = @_;
|
||||
my $code = '';
|
||||
for ( 1..$digits ) {
|
||||
$code .= substr( $Digits, int(rand($DigitCount)), 1 );
|
||||
}
|
||||
|
||||
return $code;
|
||||
}
|
||||
|
||||
|
||||
|
||||
#####################################################################
|
||||
### M A I N T E N A N C E T A S K S
|
||||
#####################################################################
|
||||
$maintinfo{gen_audio_captchas}{opts}{locking} = "per_host";
|
||||
$maint{gen_audio_captchas} = sub {
|
||||
my (
|
||||
$u, # Fake user record for Blob::put
|
||||
$dbh, # Database handle (writer)
|
||||
$count, # Count of currently-extant audio challenges
|
||||
$need, # How many we need to still create
|
||||
$make, # how many we're actually going to create this round
|
||||
$tmpdir, # Temporary working directory
|
||||
$code, # The generated challenge code
|
||||
$wav, # Wav file
|
||||
$data, # Wav file data
|
||||
$err, # Error-message ref for Blob::put calls
|
||||
$capid, # Captcha row id
|
||||
$anum, # Deseries-ifier value
|
||||
);
|
||||
|
||||
print "-I- Generating new audio captchas...\n";
|
||||
|
||||
$dbh = LJ::get_dbh({raw=>1}, "master") or die "Failed to get_db_writer()";
|
||||
$dbh->do("SET wait_timeout=28800");
|
||||
|
||||
# Count how many challenges there are currently
|
||||
$count = $dbh->selectrow_array(q{
|
||||
SELECT COUNT(*)
|
||||
FROM captchas
|
||||
WHERE
|
||||
type = 'audio'
|
||||
AND issuetime = 0
|
||||
});
|
||||
|
||||
|
||||
my $MaxItems = $LJ::CAPTCHA_AUDIO_PREGEN || 500;
|
||||
|
||||
# If there are enough, don't generate any more
|
||||
print "Current count is $count of $MaxItems...";
|
||||
if ( $count >= $MaxItems ) {
|
||||
print "already have enough.\n";
|
||||
return;
|
||||
} else {
|
||||
$make = $need = $MaxItems - $count;
|
||||
$make = $LJ::CAPTCHA_AUDIO_MAKE
|
||||
if defined $LJ::CAPTCHA_AUDIO_MAKE && $make > $LJ::CAPTCHA_AUDIO_MAKE;
|
||||
print "generating $make new audio challenges.\n";
|
||||
}
|
||||
|
||||
# Clean up any old audio directories lying about from failed generations
|
||||
# before. In theory, File::Temp::tempdir() is supposed to clean them up
|
||||
# itself, but it doesn't appear to be doing so.
|
||||
foreach my $olddir ( glob "audio_captchas_*" ) {
|
||||
|
||||
# If it's been more than an hour since it's been changed from the
|
||||
# starting time of the script, kill it
|
||||
if ( (-M $olddir) * 24 > 1 ) {
|
||||
print "cleaning up old working temp directory ($olddir).\n";
|
||||
rmtree( $olddir ) or die "rmtree: $olddir: $!";
|
||||
}
|
||||
}
|
||||
|
||||
# Load the system user for Blob::put() and create an auto-cleaning temp
|
||||
# directory for audio generation
|
||||
$u = LJ::load_user( "system" )
|
||||
or die "Couldn't load the system user.";
|
||||
$tmpdir = tempdir( "audio_captchas_XXXXXX", CLEANUP => 0 );
|
||||
|
||||
# target location
|
||||
my $location = $LJ::CAPTCHA_MOGILEFS ? 'mogile' : 'blob';
|
||||
|
||||
# Generate the challenges
|
||||
for ( my $i = 0; $i < $make; $i++ ) {
|
||||
print "Generating audio $i...";
|
||||
( $wav, $code ) = LJ::Captcha::generate_audio( $tmpdir );
|
||||
$data = readfile( $wav );
|
||||
unlink $wav or die "unlink: $wav: $!";
|
||||
|
||||
# Generate the capid + anum
|
||||
print "generating new capid/anum...";
|
||||
$capid = LJ::alloc_global_counter( 'C' );
|
||||
die "Couldn't allocate capid" unless $capid;
|
||||
$anum = int( rand 65_535 );
|
||||
|
||||
# Insert the blob
|
||||
print "uploading (capid = $capid, anum = $anum)...";
|
||||
if ($location eq 'mogile') {
|
||||
my $mogfs = LJ::mogclient(); # force load
|
||||
die "Requested to store captchas on MogileFS, but it's not loaded.\n"
|
||||
unless $mogfs;
|
||||
my $fh = $mogfs->new_file("captcha:$capid", 'captcha')
|
||||
or die "Unable to contact MogileFS server for storage.\n";
|
||||
$fh->print($data);
|
||||
$fh->close
|
||||
or die "Unable to save captcha to MogileFS server: $@\n";
|
||||
} else {
|
||||
LJ::Blob::put( $u, 'captcha_audio', 'wav', $capid, $data, \$err )
|
||||
or die "Error uploading to media server: $err";
|
||||
}
|
||||
|
||||
# Insert the captcha into the DB. If it fails for some reason, delete
|
||||
# the just-uploaded file from the media storage system too.
|
||||
print "inserting (code = $code)...";
|
||||
my $rval = eval {
|
||||
$dbh->do(q{
|
||||
INSERT INTO captchas( capid, type, location, answer, anum )
|
||||
VALUES ( ?, 'audio', ?, ?, ? )
|
||||
}, undef, $capid, $location, $code, $anum);
|
||||
};
|
||||
if ( !$rval || $@ ) {
|
||||
my $err = $@ || $dbh->errstr;
|
||||
if ( $location eq 'mogile' ) {
|
||||
LJ::mogclient()->delete( "captcha:$capid" );
|
||||
} else {
|
||||
LJ::Blob::delete( $u, 'captcha_audio', 'wav', $capid );
|
||||
}
|
||||
die "audio captcha insert error on ($capid, $location, $code, $anum): $err";
|
||||
}
|
||||
|
||||
print "done.\n";
|
||||
}
|
||||
|
||||
print "cleaning up working temporary directory ($tmpdir).\n";
|
||||
rmtree( $tmpdir ) or die "Failed directory cleanup: $!";
|
||||
|
||||
print "done. Created $make new audio captchas.\n";
|
||||
return 1;
|
||||
};
|
||||
|
||||
$maintinfo{gen_image_captchas}{opts}{locking} = "per_host";
|
||||
$maint{gen_image_captchas} = sub {
|
||||
my (
|
||||
$u, # Fake user record for Blob::put
|
||||
$dbh, # Database handle (writer)
|
||||
$count, # Count of currently-extant audio challenges
|
||||
$need, # How many we need to still create
|
||||
$code, # The generated challenge code
|
||||
$png, # PNG data
|
||||
$err, # Error-message ref for Blob::put calls
|
||||
$capid, # Captcha row id
|
||||
$anum, # Deseries-ifier value
|
||||
);
|
||||
|
||||
print "-I- Generating new image captchas...\n";
|
||||
|
||||
$dbh = LJ::get_dbh({raw=>1}, "master") or die "Failed to get_db_writer()";
|
||||
$dbh->do("SET wait_timeout=28800");
|
||||
|
||||
# Count how many challenges there are currently
|
||||
$count = $dbh->selectrow_array(q{
|
||||
SELECT COUNT(*)
|
||||
FROM captchas
|
||||
WHERE
|
||||
type = 'image'
|
||||
AND issuetime = 0
|
||||
});
|
||||
|
||||
my $MaxItems = $LJ::CAPTCHA_IMAGE_PREGEN || 1000;
|
||||
|
||||
# If there are enough, don't generate any more
|
||||
print "Current count is $count of $MaxItems...";
|
||||
if ( $count >= $MaxItems ) {
|
||||
print "already have enough.\n";
|
||||
return;
|
||||
} else {
|
||||
$need = $MaxItems - $count;
|
||||
print "generating $need new image challenges.\n";
|
||||
}
|
||||
|
||||
# Load system user for Blob::put()
|
||||
$u = LJ::load_user( "system" )
|
||||
or die "Couldn't load the system user.";
|
||||
|
||||
$dbh = LJ::get_db_writer() or die "Failed to get_db_writer()";
|
||||
|
||||
# target location
|
||||
my $location = $LJ::CAPTCHA_MOGILEFS ? 'mogile' : 'blob';
|
||||
|
||||
# Generate the challenges
|
||||
for ( my $i = 0; $i < $need; $i++ ) {
|
||||
print "Generating image $i...";
|
||||
$code = gencode( 7 );
|
||||
( $png ) = LJ::Captcha::generate_visual( $code );
|
||||
|
||||
# Generate the capid + anum
|
||||
print "generating new capid/anum...";
|
||||
$capid = LJ::alloc_global_counter( 'C' );
|
||||
die "Couldn't allocate capid" unless $capid;
|
||||
$anum = int( rand 65_535 );
|
||||
|
||||
# Insert the blob
|
||||
print "uploading (capid = $capid, anum = $anum)...";
|
||||
if ($location eq 'mogile') {
|
||||
my $mogfs = LJ::mogclient(); # force load
|
||||
die "Requested to store captchas on MogileFS, but it's not loaded.\n"
|
||||
unless $mogfs;
|
||||
my $fh = $mogfs->new_file("captcha:$capid", 'captcha')
|
||||
or die "Unable to contact MogileFS server for storage.\n";
|
||||
$fh->print($png);
|
||||
$fh->close
|
||||
or die "Unable to save captcha to MogileFS server: $@\n";
|
||||
} else {
|
||||
LJ::Blob::put( $u, 'captcha_image', 'png', $capid, $png, \$err )
|
||||
or die "Error uploading to media server: $err";
|
||||
}
|
||||
|
||||
# Insert the captcha into the DB. If it fails for some reason, delete
|
||||
# the just-uploaded file from the media storage system too.
|
||||
print "inserting (code = $code)...";
|
||||
my $rval = eval {
|
||||
$dbh->do(q{
|
||||
INSERT INTO captchas( capid, type, location, answer, anum )
|
||||
VALUES ( ?, 'image', ?, ?, ? )
|
||||
}, undef, $capid, $location, $code, $anum);
|
||||
};
|
||||
if ( !$rval || $@ ) {
|
||||
my $err = $@ || $dbh->errstr;
|
||||
if ( $location eq 'mogile' ) {
|
||||
LJ::mogclient()->delete( "captcha:$capid" );
|
||||
} else {
|
||||
LJ::Blob::delete( $u, 'captcha_image', 'png', $capid );
|
||||
}
|
||||
die "image captcha insert error on ($capid, $location, $code, $anum): $err";
|
||||
}
|
||||
|
||||
print "done.\n";
|
||||
}
|
||||
|
||||
print "done. Created $need new image captchas.\n";
|
||||
return 1;
|
||||
};
|
||||
|
||||
$maint{clean_captchas} = sub {
|
||||
my (
|
||||
$u, # System user
|
||||
$expired, # arrayref of arrayrefs of expired captchas
|
||||
$dbh, # Database handle (writer)
|
||||
$sql, # SQL statement
|
||||
$sth, # Statement handle
|
||||
$count, # Deletion count
|
||||
$err, # Error message reference for Blob::delete calls
|
||||
);
|
||||
|
||||
print "-I- Cleaning captchas.\n";
|
||||
|
||||
# Find captchas to delete
|
||||
$sql = q{
|
||||
SELECT
|
||||
capid, type, location
|
||||
FROM captchas
|
||||
WHERE
|
||||
( issuetime <> 0 AND issuetime < ? )
|
||||
OR
|
||||
( userid > 0
|
||||
AND ( issuetime <> 0 AND issuetime < ? )
|
||||
)
|
||||
LIMIT 2500
|
||||
};
|
||||
$dbh = LJ::get_db_writer()
|
||||
or die "No master DB handle";
|
||||
$expired = $dbh->selectall_arrayref( $sql, undef,
|
||||
time() - $ExpireThresNoUser,
|
||||
time() - $ExpireThresUser );
|
||||
die "selectall_arrayref: $sql: ", $dbh->errstr if $dbh->err;
|
||||
|
||||
if ( @$expired ) {
|
||||
print "found ", scalar @$expired, " captchas to delete...\n";
|
||||
} else {
|
||||
print "Done: No captchas to delete.\n";
|
||||
return;
|
||||
}
|
||||
|
||||
# Prepare deletion statement
|
||||
$sql = q{ DELETE FROM captchas WHERE capid = ? };
|
||||
$sth = $dbh->prepare( $sql );
|
||||
|
||||
# Fetch system user
|
||||
$u = LJ::load_user( "system" )
|
||||
or die "Couldn't load the system user.";
|
||||
|
||||
# Now delete each one from the DB and the media server
|
||||
foreach my $captcha ( @$expired ) {
|
||||
my ( $capid, $type, $location ) = @$captcha;
|
||||
$location ||= 'blob';
|
||||
print "Deleting captcha $capid ($type, $location)\n";
|
||||
my $ext = $type eq 'audio' ? 'wav' : 'png';
|
||||
|
||||
if ($location eq 'mogile') {
|
||||
my $mogfs = LJ::mogclient(); # force load
|
||||
die "Requested to delete captchas from MogileFS, but it's not loaded.\n"
|
||||
unless $mogfs;
|
||||
$mogfs->delete("captcha:$capid")
|
||||
or die "Unable to delete captcha from MogileFS server for capid = $capid.\n";
|
||||
} else {
|
||||
LJ::Blob::delete( $u, "captcha_$type", $ext, $capid, \$err )
|
||||
or die "Failed to delete $type file from media server for ".
|
||||
"capid = $capid: $err";
|
||||
}
|
||||
$sth->execute( $capid )
|
||||
or die "execute: $sql ($capid): ", $sth->errstr;
|
||||
$count++;
|
||||
}
|
||||
|
||||
print "Done: deleted $count expired captchas.\n";
|
||||
return 1;
|
||||
};
|
||||
|
||||
207
livejournal/bin/maint/clean_caches.pl
Executable file
207
livejournal/bin/maint/clean_caches.pl
Executable file
@@ -0,0 +1,207 @@
|
||||
#!/usr/bin/perl
|
||||
#
|
||||
|
||||
$maint{'clean_caches'} = sub
|
||||
{
|
||||
my $dbh = LJ::get_db_writer();
|
||||
my $sth;
|
||||
|
||||
my $verbose = $LJ::LJMAINT_VERBOSE;
|
||||
|
||||
print "-I- Cleaning authactions.\n";
|
||||
$dbh->do("DELETE FROM authactions WHERE datecreate < DATE_SUB(NOW(), INTERVAL 30 DAY)");
|
||||
|
||||
print "-I- Cleaning faquses.\n";
|
||||
$dbh->do("DELETE FROM faquses WHERE dateview < DATE_SUB(NOW(), INTERVAL 7 DAY)");
|
||||
|
||||
print "-I- Cleaning duplock.\n";
|
||||
$dbh->do("DELETE FROM duplock WHERE instime < DATE_SUB(NOW(), INTERVAL 1 HOUR)");
|
||||
|
||||
print "-I- Cleaning commenturl.\n";
|
||||
$dbh->do("DELETE FROM commenturls WHERE timecreate < UNIX_TIMESTAMP() - 86400*30 LIMIT 50000");
|
||||
|
||||
print "-I- Cleaning captcha sessions.\n";
|
||||
foreach my $c (@LJ::CLUSTERS) {
|
||||
my $dbcm = LJ::get_cluster_master($c);
|
||||
next unless $dbcm;
|
||||
$dbcm->do("DELETE FROM captcha_session WHERE sesstime < UNIX_TIMESTAMP()-86400");
|
||||
}
|
||||
|
||||
print "-I- Cleaning old anonymous comment IP logs.\n";
|
||||
my $count;
|
||||
foreach my $c (@LJ::CLUSTERS) {
|
||||
my $dbcm = LJ::get_cluster_master($c);
|
||||
next unless $dbcm;
|
||||
# 432,000 seconds is 5 days
|
||||
$count += $dbcm->do('DELETE FROM tempanonips WHERE reporttime < (UNIX_TIMESTAMP() - 432000)');
|
||||
}
|
||||
print " deleted $count\n";
|
||||
|
||||
print "-I- Cleaning diresearchres.\n";
|
||||
# need insert before delete so master logs delete and slaves actually do it
|
||||
$dbh->do("INSERT INTO dirsearchres2 VALUES (MD5(NOW()), DATE_SUB(NOW(), INTERVAL 31 MINUTE), '')");
|
||||
$dbh->do("DELETE FROM dirsearchres2 WHERE dateins < DATE_SUB(NOW(), INTERVAL 30 MINUTE)");
|
||||
|
||||
print "-I- Cleaning meme.\n";
|
||||
do {
|
||||
$sth = $dbh->prepare("DELETE FROM meme WHERE ts < DATE_SUB(NOW(), INTERVAL 7 DAY) LIMIT 250");
|
||||
$sth->execute;
|
||||
if ($dbh->err) { print $dbh->errstr; }
|
||||
print " deleted ", $sth->rows, "\n";
|
||||
} while ($sth->rows && ! $sth->err);
|
||||
|
||||
print "-I- Cleaning old pending comments.\n";
|
||||
$count = 0;
|
||||
foreach my $c (@LJ::CLUSTERS) {
|
||||
my $dbcm = LJ::get_cluster_master($c);
|
||||
next unless $dbcm;
|
||||
# 3600 seconds is one hour
|
||||
my $time = time() - 3600;
|
||||
$count += $dbcm->do('DELETE FROM pendcomments WHERE datesubmit < ? LIMIT 2000', undef, $time);
|
||||
}
|
||||
print " deleted $count\n";
|
||||
|
||||
# move rows from talkleft_xfp to talkleft
|
||||
print "-I- Moving talkleft_xfp.\n";
|
||||
|
||||
my $xfp_count = $dbh->selectrow_array("SELECT COUNT(*) FROM talkleft_xfp");
|
||||
print " rows found: $xfp_count\n";
|
||||
|
||||
if ($xfp_count) {
|
||||
|
||||
my @xfp_cols = qw(userid posttime journalid nodetype nodeid jtalkid publicitem);
|
||||
my $xfp_cols = join(",", @xfp_cols);
|
||||
my $xfp_cols_join = join(",", map { "t.$_" } @xfp_cols);
|
||||
|
||||
my %insert_vals;
|
||||
my %delete_vals;
|
||||
|
||||
# select out 1000 rows from random clusters
|
||||
$sth = $dbh->prepare("SELECT u.clusterid,u.user,$xfp_cols_join " .
|
||||
"FROM talkleft_xfp t, user u " .
|
||||
"WHERE t.userid=u.userid LIMIT 1000");
|
||||
$sth->execute();
|
||||
my $row_ct = 0;
|
||||
while (my $row = $sth->fetchrow_hashref) {
|
||||
|
||||
my %qrow = map { $_, $dbh->quote($row->{$_}) } @xfp_cols;
|
||||
|
||||
push @{$insert_vals{$row->{'clusterid'}}},
|
||||
("(" . join(",", map { $qrow{$_} } @xfp_cols) . ")");
|
||||
push @{$delete_vals{$row->{'clusterid'}}},
|
||||
("(userid=$qrow{'userid'} AND " .
|
||||
"journalid=$qrow{'journalid'} AND " .
|
||||
"nodetype=$qrow{'nodetype'} AND " .
|
||||
"nodeid=$qrow{'nodeid'} AND " .
|
||||
"posttime=$qrow{'posttime'} AND " .
|
||||
"jtalkid=$qrow{'jtalkid'})");
|
||||
|
||||
$row_ct++;
|
||||
}
|
||||
|
||||
foreach my $clusterid (sort keys %insert_vals) {
|
||||
my $dbcm = LJ::get_cluster_master($clusterid);
|
||||
unless ($dbcm) {
|
||||
print " cluster down: $clusterid\n";
|
||||
next;
|
||||
}
|
||||
|
||||
print " cluster $clusterid: " . scalar(@{$insert_vals{$clusterid}}) .
|
||||
" rows\n" if $verbose;
|
||||
$dbcm->do("INSERT INTO talkleft ($xfp_cols) VALUES " .
|
||||
join(",", @{$insert_vals{$clusterid}})) . "\n";
|
||||
if ($dbcm->err) {
|
||||
print " db error (insert): " . $dbcm->errstr . "\n";
|
||||
next;
|
||||
}
|
||||
|
||||
# no error, delete from _xfp
|
||||
$dbh->do("DELETE FROM talkleft_xfp WHERE " .
|
||||
join(" OR ", @{$delete_vals{$clusterid}})) . "\n";
|
||||
if ($dbh->err) {
|
||||
print " db error (delete): " . $dbh->errstr . "\n";
|
||||
next;
|
||||
}
|
||||
}
|
||||
|
||||
print " rows remaining: " . ($xfp_count - $row_ct) . "\n";
|
||||
}
|
||||
|
||||
# move clustered recentaction summaries from their respective clusters
|
||||
# to the global actionhistory table
|
||||
print "-I- Migrating recentactions.\n";
|
||||
|
||||
foreach my $cid (@LJ::CLUSTERS) {
|
||||
next unless $cid;
|
||||
|
||||
my $dbcm = LJ::get_cluster_master($cid);
|
||||
unless ($dbcm) {
|
||||
print " cluster down: $clusterid\n";
|
||||
next;
|
||||
}
|
||||
|
||||
unless ($dbcm->do("LOCK TABLES recentactions WRITE")) {
|
||||
print " db error (lock): " . $dbcm->errstr . "\n";
|
||||
next;
|
||||
}
|
||||
|
||||
my $sth = $dbcm->prepare
|
||||
("SELECT what, COUNT(*) FROM recentactions GROUP BY 1");
|
||||
$sth->execute;
|
||||
if ($dbcm->err) {
|
||||
print " db error (select): " . $dbcm->errstr . "\n";
|
||||
next;
|
||||
}
|
||||
|
||||
my %counts = ();
|
||||
my $total_ct = 0;
|
||||
while (my ($what, $ct) = $sth->fetchrow_array) {
|
||||
$counts{$what} += $ct;
|
||||
$total_ct += $ct;
|
||||
}
|
||||
|
||||
print " cluster $cid: $total_ct rows\n" if $verbose;
|
||||
|
||||
# Note: We can experience failures on both sides of this
|
||||
# transaction. Either our delete can succeed then
|
||||
# insert fail or vice versa. Luckily this data is
|
||||
# for statistical purposes so we can just live with
|
||||
# the possibility of a small skew.
|
||||
|
||||
unless ($dbcm->do("DELETE FROM recentactions")) {
|
||||
print " db error (delete): " . $dbcm->errstr . "\n";
|
||||
next;
|
||||
}
|
||||
|
||||
# at this point if there is an error we will ignore it and try
|
||||
# to insert the count data above anyway
|
||||
$dbcm->do("UNLOCK TABLES")
|
||||
or print " db error (unlock): " . $dbcm->errstr . "\n";
|
||||
|
||||
# nothing to insert, why bother?
|
||||
next unless %counts;
|
||||
|
||||
# insert summary into global actionhistory table
|
||||
my @bind = ();
|
||||
my @vals = ();
|
||||
while (my ($what, $ct) = each %counts) {
|
||||
push @bind, "(UNIX_TIMESTAMP(),?,?,?)";
|
||||
push @vals, $cid, $what, $ct;
|
||||
}
|
||||
my $bind = join(",", @bind);
|
||||
|
||||
$dbh->do("INSERT INTO actionhistory (time, clusterid, what, count) " .
|
||||
"VALUES $bind", undef, @vals);
|
||||
if ($dbh->err) {
|
||||
print " db error (insert): " . $dbh->errstr . "\n";
|
||||
|
||||
# something's badly b0rked, don't try any other clusters for now
|
||||
last;
|
||||
}
|
||||
|
||||
# next cluster
|
||||
}
|
||||
|
||||
};
|
||||
|
||||
1;
|
||||
76
livejournal/bin/maint/generic.pl
Executable file
76
livejournal/bin/maint/generic.pl
Executable file
@@ -0,0 +1,76 @@
|
||||
#!/usr/bin/perl
|
||||
#
|
||||
|
||||
$maint{joinmail} = sub {
|
||||
# this needs to be resumeable, so that it can run once every 10 or 15 minutes to digest things
|
||||
# that are a day old but haven't been sent. also, the first query down there needs to include
|
||||
# the right authaction type in the WHERE clause, and NOT do a GROUP BY.
|
||||
print "Returning without running... I'm disabled right now.\n";
|
||||
return 1;
|
||||
|
||||
my $dbr = LJ::get_db_reader();
|
||||
|
||||
# get all information
|
||||
my $pending = $dbr->selectall_arrayref("SELECT userid, COUNT(arg1) FROM authactions " .
|
||||
"WHERE used = 'N' AND datecreate > DATE_SUB(NOW(), INTERVAL 1 DAY)" .
|
||||
"GROUP BY userid") || [];
|
||||
|
||||
# get userids of communities
|
||||
my @commids;
|
||||
push @commids, $_->[0]+0 foreach @$pending;
|
||||
my $cus = LJ::load_userids(@commids);
|
||||
|
||||
# now let's get the maintainers of these
|
||||
my $in = join ',', @commids;
|
||||
my $maintrows = $dbr->selectall_arrayref("SELECT userid, targetid FROM reluser WHERE userid IN ($in) AND type = 'A'") || [];
|
||||
my @maintids;
|
||||
my %maints;
|
||||
foreach (@$maintrows) {
|
||||
push @{$maints{$_->[0]}}, $_->[1];
|
||||
push @maintids, $_->[1];
|
||||
}
|
||||
my $mus = LJ::load_userids(@maintids);
|
||||
|
||||
# tell the maintainers that they got new people.
|
||||
foreach my $row (@$pending) {
|
||||
my $cuser = $cus->{$row->[0]}{user};
|
||||
print "$cuser: $row->[1] invites: ";
|
||||
my %email; # see who we emailed on this comm
|
||||
foreach my $mid (@{$maints{$row->[0]}}) {
|
||||
print "$mid ";
|
||||
next if $email{$mus->{$mid}{email}}++;
|
||||
LJ::load_user_props($mus->{$mid}, 'opt_communityjoinemail');
|
||||
next unless $mus->{$mid}{opt_communityjoinemail} eq 'D'; # Daily or Digest
|
||||
|
||||
my $body = "Dear $mus->{$mid}{user},\n\n" .
|
||||
"Over the past day or so, $row->[1] request(s) to join the \"$cuser\" community have " .
|
||||
"been received. To look at the currently pending membership requests, please visit the pending " .
|
||||
"membership page:\n\n" .
|
||||
"\t$LJ::SITEROOT/community/pending.bml?comm=$cuser\n\n" .
|
||||
"You may also ignore this email. Outstanding requests to join will expire after a period of 30 days.\n\n" .
|
||||
"If you wish to no longer receive these emails, visit the community management page and " .
|
||||
"set the relevant options:\n\n\t$LJ::SITEROOT/community/manage.bml\n\n" .
|
||||
"Regards,\n$LJ::SITENAME Team\n";
|
||||
|
||||
LJ::send_mail({
|
||||
to => $mus->{$mid}{email},
|
||||
from => $LJ::COMMUNITY_EMAIL,
|
||||
fromname => $LJ::SITENAME,
|
||||
charset => 'utf-8',
|
||||
subject => "$cuser Membership Requests",
|
||||
body => $body,
|
||||
wrap => 76,
|
||||
});
|
||||
}
|
||||
print "\n";
|
||||
}
|
||||
};
|
||||
|
||||
$maint{clean_spamreports} = sub {
|
||||
my $dbh = LJ::get_db_writer();
|
||||
my $len = 86400 * 90; # 90 days
|
||||
my $ct = $dbh->do("DELETE FROM spamreports WHERE reporttime < UNIX_TIMESTAMP() - $len")+0;
|
||||
print "Deleted $ct reports.\n";
|
||||
};
|
||||
|
||||
1;
|
||||
530
livejournal/bin/maint/stats.pl
Executable file
530
livejournal/bin/maint/stats.pl
Executable file
@@ -0,0 +1,530 @@
|
||||
#!/usr/bin/perl
|
||||
#
|
||||
|
||||
use strict;
|
||||
use vars qw(%maint);
|
||||
|
||||
require "$ENV{'LJHOME'}/cgi-bin/statslib.pl";
|
||||
|
||||
# filled in by ljmaint.pl, 0=quiet, 1=normal, 2=verbose
|
||||
$LJ::Stats::VERBOSE = $LJ::LJMAINT_VERBOSE >= 2 ? 1 : 0;
|
||||
|
||||
$maint{'genstats'} = sub
|
||||
{
|
||||
my @which = @_ || qw(users countries
|
||||
states gender clients
|
||||
pop_interests meme pop_faq);
|
||||
|
||||
# popular faq items
|
||||
LJ::Stats::register_stat
|
||||
({ 'type' => "global",
|
||||
'jobname' => "popfaq",
|
||||
'statname' => "pop_faq",
|
||||
'handler' =>
|
||||
sub {
|
||||
my $db_getter = shift;
|
||||
return undef unless ref $db_getter eq 'CODE';
|
||||
my $db = $db_getter->();
|
||||
return undef unless $db;
|
||||
|
||||
my $sth = $db->prepare("SELECT faqid, COUNT(*) FROM faquses WHERE " .
|
||||
"faqid<>0 GROUP BY 1 ORDER BY 2 DESC LIMIT 50");
|
||||
$sth->execute;
|
||||
die $db->errstr if $db->err;
|
||||
|
||||
my %ret;
|
||||
while (my ($id, $count) = $sth->fetchrow_array) {
|
||||
$ret{$id} = $count;
|
||||
}
|
||||
|
||||
return \%ret;
|
||||
},
|
||||
|
||||
});
|
||||
|
||||
# popular interests
|
||||
LJ::Stats::register_stat
|
||||
({ 'type' => "global",
|
||||
'jobname' => "pop_interests",
|
||||
'statname' => "pop_interests",
|
||||
'handler' =>
|
||||
sub {
|
||||
my $db_getter = shift;
|
||||
return undef unless ref $db_getter eq 'CODE';
|
||||
my $db = $db_getter->();
|
||||
return undef unless $db;
|
||||
|
||||
return {} if $LJ::DISABLED{'interests-popular'};
|
||||
|
||||
# see what the previous min was, then subtract 20% of max from it
|
||||
my ($prev_min, $prev_max) = $db->selectrow_array("SELECT MIN(statval), MAX(statval) " .
|
||||
"FROM stats WHERE statcat='pop_interests'");
|
||||
my $stat_min = int($prev_min - (0.2*$prev_max));
|
||||
$stat_min = 1 if $stat_min < 1;
|
||||
|
||||
my $sth = $db->prepare("SELECT interest, intcount FROM interests WHERE intcount>? " .
|
||||
"ORDER BY intcount DESC, interest ASC LIMIT 400");
|
||||
$sth->execute($stat_min);
|
||||
die $db->errstr if $db->err;
|
||||
|
||||
my %ret;
|
||||
while (my ($int, $count) = $sth->fetchrow_array) {
|
||||
$ret{$int} = $count;
|
||||
}
|
||||
|
||||
return \%ret;
|
||||
},
|
||||
|
||||
});
|
||||
|
||||
# popular memes
|
||||
LJ::Stats::register_stat
|
||||
({ 'type' => "global",
|
||||
'jobname' => "meme",
|
||||
'statname' => "popmeme",
|
||||
'handler' =>
|
||||
sub {
|
||||
my $db_getter = shift;
|
||||
return undef unless ref $db_getter eq 'CODE';
|
||||
my $db = $db_getter->();
|
||||
return undef unless $db;
|
||||
|
||||
return {} if $LJ::DISABLED{'meme'};
|
||||
|
||||
my $sth = $db->prepare("SELECT url, count(*) FROM meme " .
|
||||
"GROUP BY 1 ORDER BY 2 DESC LIMIT 100");
|
||||
$sth->execute;
|
||||
die $db->errstr if $db->err;
|
||||
|
||||
my %ret;
|
||||
while (my ($url, $count) = $sth->fetchrow_array) {
|
||||
$ret{$url} = $count;
|
||||
}
|
||||
|
||||
return \%ret;
|
||||
},
|
||||
});
|
||||
|
||||
# clients
|
||||
LJ::Stats::register_stat
|
||||
({ 'type' => "global",
|
||||
'jobname' => "clients",
|
||||
'statname' => "client",
|
||||
'handler' =>
|
||||
sub {
|
||||
my $db_getter = shift;
|
||||
return undef unless ref $db_getter eq 'CODE';
|
||||
my $db = $db_getter->();
|
||||
return undef unless $db;
|
||||
|
||||
return {} if $LJ::DISABLED{'clientversionlog'};
|
||||
|
||||
my $usertotal = $db->selectrow_array("SELECT MAX(userid) FROM user");
|
||||
my $blocks = LJ::Stats::num_blocks($usertotal);
|
||||
|
||||
my %ret;
|
||||
foreach my $block (1..$blocks) {
|
||||
my ($low, $high) = LJ::Stats::get_block_bounds($block);
|
||||
|
||||
$db = $db_getter->(); # revalidate connection
|
||||
my $sth = $db->prepare("SELECT c.client, COUNT(*) AS 'count' FROM clients c, clientusage cu " .
|
||||
"WHERE c.clientid=cu.clientid AND cu.userid BETWEEN $low AND $high " .
|
||||
"AND cu.lastlogin > DATE_SUB(NOW(), INTERVAL 30 DAY) GROUP BY 1 ORDER BY 2");
|
||||
$sth->execute;
|
||||
die $db->errstr if $db->err;
|
||||
|
||||
while ($_ = $sth->fetchrow_hashref) {
|
||||
$ret{$_->{'client'}} += $_->{'count'};
|
||||
}
|
||||
|
||||
print LJ::Stats::block_status_line($block, $blocks);
|
||||
}
|
||||
|
||||
return \%ret;
|
||||
},
|
||||
});
|
||||
|
||||
|
||||
# user table analysis
|
||||
LJ::Stats::register_stat
|
||||
({ 'type' => "global",
|
||||
'jobname' => "users",
|
||||
'statname' => ["account", "newbyday", "age", "userinfo"],
|
||||
'handler' =>
|
||||
sub {
|
||||
my $db_getter = shift;
|
||||
return undef unless ref $db_getter eq 'CODE';
|
||||
my $db = $db_getter->();
|
||||
return undef unless $db;
|
||||
|
||||
my $usertotal = $db->selectrow_array("SELECT MAX(userid) FROM user");
|
||||
my $blocks = LJ::Stats::num_blocks($usertotal);
|
||||
|
||||
my %ret; # return hash, (statname => { arg => val } since 'statname' is arrayref above
|
||||
|
||||
# iterate over user table in batches
|
||||
foreach my $block (1..$blocks) {
|
||||
|
||||
my ($low, $high) = LJ::Stats::get_block_bounds($block);
|
||||
|
||||
# user query: gets user,caps,age,status,allow_getljnews
|
||||
$db = $db_getter->(); # revalidate connection
|
||||
my $sth = $db->prepare
|
||||
("SELECT user, caps, " .
|
||||
"FLOOR((TO_DAYS(NOW())-TO_DAYS(bdate))/365.25) AS 'age', " .
|
||||
"status, allow_getljnews " .
|
||||
"FROM user WHERE userid BETWEEN $low AND $high");
|
||||
$sth->execute;
|
||||
die $db->errstr if $db->err;
|
||||
while (my $rec = $sth->fetchrow_hashref) {
|
||||
|
||||
# account types
|
||||
my $capnameshort = LJ::name_caps_short($rec->{'caps'});
|
||||
$ret{'account'}->{$capnameshort}++;
|
||||
|
||||
# ages
|
||||
$ret{'age'}->{$rec->{'age'}}++
|
||||
if $rec->{'age'} > 4 && $rec->{'age'} < 110;
|
||||
|
||||
# users receiving news emails
|
||||
$ret{'userinfo'}->{'allow_getljnews'}++
|
||||
if $rec->{'status'} eq "A" && $rec->{'allow_getljnews'} eq "Y";
|
||||
}
|
||||
|
||||
# userusage query: gets timeupdate,datereg,nowdate
|
||||
my $sth = $db->prepare
|
||||
("SELECT DATE_FORMAT(timecreate, '%Y-%m-%d') AS 'datereg', " .
|
||||
"DATE_FORMAT(NOW(), '%Y-%m-%d') AS 'nowdate', " .
|
||||
"UNIX_TIMESTAMP(timeupdate) AS 'timeupdate' " .
|
||||
"FROM userusage WHERE userid BETWEEN $low AND $high");
|
||||
$sth->execute;
|
||||
die $db->errstr if $db->err;
|
||||
|
||||
while (my $rec = $sth->fetchrow_hashref) {
|
||||
|
||||
# date registered
|
||||
$ret{'newbyday'}->{$rec->{'datereg'}}++
|
||||
unless $rec->{'datereg'} eq $rec->{'nowdate'};
|
||||
|
||||
# total user/activity counts
|
||||
$ret{'userinfo'}->{'total'}++;
|
||||
if (my $time = $rec->{'timeupdate'}) {
|
||||
my $now = time();
|
||||
$ret{'userinfo'}->{'updated'}++;
|
||||
$ret{'userinfo'}->{'updated_last30'}++ if $time > $now-60*60*24*30;
|
||||
$ret{'userinfo'}->{'updated_last7'}++ if $time > $now-60*60*24*7;
|
||||
$ret{'userinfo'}->{'updated_last1'}++ if $time > $now-60*60*24*1;
|
||||
}
|
||||
}
|
||||
|
||||
print LJ::Stats::block_status_line($block, $blocks);
|
||||
}
|
||||
|
||||
return \%ret;
|
||||
},
|
||||
});
|
||||
|
||||
|
||||
LJ::Stats::register_stat
|
||||
({ 'type' => "clustered",
|
||||
'jobname' => "countries",
|
||||
'statname' => "country",
|
||||
'handler' =>
|
||||
sub {
|
||||
my $db_getter = shift;
|
||||
return undef unless ref $db_getter eq 'CODE';
|
||||
my $db = $db_getter->();
|
||||
my $cid = shift;
|
||||
return undef unless $db && $cid;
|
||||
|
||||
my $upc = LJ::get_prop("user", "country");
|
||||
die "Can't find country userprop. Database populated?\n" unless $upc;
|
||||
|
||||
my $usertotal = $db->selectrow_array("SELECT MAX(userid) FROM userproplite2");
|
||||
my $blocks = LJ::Stats::num_blocks($usertotal);
|
||||
|
||||
my %ret;
|
||||
foreach my $block (1..$blocks) {
|
||||
my ($low, $high) = LJ::Stats::get_block_bounds($block);
|
||||
|
||||
$db = $db_getter->(); # revalidate connection
|
||||
my $sth = $db->prepare("SELECT u.value, COUNT(*) AS 'count' FROM userproplite2 u " .
|
||||
"LEFT JOIN clustertrack2 c ON u.userid=c.userid " .
|
||||
"WHERE u.upropid=? AND u.value<>'' AND u.userid=c.userid " .
|
||||
"AND u.userid BETWEEN $low AND $high " .
|
||||
"AND (c.clusterid IS NULL OR c.clusterid=?)" .
|
||||
"GROUP BY 1 ORDER BY 2");
|
||||
$sth->execute($upc->{'id'}, $cid);
|
||||
die "clusterid: $cid, " . $db->errstr if $db->err;
|
||||
|
||||
while ($_ = $sth->fetchrow_hashref) {
|
||||
$ret{$_->{'value'}} += $_->{'count'};
|
||||
}
|
||||
|
||||
print LJ::Stats::block_status_line($block, $blocks);
|
||||
}
|
||||
|
||||
return \%ret;
|
||||
},
|
||||
});
|
||||
|
||||
|
||||
LJ::Stats::register_stat
|
||||
({ 'type' => "clustered",
|
||||
'jobname' => "states",
|
||||
'statname' => "stateus",
|
||||
'handler' =>
|
||||
sub {
|
||||
my $db_getter = shift;
|
||||
return undef unless ref $db_getter eq 'CODE';
|
||||
my $db = $db_getter->();
|
||||
my $cid = shift;
|
||||
return undef unless $db && $cid;
|
||||
|
||||
my $upc = LJ::get_prop("user", "country");
|
||||
die "Can't find country userprop. Database populated?\n" unless $upc;
|
||||
|
||||
my $ups = LJ::get_prop("user", "state");
|
||||
die "Can't find state userprop. Database populated?\n" unless $ups;
|
||||
|
||||
my $usertotal = $db->selectrow_array("SELECT MAX(userid) FROM userproplite2");
|
||||
my $blocks = LJ::Stats::num_blocks($usertotal);
|
||||
|
||||
my %ret;
|
||||
foreach my $block (1..$blocks) {
|
||||
my ($low, $high) = LJ::Stats::get_block_bounds($block);
|
||||
|
||||
$db = $db_getter->(); # revalidate connection
|
||||
my $sth = $db->prepare("SELECT ua.value, COUNT(*) AS 'count' " .
|
||||
"FROM userproplite2 ua, userproplite2 ub " .
|
||||
"WHERE ua.userid=ub.userid AND ua.upropid=? AND " .
|
||||
"ub.upropid=? and ub.value='US' AND ub.value<>'' " .
|
||||
"AND ua.userid BETWEEN $low AND $high " .
|
||||
"GROUP BY 1 ORDER BY 2");
|
||||
$sth->execute($ups->{'id'}, $upc->{'id'});
|
||||
die $db->errstr if $db->err;
|
||||
|
||||
while ($_ = $sth->fetchrow_hashref) {
|
||||
$ret{$_->{'value'}} += $_->{'count'};
|
||||
}
|
||||
|
||||
print LJ::Stats::block_status_line($block, $blocks);
|
||||
}
|
||||
|
||||
return \%ret;
|
||||
},
|
||||
|
||||
});
|
||||
|
||||
|
||||
LJ::Stats::register_stat
|
||||
({ 'type' => "clustered",
|
||||
'jobname' => "gender",
|
||||
'statname' => "gender",
|
||||
'handler' =>
|
||||
sub {
|
||||
my $db_getter = shift;
|
||||
return undef unless ref $db_getter eq 'CODE';
|
||||
my $db = $db_getter->();
|
||||
my $cid = shift;
|
||||
return undef unless $db && $cid;
|
||||
|
||||
my $upg = LJ::get_prop("user", "gender");
|
||||
die "Can't find gender userprop. Database populated?\n" unless $upg;
|
||||
|
||||
my $usertotal = $db->selectrow_array("SELECT MAX(userid) FROM userproplite2");
|
||||
my $blocks = LJ::Stats::num_blocks($usertotal);
|
||||
|
||||
my %ret;
|
||||
foreach my $block (1..$blocks) {
|
||||
my ($low, $high) = LJ::Stats::get_block_bounds($block);
|
||||
|
||||
$db = $db_getter->(); # revalidate connection
|
||||
my $sth = $db->prepare("SELECT value, COUNT(*) AS 'count' FROM userproplite2 up " .
|
||||
"LEFT JOIN clustertrack2 c ON up.userid=c.userid " .
|
||||
"WHERE up.upropid=? AND up.userid BETWEEN $low AND $high " .
|
||||
"AND (c.clusterid IS NULL OR c.clusterid=?) GROUP BY 1");
|
||||
$sth->execute($upg->{'id'}, $cid);
|
||||
die "clusterid: $cid, " . $db->errstr if $db->err;
|
||||
|
||||
while ($_ = $sth->fetchrow_hashref) {
|
||||
$ret{$_->{'value'}} += $_->{'count'};
|
||||
}
|
||||
|
||||
print LJ::Stats::block_status_line($block, $blocks);
|
||||
}
|
||||
|
||||
return \%ret;
|
||||
},
|
||||
|
||||
});
|
||||
|
||||
# run stats
|
||||
LJ::Stats::run_stats(@which);
|
||||
|
||||
#### dump to text file
|
||||
print "-I- Dumping to a text file.\n";
|
||||
|
||||
{
|
||||
my $dbh = LJ::Stats::get_db("dbh");
|
||||
my $sth = $dbh->prepare("SELECT statcat, statkey, statval FROM stats ORDER BY 1, 2");
|
||||
$sth->execute;
|
||||
die $dbh->errstr if $dbh->err;
|
||||
|
||||
open (OUT, ">$LJ::HTDOCS/stats/stats.txt");
|
||||
while (my @row = $sth->fetchrow_array) {
|
||||
next if grep { $row[0] eq $_ } @LJ::PRIVATE_STATS;
|
||||
print OUT join("\t", @row), "\n";
|
||||
}
|
||||
close OUT;
|
||||
}
|
||||
|
||||
print "-I- Done.\n";
|
||||
|
||||
};
|
||||
|
||||
$maint{'genstats_size'} = sub {
|
||||
|
||||
LJ::Stats::register_stat
|
||||
({ 'type' => "global",
|
||||
'jobname' => "size-accounts",
|
||||
'statname' => "size",
|
||||
'handler' =>
|
||||
sub {
|
||||
my $db_getter = shift;
|
||||
return undef unless ref $db_getter eq 'CODE';
|
||||
my $db = $db_getter->();
|
||||
return undef unless $db;
|
||||
|
||||
# not that this isn't a total of current accounts (some rows may have
|
||||
# been deleted), but rather a total of accounts ever created
|
||||
my $size = $db->selectrow_array("SELECT MAX(userid) FROM user");
|
||||
return { 'accounts' => $size };
|
||||
},
|
||||
});
|
||||
|
||||
LJ::Stats::register_stat
|
||||
({ 'type' => "clustered",
|
||||
'jobname' => "size-accounts_active",
|
||||
'statname' => "size",
|
||||
'handler' =>
|
||||
sub {
|
||||
my $db_getter = shift;
|
||||
return undef unless ref $db_getter eq 'CODE';
|
||||
my $db = $db_getter->();
|
||||
return undef unless $db;
|
||||
|
||||
my $period = 30; # one month is considered active
|
||||
my $active = $db->selectrow_array
|
||||
("SELECT COUNT(*) FROM clustertrack2 WHERE ".
|
||||
"timeactive > UNIX_TIMESTAMP()-86400*$period");
|
||||
|
||||
return { 'accounts_active' => $active };
|
||||
},
|
||||
});
|
||||
|
||||
print "-I- Generating account size stats.\n";
|
||||
LJ::Stats::run_stats("size-accounts", "size-accounts_active");
|
||||
print "-I- Done.\n";
|
||||
};
|
||||
|
||||
|
||||
$maint{'genstats_weekly'} = sub
|
||||
{
|
||||
LJ::Stats::register_stat
|
||||
({ 'type' => "global",
|
||||
'jobname' => "supportrank",
|
||||
'statname' => "supportrank",
|
||||
'handler' =>
|
||||
sub {
|
||||
my $db_getter = shift;
|
||||
return undef unless ref $db_getter eq 'CODE';
|
||||
my $db = $db_getter->();
|
||||
return undef unless $db;
|
||||
|
||||
my %supportrank;
|
||||
my $rank = 0;
|
||||
my $lastpoints = 0;
|
||||
my $buildup = 0;
|
||||
|
||||
my $sth = $db->prepare
|
||||
("SELECT u.userid, SUM(sp.points) AS 'points' " .
|
||||
"FROM user u, supportpoints sp " .
|
||||
"WHERE u.userid=sp.userid GROUP BY 1 ORDER BY 2 DESC");
|
||||
$sth->execute;
|
||||
die $db->errstr if $db->err;
|
||||
|
||||
while ($_ = $sth->fetchrow_hashref) {
|
||||
if ($lastpoints != $_->{'points'}) {
|
||||
$lastpoints = $_->{'points'};
|
||||
$rank += (1 + $buildup);
|
||||
$buildup = 0;
|
||||
} else {
|
||||
$buildup++;
|
||||
}
|
||||
$supportrank{$_->{'userid'}} = $rank;
|
||||
}
|
||||
|
||||
# move old 'supportrank' stat to supportrank_prev
|
||||
# no API for this :-/
|
||||
{
|
||||
my $dbh = LJ::Stats::get_db("dbh");
|
||||
$dbh->do("DELETE FROM stats WHERE statcat='supportrank_prev'");
|
||||
$dbh->do("UPDATE stats SET statcat='supportrank_prev' WHERE statcat='supportrank'");
|
||||
}
|
||||
|
||||
return \%supportrank;
|
||||
}
|
||||
});
|
||||
|
||||
print "-I- Generating weekly stats.\n";
|
||||
LJ::Stats::run_stats('supportrank');
|
||||
print "-I- Done.\n";
|
||||
};
|
||||
|
||||
$maint{'build_randomuserset'} = sub
|
||||
{
|
||||
## this sets up the randomuserset table daily (or whenever) that htdocs/random.bml uses to
|
||||
## find a random user that is both 1) publicly listed in the directory, and 2) updated
|
||||
## within the past 24 hours.
|
||||
|
||||
## note that if a user changes their privacy setting to not be in the database, it'll take
|
||||
## up to 24 hours for them to be removed from the random.bml listing, but that's acceptable.
|
||||
|
||||
my $dbh = LJ::get_db_writer();
|
||||
|
||||
print "-I- Building randomuserset.\n";
|
||||
$dbh->do("TRUNCATE TABLE randomuserset");
|
||||
$dbh->do("REPLACE INTO randomuserset (userid) " .
|
||||
"SELECT uu.userid FROM userusage uu, user u " .
|
||||
"WHERE u.userid=uu.userid AND u.allow_infoshow='Y' " .
|
||||
"AND uu.timeupdate > DATE_SUB(NOW(), INTERVAL 1 DAY) ORDER BY RAND() LIMIT 5000");
|
||||
my $num = $dbh->selectrow_array("SELECT MAX(rid) FROM randomuserset");
|
||||
$dbh->do("REPLACE INTO stats (statcat, statkey, statval) " .
|
||||
"VALUES ('userinfo', 'randomcount', $num)");
|
||||
|
||||
print "-I- Done.\n";
|
||||
};
|
||||
|
||||
$maint{'memeclean'} = sub
|
||||
{
|
||||
my $dbh = LJ::get_db_writer();
|
||||
|
||||
print "-I- Cleaning memes.\n";
|
||||
my $sth = $dbh->prepare("SELECT statkey FROM stats WHERE statcat='popmeme'");
|
||||
$sth->execute;
|
||||
die $dbh->errstr if $dbh->err;
|
||||
|
||||
while (my $url = $sth->fetchrow_array) {
|
||||
my $copy = $url;
|
||||
LJ::run_hooks("canonicalize_url", \$copy);
|
||||
unless ($copy) {
|
||||
my $d = $dbh->quote($url);
|
||||
$dbh->do("DELETE FROM stats WHERE statcat='popmeme' AND statkey=$d");
|
||||
print " deleting: $url\n";
|
||||
}
|
||||
}
|
||||
print "-I- Done.\n";
|
||||
};
|
||||
|
||||
1;
|
||||
122
livejournal/bin/maint/statspics.pl
Executable file
122
livejournal/bin/maint/statspics.pl
Executable file
@@ -0,0 +1,122 @@
|
||||
#!/usr/bin/perl
|
||||
#
|
||||
|
||||
use GD::Graph::bars;
|
||||
|
||||
$maint{'genstatspics'} = sub
|
||||
{
|
||||
my $dbh = LJ::get_db_writer();
|
||||
my $sth;
|
||||
|
||||
### get posts by day data from summary table
|
||||
print "-I- new accounts by day.\n";
|
||||
$sth = $dbh->prepare("SELECT DATE_FORMAT(statkey, '%m-%d') AS 'day', statval AS 'new' FROM stats WHERE statcat='newbyday' ORDER BY statkey DESC LIMIT 60");
|
||||
$sth->execute;
|
||||
if ($dbh->err) { die $dbh->errstr; }
|
||||
|
||||
my @data;
|
||||
my $i;
|
||||
my $max;
|
||||
while ($_ = $sth->fetchrow_hashref)
|
||||
{
|
||||
my $val = $_->{'new'};
|
||||
unshift @{$data[0]}, ($i++ % 5 == 0 ? $_->{'day'} : "");
|
||||
unshift @{$data[1]}, $val;
|
||||
if ($val > $max) { $max = $val; }
|
||||
}
|
||||
|
||||
# posts by day graph
|
||||
my $g = GD::Graph::bars->new(520, 350);
|
||||
$g->set(
|
||||
x_label => 'Day',
|
||||
y_label => 'Accounts',
|
||||
title => 'New accounts per day',
|
||||
tranparent => 0,
|
||||
y_max_value => $max,
|
||||
);
|
||||
|
||||
my $gd = $g->plot(\@data);
|
||||
open(IMG, ">$LJ::HTDOCS/stats/newbyday.png") or die $!;
|
||||
binmode IMG;
|
||||
print IMG $gd->png;
|
||||
close IMG;
|
||||
|
||||
unless ($LJ::DISABLED{'stats-postsbyday'}) {
|
||||
print "-I- posts in last 60 days.\n";
|
||||
|
||||
### suck the data in
|
||||
$sth = $dbh->prepare("SELECT DATE_FORMAT(statkey, '%m-%d') AS 'day', statval AS 'posts' FROM stats WHERE statcat='postsbyday' ORDER BY statkey DESC LIMIT 60");
|
||||
$sth->execute;
|
||||
if ($dbh->err) { die $dbh->errstr; }
|
||||
|
||||
### analyze the last 60 days data
|
||||
|
||||
my @data;
|
||||
my $i;
|
||||
my $max;
|
||||
while ($_ = $sth->fetchrow_hashref)
|
||||
{
|
||||
my $val = $_->{'posts'};
|
||||
unshift @{$data[0]}, ($i++ % 5 == 0 ? $_->{'day'} : "");
|
||||
unshift @{$data[1]}, $val;
|
||||
if ($val > $max) { $max = $val; }
|
||||
}
|
||||
|
||||
# posts by day graph
|
||||
my $g = GD::Graph::bars->new(520, 350);
|
||||
$g->set(
|
||||
x_label => 'Day',
|
||||
y_label => 'Posts',
|
||||
title => 'Posts per day',
|
||||
tranparent => 0,
|
||||
y_max_value => $max,
|
||||
);
|
||||
|
||||
my $gd = $g->plot(\@data);
|
||||
open(IMG, ">$LJ::HTDOCS/stats/postsbyday.png") or die $!;
|
||||
binmode IMG;
|
||||
print IMG $gd->png;
|
||||
close IMG;
|
||||
|
||||
print "-I- posts by week.\n";
|
||||
|
||||
### suck the data in
|
||||
$sth = $dbh->prepare("SELECT DATE_FORMAT(statkey, '%X-%V') AS 'week', SUM(statval) AS 'posts' FROM stats WHERE statcat='postsbyday' AND DATE_FORMAT(statkey, '%X-%V') <> DATE_FORMAT(NOW(), '%X-%V') AND statkey>'1999-06-01' GROUP BY 1 ORDER BY statkey DESC");
|
||||
$sth->execute;
|
||||
if ($dbh->err) { die $dbh->errstr; }
|
||||
|
||||
### analyze the last 60 days data
|
||||
|
||||
my @data;
|
||||
my $i;
|
||||
my $max;
|
||||
while ($_ = $sth->fetchrow_hashref)
|
||||
{
|
||||
my $val = $_->{'posts'};
|
||||
unshift @{$data[0]}, ($i++ % 10 == 0 ? $_->{'week'} : "");
|
||||
unshift @{$data[1]}, $val;
|
||||
if ($val > $max) { $max = $val; }
|
||||
}
|
||||
|
||||
# posts by week graph
|
||||
my $g = GD::Graph::bars->new(520, 350);
|
||||
$g->set(
|
||||
x_label => 'Week',
|
||||
y_label => 'Posts',
|
||||
title => 'Posts per week',
|
||||
tranparent => 0,
|
||||
y_max_value => $max,
|
||||
);
|
||||
|
||||
my $gd = $g->plot(\@data);
|
||||
open(IMG, ">$LJ::HTDOCS/stats/postsbyweek.png") or die $!;
|
||||
binmode IMG;
|
||||
print IMG $gd->png;
|
||||
close IMG;
|
||||
}
|
||||
|
||||
print "-I- done.\n";
|
||||
|
||||
};
|
||||
|
||||
1;
|
||||
513
livejournal/bin/maint/synsuck.pl
Executable file
513
livejournal/bin/maint/synsuck.pl
Executable file
@@ -0,0 +1,513 @@
|
||||
#!/usr/bin/perl
|
||||
#
|
||||
|
||||
use strict;
|
||||
use vars qw(%maint %maintinfo);
|
||||
use lib "$ENV{'LJHOME'}/cgi-bin"; # extra XML::Encoding files in cgi-bin/XML/*
|
||||
use LWP::UserAgent;
|
||||
use XML::RSS;
|
||||
use HTTP::Status;
|
||||
require "ljprotocol.pl";
|
||||
require "parsefeed.pl";
|
||||
require "cleanhtml.pl";
|
||||
|
||||
$maintinfo{'synsuck'}{opts}{locking} = "per_host";
|
||||
$maint{'synsuck'} = sub
|
||||
{
|
||||
my $maxcount = shift || 0;
|
||||
my $verbose = $LJ::LJMAINT_VERBOSE;
|
||||
|
||||
my %child_jobs; # child pid => [ userid, lock ]
|
||||
|
||||
my $process_user = sub {
|
||||
my $urow = shift;
|
||||
return unless $urow;
|
||||
|
||||
my ($user, $userid, $synurl, $lastmod, $etag, $readers) =
|
||||
map { $urow->{$_} } qw(user userid synurl lastmod etag numreaders);
|
||||
|
||||
# we're a child process now, need to invalidate caches and
|
||||
# get a new database handle
|
||||
LJ::start_request();
|
||||
|
||||
my $dbh = LJ::get_db_writer();
|
||||
|
||||
# see if things have changed since we last looked and acquired the lock.
|
||||
# otherwise we could 1) check work, 2) get lock, and between 1 and 2 another
|
||||
# process could do both steps. we don't want to duplicate work already done.
|
||||
my $now_checknext = $dbh->selectrow_array("SELECT checknext FROM syndicated ".
|
||||
"WHERE userid=?", undef, $userid);
|
||||
return if $now_checknext ne $urow->{checknext};
|
||||
|
||||
my $ua = LWP::UserAgent->new("timeout" => 10);
|
||||
my $reader_info = $readers ? "; $readers readers" : "";
|
||||
$ua->agent("$LJ::SITENAME ($LJ::ADMIN_EMAIL; for $LJ::SITEROOT/users/$user/" . $reader_info . ")");
|
||||
|
||||
my $delay = sub {
|
||||
my $minutes = shift;
|
||||
my $status = shift;
|
||||
|
||||
# add some random backoff to avoid waves building up
|
||||
$minutes += int(rand(5));
|
||||
|
||||
$dbh->do("UPDATE syndicated SET lastcheck=NOW(), checknext=DATE_ADD(NOW(), ".
|
||||
"INTERVAL ? MINUTE), laststatus=? WHERE userid=?",
|
||||
undef, $minutes, $status, $userid);
|
||||
};
|
||||
|
||||
print "[$$] Synsuck: $user ($synurl)\n" if $verbose;
|
||||
|
||||
my $req = HTTP::Request->new("GET", $synurl);
|
||||
$req->header('If-Modified-Since', LJ::time_to_http($lastmod))
|
||||
if $lastmod;
|
||||
$req->header('If-None-Match', $etag)
|
||||
if $etag;
|
||||
|
||||
my ($content, $too_big);
|
||||
my $res = $ua->request($req, sub {
|
||||
if (length($content) > 1024*150) { $too_big = 1; return; }
|
||||
$content .= $_[0];
|
||||
}, 4096);
|
||||
if ($too_big) { $delay->(60, "toobig"); return; }
|
||||
|
||||
if ($res->is_error()) {
|
||||
# http error
|
||||
print "HTTP error!\n" if $verbose;
|
||||
|
||||
# overload parseerror here because it's already there -- we'll
|
||||
# never have both an http error and a parse error on the
|
||||
# same request
|
||||
$delay->(3*60, "parseerror");
|
||||
|
||||
LJ::set_userprop($userid, "rssparseerror", $res->status_line());
|
||||
return;
|
||||
}
|
||||
|
||||
# check if not modified
|
||||
if ($res->code() == RC_NOT_MODIFIED) {
|
||||
print " not modified.\n" if $verbose;
|
||||
$delay->($readers ? 60 : 24*60, "notmodified");
|
||||
return;
|
||||
}
|
||||
|
||||
# WARNING: blatant XML spec violation ahead...
|
||||
#
|
||||
# Blogger doesn't produce valid XML, since they don't handle encodings
|
||||
# correctly. So if we see they have no encoding (which is UTF-8 implictly)
|
||||
# but it's not valid UTF-8, say it's Windows-1252, which won't
|
||||
# cause XML::Parser to barf... but there will probably be some bogus characters.
|
||||
# better than nothing I guess. (personally, I'd prefer to leave it broken
|
||||
# and have people bitch at Blogger, but jwz wouldn't stop bugging me)
|
||||
# XML::Parser doesn't include Windows-1252, but we put it in cgi-bin/XML/* for it
|
||||
# to find.
|
||||
my $encoding;
|
||||
if ($content =~ /<\?xml.+?>/ && $& =~ /encoding=([\"\'])(.+?)\1/) {
|
||||
$encoding = lc($2);
|
||||
}
|
||||
if (! $encoding && ! LJ::is_utf8($content)) {
|
||||
$content =~ s/\?>/ encoding='windows-1252' \?>/;
|
||||
}
|
||||
|
||||
# WARNING: another hack...
|
||||
# People produce what they think is iso-8859-1, but they include
|
||||
# Windows-style smart quotes. Check for invalid iso-8859-1 and correct.
|
||||
if ($encoding =~ /^iso-8859-1$/i && $content =~ /[\x80-\x9F]/) {
|
||||
# They claimed they were iso-8859-1, but they are lying.
|
||||
# Assume it was Windows-1252.
|
||||
print "Invalid ISO-8859-1; assuming Windows-1252...\n" if $verbose;
|
||||
$content =~ s/encoding=([\"\'])(.+?)\1/encoding='windows-1252'/;
|
||||
}
|
||||
|
||||
# parsing time...
|
||||
my ($feed, $error) = LJ::ParseFeed::parse_feed($content);
|
||||
if ($error) {
|
||||
# parse error!
|
||||
print "Parse error! $error\n" if $verbose;
|
||||
$delay->(3*60, "parseerror");
|
||||
$error =~ s! at /.*!!;
|
||||
$error =~ s/^\n//; # cleanup of newline at the beggining of the line
|
||||
LJ::set_userprop($userid, "rssparseerror", $error);
|
||||
return;
|
||||
}
|
||||
|
||||
# another sanity check
|
||||
unless (ref $feed->{'items'} eq "ARRAY") {
|
||||
$delay->(3*60, "noitems");
|
||||
return;
|
||||
}
|
||||
|
||||
my @items = reverse @{$feed->{'items'}};
|
||||
|
||||
# take most recent 20
|
||||
splice(@items, 0, @items-20) if @items > 20;
|
||||
|
||||
# delete existing items older than the age which can show on a
|
||||
# friends view.
|
||||
my $su = LJ::load_userid($userid);
|
||||
my $udbh = LJ::get_cluster_master($su);
|
||||
unless ($udbh) {
|
||||
$delay->(15, "nodb");
|
||||
return;
|
||||
}
|
||||
|
||||
# TAG:LOG2:synsuck_delete_olderitems
|
||||
my $secs = ($LJ::MAX_FRIENDS_VIEW_AGE || 3600*24*14)+0; # 2 week default.
|
||||
my $sth = $udbh->prepare("SELECT jitemid, anum FROM log2 WHERE journalid=? AND ".
|
||||
"logtime < DATE_SUB(NOW(), INTERVAL $secs SECOND)");
|
||||
$sth->execute($userid);
|
||||
die $udbh->errstr if $udbh->err;
|
||||
while (my ($jitemid, $anum) = $sth->fetchrow_array) {
|
||||
print "DELETE itemid: $jitemid, anum: $anum... \n" if $verbose;
|
||||
if (LJ::delete_entry($su, $jitemid, 0, $anum)) {
|
||||
print "success.\n" if $verbose;
|
||||
} else {
|
||||
print "fail.\n" if $verbose;
|
||||
}
|
||||
}
|
||||
|
||||
# determine if link tags are good or not, where good means
|
||||
# "likely to be a unique per item". some feeds have the same
|
||||
# <link> element for each item, which isn't good.
|
||||
# if we have unique ids, we don't compare link tags
|
||||
|
||||
my ($compare_links, $have_ids) = 0;
|
||||
{
|
||||
my %link_seen;
|
||||
foreach my $it (@items) {
|
||||
$have_ids = 1 if $it->{'id'};
|
||||
next unless $it->{'link'};
|
||||
$link_seen{$it->{'link'}} = 1;
|
||||
}
|
||||
$compare_links = 1 if !$have_ids and $feed->{'type'} eq 'rss' and
|
||||
scalar(keys %link_seen) == scalar(@items);
|
||||
}
|
||||
|
||||
# if we have unique links/ids, load them for syndicated
|
||||
# items we already have on the server. then, if we have one
|
||||
# already later and see it's changed, we'll do an editevent
|
||||
# instead of a new post.
|
||||
my %existing_item = ();
|
||||
if ($have_ids || $compare_links) {
|
||||
my $p = $have_ids ? LJ::get_prop("log", "syn_id") :
|
||||
LJ::get_prop("log", "syn_link");
|
||||
my $sth = $udbh->prepare("SELECT jitemid, value FROM logprop2 WHERE ".
|
||||
"journalid=? AND propid=? LIMIT 1000");
|
||||
$sth->execute($su->{'userid'}, $p->{'id'});
|
||||
while (my ($itemid, $id) = $sth->fetchrow_array) {
|
||||
$existing_item{$id} = $itemid;
|
||||
}
|
||||
}
|
||||
|
||||
# post these items
|
||||
my $newcount = 0;
|
||||
my $errorflag = 0;
|
||||
my $mindate; # "yyyy-mm-dd hh:mm:ss";
|
||||
my $notedate = sub {
|
||||
my $date = shift;
|
||||
$mindate = $date if ! $mindate || $date lt $mindate;
|
||||
};
|
||||
|
||||
foreach my $it (@items) {
|
||||
|
||||
# remove the SvUTF8 flag. it's still UTF-8, but
|
||||
# we don't want perl knowing that and fucking stuff up
|
||||
# for us behind our back in random places all over
|
||||
# http://zilla.livejournal.org/show_bug.cgi?id=1037
|
||||
foreach my $attr (qw(subject text link)) {
|
||||
$it->{$attr} = pack('C*', unpack('C*', $it->{$attr}));
|
||||
}
|
||||
|
||||
my $dig = LJ::md5_struct($it)->b64digest;
|
||||
my $prevadd = $dbh->selectrow_array("SELECT MAX(dateadd) FROM synitem WHERE ".
|
||||
"userid=? AND item=?", undef,
|
||||
$userid, $dig);
|
||||
if ($prevadd) {
|
||||
$notedate->($prevadd);
|
||||
next;
|
||||
}
|
||||
|
||||
my $now_dateadd = $dbh->selectrow_array("SELECT NOW()");
|
||||
die "unexpected format" unless $now_dateadd =~ /^\d\d\d\d\-\d\d\-\d\d \d\d:\d\d:\d\d$/;
|
||||
|
||||
$dbh->do("INSERT INTO synitem (userid, item, dateadd) VALUES (?,?,?)",
|
||||
undef, $userid, $dig, $now_dateadd);
|
||||
$notedate->($now_dateadd);
|
||||
|
||||
$newcount++;
|
||||
print "[$$] $dig - $it->{'subject'}\n" if $verbose;
|
||||
$it->{'text'} =~ s/^\s+//;
|
||||
$it->{'text'} =~ s/\s+$//;
|
||||
|
||||
my $htmllink;
|
||||
if (defined $it->{'link'}) {
|
||||
$htmllink = "<p class='ljsyndicationlink'>" .
|
||||
"<a href='$it->{'link'}'>$it->{'link'}</a></p>";
|
||||
}
|
||||
|
||||
# Show the <guid> link if it's present and different than the
|
||||
# <link>.
|
||||
# [zilla: 267] Patch: Chaz Meyers <lj-zilla@thechaz.net>
|
||||
if ( defined $it->{'id'} && $it->{'id'} ne $it->{'link'}
|
||||
&& $it->{'id'} =~ m!^http://! )
|
||||
{
|
||||
$htmllink .= "<p class='ljsyndicationlink'>" .
|
||||
"<a href='$it->{'id'}'>$it->{'id'}</a></p>";
|
||||
}
|
||||
|
||||
# rewrite relative URLs to absolute URLs, but only invoke the HTML parser
|
||||
# if we see there's some image or link tag, to save us some work if it's
|
||||
# unnecessary (the common case)
|
||||
if ($it->{'text'} =~ /<(?:img|a)\b/i) {
|
||||
# TODO: support XML Base? http://www.w3.org/TR/xmlbase/
|
||||
my $base_href = $it->{'link'} || $synurl;
|
||||
LJ::CleanHTML::resolve_relative_urls(\$it->{'text'}, $base_href);
|
||||
}
|
||||
|
||||
# $own_time==1 means we took the time from the feed rather than localtime
|
||||
my ($own_time, $year, $mon, $day, $hour, $min);
|
||||
|
||||
if ($it->{'time'} &&
|
||||
$it->{'time'} =~ m!^(\d\d\d\d)-(\d\d)-(\d\d) (\d\d):(\d\d)!) {
|
||||
$own_time = 1;
|
||||
($year, $mon, $day, $hour, $min) = ($1,$2,$3,$4,$5);
|
||||
} else {
|
||||
$own_time = 0;
|
||||
my @now = localtime();
|
||||
($year, $mon, $day, $hour, $min) =
|
||||
($now[5]+1900, $now[4]+1, $now[3], $now[2], $now[1]);
|
||||
}
|
||||
|
||||
my $command = "postevent";
|
||||
my $req = {
|
||||
'username' => $user,
|
||||
'ver' => 1,
|
||||
'subject' => $it->{'subject'},
|
||||
'event' => "$htmllink$it->{'text'}",
|
||||
'year' => $year,
|
||||
'mon' => $mon,
|
||||
'day' => $day,
|
||||
'hour' => $hour,
|
||||
'min' => $min,
|
||||
'props' => {
|
||||
'syn_link' => $it->{'link'},
|
||||
},
|
||||
};
|
||||
$req->{'props'}->{'syn_id'} = $it->{'id'}
|
||||
if $it->{'id'};
|
||||
|
||||
my $flags = {
|
||||
'nopassword' => 1,
|
||||
};
|
||||
|
||||
# if the post contains html linebreaks, assume it's preformatted.
|
||||
if ($it->{'text'} =~ /<(?:p|br)\b/i) {
|
||||
$req->{'props'}->{'opt_preformatted'} = 1;
|
||||
}
|
||||
|
||||
# do an editevent if we've seen this item before
|
||||
my $id = $have_ids ? $it->{'id'} : $it->{'link'};
|
||||
my $old_itemid = $existing_item{$id};
|
||||
if ($id && $old_itemid) {
|
||||
$newcount--; # cancel increment above
|
||||
$command = "editevent";
|
||||
$req->{'itemid'} = $old_itemid;
|
||||
|
||||
# the editevent requires us to resend the date info, which
|
||||
# we have to go fetch first, in case the feed doesn't have it
|
||||
|
||||
# TAG:LOG2:synsuck_fetch_itemdates
|
||||
unless($own_time) {
|
||||
my $origtime =
|
||||
$udbh->selectrow_array("SELECT eventtime FROM log2 WHERE ".
|
||||
"journalid=? AND jitemid=?", undef,
|
||||
$su->{'userid'}, $old_itemid);
|
||||
$origtime =~ /(\d\d\d\d)-(\d\d)-(\d\d) (\d\d):(\d\d)/;
|
||||
$req->{'year'} = $1;
|
||||
$req->{'mon'} = $2;
|
||||
$req->{'day'} = $3;
|
||||
$req->{'hour'} = $4;
|
||||
$req->{'min'} = $5;
|
||||
}
|
||||
}
|
||||
|
||||
my $err;
|
||||
my $res = LJ::Protocol::do_request($command, $req, \$err, $flags);
|
||||
unless ($res && ! $err) {
|
||||
print " Error: $err\n" if $verbose;
|
||||
$errorflag = 1;
|
||||
}
|
||||
}
|
||||
|
||||
# delete some unneeded synitems. the limit 1000 is because
|
||||
# historically we never deleted and there are accounts with
|
||||
# 222,000 items on a myisam table, and that'd be quite the
|
||||
# delete hit.
|
||||
# the 14 day interval is because if a remote site deleted an
|
||||
# entry, it's possible for the oldest item that was previously
|
||||
# gone to reappear, and we want to protect against that a
|
||||
# little.
|
||||
if ($LJ::SYNITEM_CLEAN) {
|
||||
$dbh->do("DELETE FROM synitem WHERE userid=? AND ".
|
||||
"dateadd < ? - INTERVAL 14 DAY LIMIT 1000",
|
||||
undef, $userid, $mindate);
|
||||
}
|
||||
$dbh->do("UPDATE syndicated SET oldest_ourdate=? WHERE userid=?",
|
||||
undef, $mindate, $userid);
|
||||
|
||||
# bail out if errors, and try again shortly
|
||||
if ($errorflag) {
|
||||
$delay->(30, "posterror");
|
||||
return;
|
||||
}
|
||||
|
||||
# update syndicated account's userinfo if necessary
|
||||
LJ::load_user_props($su, "url", "urlname");
|
||||
{
|
||||
my $title = $feed->{'title'};
|
||||
$title = $su->{'user'} unless LJ::is_utf8($title);
|
||||
$title =~ s/[\n\r]//g;
|
||||
if ($title && $title ne $su->{'name'}) {
|
||||
LJ::update_user($su, { name => $title });
|
||||
LJ::set_userprop($su, "urlname", $title);
|
||||
}
|
||||
|
||||
my $link = $feed->{'link'};
|
||||
if ($link && $link ne $su->{'url'}) {
|
||||
LJ::set_userprop($su, "url", $link);
|
||||
}
|
||||
|
||||
my $des = $feed->{'description'};
|
||||
if ($des) {
|
||||
my $bio;
|
||||
if ($su->{'has_bio'} eq "Y") {
|
||||
$bio = $udbh->selectrow_array("SELECT bio FROM userbio WHERE userid=?", undef,
|
||||
$su->{'userid'});
|
||||
}
|
||||
if ($bio ne $des && $bio !~ /\[LJ:KEEP\]/) {
|
||||
if ($des) {
|
||||
$su->do("REPLACE INTO userbio (userid, bio) VALUES (?,?)", undef,
|
||||
$su->{'userid'}, $des);
|
||||
} else {
|
||||
$su->do("DELETE FROM userbio WHERE userid=?", undef, $su->{'userid'});
|
||||
}
|
||||
LJ::update_user($su, { has_bio => ($des ? "Y" : "N") });
|
||||
LJ::MemCache::delete([$su->{'userid'}, "bio:$su->{'userid'}"]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
my $r_lastmod = LJ::http_to_time($res->header('Last-Modified'));
|
||||
my $r_etag = $res->header('ETag');
|
||||
|
||||
# decide when to poll next (in minutes).
|
||||
# FIXME: this is super lame. (use hints in RSS file!)
|
||||
my $int = $newcount ? 30 : 60;
|
||||
my $status = $newcount ? "ok" : "nonew";
|
||||
my $updatenew = $newcount ? ", lastnew=NOW()" : "";
|
||||
|
||||
# update reader count while we're changing things, but not
|
||||
# if feed is stale (minimize DB work for inactive things)
|
||||
if ($newcount || ! defined $readers) {
|
||||
$readers = $dbh->selectrow_array("SELECT COUNT(*) FROM friends WHERE ".
|
||||
"friendid=?", undef, $userid);
|
||||
}
|
||||
|
||||
# if readers are gone, don't check for a whole day
|
||||
$int = 60*24 unless $readers;
|
||||
|
||||
$dbh->do("UPDATE syndicated SET checknext=DATE_ADD(NOW(), INTERVAL $int MINUTE), ".
|
||||
"lastcheck=NOW(), lastmod=?, etag=?, laststatus=?, numreaders=? $updatenew ".
|
||||
"WHERE userid=$userid", undef, $r_lastmod, $r_etag, $status, $readers);
|
||||
};
|
||||
|
||||
###
|
||||
### child process management
|
||||
###
|
||||
|
||||
# get the next user to be processed
|
||||
my @all_users;
|
||||
my $get_next_user = sub {
|
||||
return shift @all_users if @all_users;
|
||||
|
||||
# need to get some more rows
|
||||
my $dbh = LJ::get_db_writer();
|
||||
my $current_jobs = join(",", map { $dbh->quote($_->[0]) } values %child_jobs);
|
||||
my $in_sql = " AND u.userid NOT IN ($current_jobs)" if $current_jobs;
|
||||
my $sth = $dbh->prepare("SELECT u.user, s.userid, s.synurl, s.lastmod, " .
|
||||
" s.etag, s.numreaders, s.checknext " .
|
||||
"FROM user u, syndicated s " .
|
||||
"WHERE u.userid=s.userid AND u.statusvis='V' " .
|
||||
"AND s.checknext < NOW()$in_sql " .
|
||||
"ORDER BY RAND() LIMIT 500");
|
||||
$sth->execute;
|
||||
while (my $urow = $sth->fetchrow_hashref) {
|
||||
push @all_users, $urow;
|
||||
}
|
||||
|
||||
return undef unless @all_users;
|
||||
return shift @all_users;
|
||||
};
|
||||
|
||||
# fork and manage child processes
|
||||
my $max_threads = $LJ::SYNSUCK_MAX_THREADS || 1;
|
||||
print "[$$] PARENT -- using $max_threads workers\n" if $verbose;
|
||||
|
||||
my $threads = 0;
|
||||
my $userct = 0;
|
||||
my $keep_forking = 1;
|
||||
while ( $maxcount == 0 || $userct < $maxcount ) {
|
||||
|
||||
if ($threads < $max_threads && $keep_forking) {
|
||||
my $urow = $get_next_user->();
|
||||
unless ($urow) {
|
||||
$keep_forking = 0;
|
||||
next;
|
||||
}
|
||||
|
||||
my $lockname = "synsuck-user-" . $urow->{user};
|
||||
my $lock = LJ::locker()->trylock($lockname);
|
||||
next unless $lock;
|
||||
print "Got lock on '$lockname'. Running\n" if $verbose;
|
||||
|
||||
# spawn a new process
|
||||
if (my $pid = fork) {
|
||||
# we are a parent, nothing to do?
|
||||
$child_jobs{$pid} = [$urow->{'userid'}, $lock];
|
||||
$threads++;
|
||||
$userct++;
|
||||
} else {
|
||||
# handles won't survive the fork
|
||||
LJ::disconnect_dbs();
|
||||
$process_user->($urow);
|
||||
exit 0;
|
||||
}
|
||||
|
||||
# wait for child(ren) to die
|
||||
} else {
|
||||
my $child = wait();
|
||||
last if $child == -1;
|
||||
delete $child_jobs{$child};
|
||||
$threads--;
|
||||
}
|
||||
}
|
||||
|
||||
# Now wait on any remaining children so we don't leave zombies behind.
|
||||
while ( %child_jobs ) {
|
||||
my $child = wait();
|
||||
last if $child == -1;
|
||||
delete $child_jobs{ $child };
|
||||
$threads--;
|
||||
}
|
||||
|
||||
print "[$$] $userct users processed\n" if $verbose;
|
||||
return;
|
||||
};
|
||||
|
||||
1;
|
||||
|
||||
|
||||
# Local Variables:
|
||||
# mode: perl
|
||||
# c-basic-indent: 4
|
||||
# indent-tabs-mode: nil
|
||||
# End:
|
||||
27
livejournal/bin/maint/taskinfo.txt
Executable file
27
livejournal/bin/maint/taskinfo.txt
Executable file
@@ -0,0 +1,27 @@
|
||||
bday.pl:
|
||||
bdaymail - Sends people birthday wishes & notifications
|
||||
|
||||
stats.pl:
|
||||
build_randomuserset - builds/cleans table of users applicable for inclusion in random listings
|
||||
genstats - Generates the nightly statistics
|
||||
genstats_size - Generates the site size stats
|
||||
genstats_weekly - Generates the weekly statistics
|
||||
memeclean - Removes things from meme summary that are excluded by new URL cleaner rules
|
||||
|
||||
statspics.pl:
|
||||
genstatspics - Makes a bunch of graphs to show on the statistics page.
|
||||
|
||||
clean_caches.pl:
|
||||
clean_caches - removes old cache files
|
||||
|
||||
synsuck.pl:
|
||||
synsuck - Polls needed remote, syndicated RSS/etc and updates journals.
|
||||
|
||||
captcha.pl:
|
||||
gen_audio_captchas - Generate any needed new audio challenges.
|
||||
gen_image_captchas - Generate any needed new graphical challenges.
|
||||
clean_captchas - Purge old challenges from the database.
|
||||
|
||||
generic.pl:
|
||||
joinmail - Generates daily email digests for community join requests
|
||||
clean_spamreports - Clean out data from the spamreports table older than 90 days.
|
||||
Reference in New Issue
Block a user