ljr/livejournal/cgi-bin/statslib.pl

286 lines
9.1 KiB
Perl
Executable File

#!/usr/bin/perl
#
# Partial Stats
#
use strict;
package LJ::Stats;
%LJ::Stats::INFO = (
# jobname => { type => 'global' || 'clustered',
# jobname => jobname
# statname => statname || [statname1, statname2]
# handler => sub {},
# max_age => age }
);
sub LJ::Stats::register_stat {
my $stat = shift;
return undef unless ref $stat eq 'HASH';
$stat->{'type'} = $stat->{'type'} eq 'clustered' ? 'clustered' : 'global';
return undef unless $stat->{'jobname'};
$stat->{'statname'} ||= $stat->{'jobname'};
return undef unless ref $stat->{'handler'} eq 'CODE';
delete $stat->{'max_age'} unless $stat->{'max_age'} > 0;
# register in master INFO hash
$LJ::Stats::INFO{$stat->{'jobname'}} = $stat;
return 1;
};
sub LJ::Stats::run_stats {
my @stats = @_ ? @_ : sort keys %LJ::Stats::INFO;
foreach my $jobname (@stats) {
my $stat = $LJ::Stats::INFO{$jobname};
# stats calculated on global db reader
if ($stat->{'type'} eq "global") {
unless (LJ::Stats::need_calc($jobname)) {
print "-I- Up-to-date: $jobname\n";
next;
}
# rather than passing an actual db handle to the stat handler,
# just pass a getter subef so it can be revalidated as necessary
my $dbr_getter = sub {
return LJ::Stats::get_db("dbr")
or die "Can't get db reader handle.";
};
print "-I- Running: $jobname\n";
my $res = $stat->{'handler'}->($dbr_getter);
die "Error running '$jobname' handler on global reader."
unless $res;
# 2 cases:
# - 'statname' is an arrayref, %res structure is ( 'statname' => { 'arg' => 'val' } )
# - 'statname' is scalar, %res structure is ( 'arg' => 'val' )
{
if (ref $stat->{'statname'} eq 'ARRAY') {
foreach my $statname (@{$stat->{'statname'}}) {
foreach my $key (keys %{$res->{$statname}}) {
LJ::Stats::save_stat($statname, $key, $res->{$statname}->{$key});
}
}
} else {
my $statname = $stat->{'statname'};
foreach my $key (keys %$res) {
LJ::Stats::save_stat($statname, $key, $res->{$key});
}
}
}
LJ::Stats::save_calc($jobname);
next;
}
# stats calculated per-cluster
if ($stat->{'type'} eq "clustered") {
foreach my $cid (@LJ::CLUSTERS) {
unless (LJ::Stats::need_calc($jobname, $cid)) {
print "-I- Up-to-date: $jobname, cluster $cid\n";
next;
}
# pass a dbcr getter subref so the stat handler knows how
# to revalidate its database handles, by invoking this closure
my $dbcr_getter = sub {
return LJ::Stats::get_db("dbcr", $cid)
or die "Can't get cluster $cid db handle.";
};
print "-I- Running: $jobname, cluster $cid\n";
my $res = $stat->{'handler'}->($dbcr_getter, $cid);
die "Error running '$jobname' handler on cluster $cid."
unless $res;
# 2 cases:
# - 'statname' is an arrayref, %res structure is ( 'statname' => { 'arg' => 'val' } )
# - 'statname' is scalar, %res structure is ( 'arg' => 'val' )
{
if (ref $stat->{'statname'} eq 'ARRAY') {
foreach my $statname (@{$stat->{'statname'}}) {
foreach my $key (keys %{$res->{$statname}}) {
LJ::Stats::save_part($statname, $cid, $key, $res->{$statname}->{$key});
}
}
} else {
my $statname = $stat->{'statname'};
foreach my $key (keys %$res) {
LJ::Stats::save_part($statname, $cid, $key, $res->{$key});
}
}
}
LJ::Stats::save_calc($jobname, $cid);
}
# save the summation(s) of the statname(s) we found above
if (ref $stat->{'statname'} eq 'ARRAY') {
foreach my $statname (@{$stat->{'statname'}}) {
LJ::Stats::save_sum($statname);
}
} else {
LJ::Stats::save_sum($stat->{'statname'});
}
}
}
return 1;
};
# get raw dbr/dbh/cluster handle
sub LJ::Stats::get_db {
my $type = shift;
return undef unless $type;
my $cid = shift;
# tell DBI to revalidate connections before returning them
$LJ::DBIRole->clear_req_cache();
my $opts = {raw=>1,nocache=>1}; # get_dbh opts
# global handles
if ($type eq "dbr") {
my @roles = $LJ::STATS_FORCE_SLOW ? ("slow") : ("slave", "master");
my $db = LJ::get_dbh($opts, @roles);
return $db if $db;
# don't fall back to slave/master if STATS_FORCE_SLOW is on
die "ERROR: Could not get handle for slow database role\n"
if $LJ::STATS_FORCE_SLOW;
return undef;
}
return LJ::get_dbh($opts, 'master')
if $type eq "dbh";
# cluster handles
return undef unless $cid > 0;
return LJ::get_cluster_def_reader($opts, $cid)
if $type eq "dbcm" || $type eq "dbcr";
return undef;
}
# save a given stat to the 'stats' table in the db
sub LJ::Stats::save_stat {
my ($cat, $statkey, $val) = @_;
return undef unless $cat && $statkey && $val;
# replace/insert stats row
my $dbh = LJ::Stats::get_db("dbh");
$dbh->do("REPLACE INTO stats (statcat, statkey, statval) VALUES (?, ?, ?)",
undef, $cat, $statkey, $val);
die $dbh->errstr if $dbh->err;
return 1;
}
# note the last calctime of a given stat
sub LJ::Stats::save_calc {
my ($jobname, $cid) = @_;
return unless $jobname;
my $dbh = LJ::Stats::get_db("dbh");
$dbh->do("REPLACE INTO partialstats (jobname, clusterid, calctime) " .
"VALUES (?,?,UNIX_TIMESTAMP())", undef, $jobname, $cid || 1);
die $dbh->errstr if $dbh->err;
return 1;
}
# save partial stats
sub LJ::Stats::save_part {
my ($statname, $cid, $arg, $value) = @_;
return undef unless $statname && $cid > 0;
# replace/insert partialstats(data) row
my $dbh = LJ::Stats::get_db("dbh");
$dbh->do("REPLACE INTO partialstatsdata (statname, arg, clusterid, value) " .
"VALUES (?,?,?,?)", undef, $statname, $arg, $cid, $value);
die $dbh->errstr if $dbh->err;
return 1;
};
# see if a given stat is stale
sub LJ::Stats::need_calc {
my ($jobname, $cid) = @_;
return undef unless $jobname;
my $dbr = LJ::Stats::get_db("dbr");
my $calctime = $dbr->selectrow_array("SELECT calctime FROM partialstats " .
"WHERE jobname=? AND clusterid=?",
undef, $jobname, $cid || 1);
my $max = $LJ::Stats::INFO{$jobname}->{'max_age'} || 3600*6; # 6 hours default
return ($calctime < time() - $max);
}
# sum up counts for all clusters
sub LJ::Stats::save_sum {
my $statname = shift;
return undef unless $statname;
# get sum of this stat for all clusters
my $dbr = LJ::Stats::get_db("dbr");
my $sth = $dbr->prepare("SELECT arg, SUM(value) FROM partialstatsdata " .
"WHERE statname=? GROUP BY 1");
$sth->execute($statname);
while (my ($arg, $count) = $sth->fetchrow_array) {
next unless $count;
LJ::Stats::save_stat($statname, $arg, $count);
}
return 1;
}
# get number of pages, given a total row count
sub LJ::Stats::num_blocks {
my $row_tot = shift;
return 0 unless $row_tot;
return int($row_tot / $LJ::STATS_BLOCK_SIZE) + (($row_tot % $LJ::STATS_BLOCK_SIZE) ? 1 : 0);
}
# get low/high ids for a BETWEEN query based on page number
sub LJ::Stats::get_block_bounds {
my ($block, $offset) = @_;
return ($offset+0, $offset+$LJ::Stats::BLOCK_SIZE) unless $block;
# calculate min, then add one to not overlap previous max,
# unless there was no previous max so we set to 0 so we don't
# miss rows with id=0
my $min = ($block-1)*$LJ::STATS_BLOCK_SIZE + 1;
$min = $min == 1 ? 0 : $min;
return ($offset+$min, $offset+$block*$LJ::STATS_BLOCK_SIZE);
}
sub LJ::Stats::block_status_line {
my ($block, $total) = @_;
return "" unless $LJ::Stats::VERBOSE;
return "" if $total == 1; # who cares about percentage for one block?
# status line gets called AFTER work is done, so we show percentage
# for $block+1, that way the final line displays 100%
my $pct = sprintf("%.2f", 100*($block / ($total || 1)));
return " [$pct%] Processing block $block of $total.\n";
}
1;