ljr/wcmtools/spud/bin/gatherer

441 lines
14 KiB
Perl
Executable File

#!/usr/bin/perl
#
# Danga's Statistics Gatherer
# Gathers statistics using plugins.
#
# Command line options:
#
# --conf STRING set what config file to use for options
# --server STRING set what server to point at
# --port INT the port of the server to use
# --foreground if present, tell the server to force to the foreground
#
# Configuration file format:
#
# server = STRING location of statistics server
# port = INT port number server listens on
#
# job: plugin(parameter, parameter)
# job2: plugin
# helperplugin(parameter, parameter)
# ...
#
# Copyright 2004, Danga Interactive
#
# Authors:
# Mark Smith <marksmith@danga.com>
#
# License:
# undecided.
#
use strict;
use lib "$ENV{LJHOME}/cgi-bin";
use Getopt::Long;
use POSIX qw(:sys_wait_h);
use Danga::Daemon;
# filesystem layout: the gatherer and its plugins live under $LJHOME/bin/spud
my $path = "$ENV{LJHOME}/bin/spud";
my $plugins_path = "$path/plugins";
# options taken from the command line; "key = value" lines read from the
# config file are stored in this same hash later on
my %opts;
GetOptions(\%opts, 'conf=s', 'server=s', 'port=i');
# child bookkeeping
my %pids;   # reverse mapping; ( pid => job-name ) for children we are tracking
my %todie;  # pids we have killed on purpose and expect to reap soon
# Shutdown hook: SIGTERM every child we are tracking, then die with the reason.
# Also registered with Danga::Daemon as the shutdown code.
#
#   $reason - optional text describing why we are going down; a placeholder
#             is used when it is missing or false
#
# Never returns; always dies with "shutdown detected: <reason>\n".
sub fail {
    my ($reason) = @_;

    # take every known child down with us
    kill 15, $_ foreach keys %pids;

    # work on a copy: assigning through $_[0] would change the caller's
    # variable and dies outright when the argument is a read-only literal
    $reason ||= 'no reason provided';
    die "shutdown detected: $reason\n";
}
# hand control to Danga::Daemon: worker() is the callback it runs for us and
# fail() is registered as the shutdown hook
# NOTE(review): the exact meaning of interval/listenport/chdir is defined by
# Danga::Daemon, which is not visible here -- presumably the pause between
# worker() calls, a control port, and the working directory; confirm there
my %daemon_options = (
    shutdowncode => \&fail,
    chdir        => $path,
    listenport   => 13500,
    interval     => 1,
);
Danga::Daemon::daemonize(\&worker, \%daemon_options);
# Registry of every plugin file found in the plugins directory, keyed by the
# file name without its .pl extension.
# ( plugin-name =>
# {
# modtime => int, # last modified time of plugin file (for reload detection)
# children => [ pid, pid, pid, ... ], # pids of workers doing this plugin
# # (initialised to [] when the plugin is first seen; nothing
# # in the code visible here adds to it)
# file => str, # full path of the plugin file
# reload => bool, # if on, reload() will (re)load this plugin file
# registered => bool, # if on, this plugin has registered correctly and is good to go
# code => { # the hashref the plugin handed to register_plugin()
# register => sub { ... }, # required
# worker => sub { ... }, # run in a forked child, one per job
# helper => sub { ... }, # run in-process on every worker() pass
# # (a plugin must supply worker and/or helper)
# },
# }
# );
my %plugins;
# Every job we know about, whether it came from the config file or from a helper.
# ( job-name =>
# {
# plugin => plugin-name, # plugin whose worker code runs this job
# respawn_with => plugin-name, # helper plugin that created this job, if any; such jobs
# # are not deactivated when the config file is reloaded
# pid => pid, # pid of the child running this job, 0 when there is none
# options => [ option, option, option, ... ], # list of options from config file
# reload => bool, # if on, reload() will (re)start this job
# active => bool, # if on, this is active per the config file
# }
# );
my %jobs;
# array of plugins that want to help us out; array of [ pluginname, [ opts ] ]
my @helpers;
# cached socket to our stat server; opened on first use by set()
my $sock;
# Sends statistics up to the stat server.
#
#   set(key => value, key => value, ...)
#
# Arguments are consumed in pairs and each pair is written as one
# "QSET key value" line; a trailing unpaired argument is ignored. The
# connection to $opts{server}:$opts{port} is opened on first use and cached
# in $sock. Calls fail() -- which does not return -- if no connection can be
# made.
sub set {
    my @pairs = @_;

    unless ($sock) {
        # nothing at the top of this file loads the socket class explicitly,
        # so make sure it is available before we use it
        require IO::Socket::INET;
        $sock = IO::Socket::INET->new(
            PeerAddr => $opts{server},
            PeerPort => $opts{port},
        );
    }
    fail("can't create socket: $!")
        unless $sock;

    # send this on up to the server, one key/value pair per line
    while (@pairs >= 2) {
        my ($key, $value) = splice(@pairs, 0, 2);
        next if $sock->print("QSET $key $value\r\n");

        # the write failed, so the cached connection is no longer usable; drop
        # it so the next call reconnects instead of writing into a dead socket
        undef $sock;
        last;
    }
}
# One pass of the gatherer's main loop; this is the callback handed to
# Danga::Daemon::daemonize.
sub worker {
    # steps 1-3: notice new or changed plugin files, pick up config file
    # changes, then let the master reloader act on whatever was flagged
    check_plugin_modtimes();
    check_config();
    reload();

    # step 4: give every configured helper plugin a turn, in-process
    for my $helper (@helpers) {
        my ($name, $helper_opts) = @{$helper};

        # only plugins that exist, registered successfully and actually
        # provide helper code are eligible
        my $entry = $plugins{$name};
        next unless $entry;
        next unless $entry->{registered};
        my $code = $entry->{code}->{helper};
        next unless $code;

        # a dying helper is logged but must not take the gatherer down
        eval { $code->($helper_opts) };
        debug($@) if $@;
    }
}
# Scans the plugins directory for *.pl files, creating a %plugins record for
# each plugin seen for the first time and flagging a plugin for reload
# whenever its file's modification time is newer than the one we recorded.
# Calls fail() -- which does not return -- if the directory cannot be opened.
sub check_plugin_modtimes {
    # lexical handle: no global DIR to clash with, and it is closed
    # automatically should we leave early
    opendir(my $dir, $plugins_path)
        or fail("Unable to open plugins directory for reading");

    while (defined(my $file = readdir($dir))) {
        # hidden files and anything that is not a .pl file are not plugins
        next if $file =~ /^\./;
        next unless $file =~ /^(.+)\.pl$/;
        my $plugin = $1;

        # known plugins are checked at their recorded path, new ones at the
        # path they would be recorded under
        my $known    = $plugins{$plugin};
        my $fullpath = $known ? $known->{file} : "$plugins_path/$file";

        # the file can vanish between readdir and stat (or be a dangling
        # symlink); skip it rather than record an undefined modtime or
        # register a plugin that can never be loaded
        my $mt = (stat($fullpath))[9];
        next unless defined $mt;

        # create an empty record if this is the first time we've found this plugin
        my $entry = $plugins{$plugin} ||= {
            modtime    => 0,
            reload     => 1,
            registered => 0,
            children   => [],
            file       => $fullpath,
        };

        # compare modtime and mark for reload if necessary
        $entry->{reload} = 1
            if $mt > $entry->{modtime};
        $entry->{modtime} = $mt;
    }
    closedir $dir;
}
# Re-reads the config file whenever its modification time has moved past the
# one we last saw (kept in $opts{config_modtime}). Calls fail() -- which does
# not return -- if the file named by $opts{conf} does not exist.
sub check_config {
    my $conf = $opts{conf};
    fail("Config file not found") unless -e $conf;

    my $mtime = (stat($conf))[9];
    $opts{config_modtime} ||= 0;

    # first run (recorded time 0) or the file has been touched since
    if ($mtime > $opts{config_modtime}) {
        reload_config();
    }
    $opts{config_modtime} = $mtime;
}
# Parses the config file named by $opts{conf} and rebuilds our idea of what
# should be running.
#
# Recognised line formats (comment lines starting with # and blank lines are
# skipped):
#
#   name = value                 stored in %opts
#   job: plugin(opt, opt, ...)   job definition, handed to add_job()
#   plugin(opt, opt, ...)        helper plugin, appended to @helpers
#
# The parenthesised option list is optional in both of the last two forms.
# Jobs that were not created by a helper (no respawn_with) are marked
# inactive first and re-activated by add_job() as their lines are seen, so a
# job whose line was removed stays inactive. Any unreadable file, unknown
# plugin or unparsable line ends in fail(), which does not return.
sub reload_config {
    debug("");
    debug("configuration file reloading");

    # we mark all jobs as inactive, so they get marked as active below
    foreach my $job (keys %jobs) {
        $jobs{$job}->{active} = 0
            unless $jobs{$job}->{respawn_with};
    }

    # clear out all helpers, as they should get readded. they aren't in
    # separate threads so it doesn't matter if we readd them every time.
    @helpers = ();

    # three-argument open on a lexical handle: the file name can never be
    # misread as an open mode
    open(my $fh, '<', $opts{conf})
        or fail("Unable to open config file: $!");

    while (my $line = <$fh>) {
        # ignore comments and clean surrounding whitespace
        next if $line =~ /^\s*#/;
        $line =~ s/^\s+//;
        $line =~ s/[\s\r\n]+$//;
        next unless length $line;

        # shortcut; set some options
        if ($line =~ /^(\w+)\s*=\s*(.+)$/) {
            $opts{$1} = $2;
            next;
        }

        # extract any options contained in (...) and going to the end of the line
        my $optionstr;
        if ($line =~ s/(?:\((.*)\))$//) {
            $optionstr = $1;
        }

        # split the option string on commas and trim each option on both
        # sides; ALL trailing whitespace is removed, not just one character
        my @options;
        if (defined $optionstr) {
            foreach my $option (split(/,/, $optionstr)) {
                $option =~ s/^\s+//;
                $option =~ s/\s+$//;
                push @options, $option;
            }
        }

        # now see if it has a job + plugin left, or just a plugin
        if ($line =~ /^([-\w:.]+)\s*:\s*(\w+)$/) {
            # this is a job definition
            my ($job, $plugin) = ($1, $2);
            fail("Error adding $job to job list")
                unless add_job($job, $plugin, \@options);
        } elsif ($line =~ /^(\w+)$/) {
            # this is just a helper plugin
            my $helper = $1;
            fail("Plugin $helper not defined")
                unless $plugins{$helper};

            # push name of plugin on helper list
            debug("helper from plugin $helper added");
            push @helpers, [ $helper, \@options ];
        } else {
            fail("Unknown format: $line($optionstr)");
        }
    }
    close $fh;

    debug("configuration file reloaded");
}
# Main processor, run on every worker() pass. Works in three phases:
#
#   1. reap any children that have exited and update %pids/%jobs/%todie
#   2. (re)load every plugin file flagged for reload and flag its jobs
#   3. walk the job list: kill and forget inactive jobs, and (re)start any
#      job that is flagged for reload and whose plugin has registered
#
# Calls fail() -- which does not return -- if a job refers to a plugin with
# no worker code or if fork fails.
sub reload {
    # iterate over any dead children we might have picked up
    while ((my $pid = waitpid(-1, WNOHANG)) > 0) {
        if (my $job = delete($pids{$pid})) {
            if ($jobs{$job}->{active}) {
                # died on its own; get it restarted in phase 3
                debug("[$job] dead; pid = $pid; marking to reload; unexpected!");
                $jobs{$job}->{reload} = 1;
                $jobs{$job}->{pid} = 0;
            } else {
                debug("[$job] dead; pid = $pid; inactive job, NOT reloading");
                delete $jobs{$job};
            }
        } elsif (delete $todie{$pid}) {
            # we killed this one ourselves when its job was restarted
            debug("child death; pid = $pid; expected death, already reloaded");
        } else {
            debug("ERROR: $pid died but we have no record of it");
        }
    }

    # iterate over plugins and reload as necessary
    foreach my $plugin (sort keys %plugins) {
        next unless $plugins{$plugin}->{reload};
        debug("reloading plugin: $plugin");

        # run the plugin file; the reload flag is only cleared by
        # register_plugin(), so a plugin that fails to load or register is
        # retried on the next pass
        my $file = $plugins{$plugin}->{file};
        unless (my $ret = do $file) {
            if ($@) {
                warn "couldn't parse $file: $@\n";
            } elsif (! defined $ret) {
                warn "couldn't do $file: $!\n";
            } else {
                warn "couldn't run $file\n";
            }
            next;
        }

        # now mark any jobs with this plugin to reload
        foreach my $job (keys %jobs) {
            $jobs{$job}->{reload} = 1
                if $jobs{$job}->{plugin} eq $plugin ||
                   $jobs{$job}->{respawn_with} eq $plugin;
        }
    }

    # now that we know all the plugins are loaded, iterate over jobs so we can get
    # the plugins spawned and doing something
    foreach my $job (sort keys %jobs) {
        my $plugin = $plugins{$jobs{$job}->{plugin}};
        fail("can't add job for plugin with no worker code: job = $job; plugin = $jobs{$job}->{plugin}")
            unless ref $plugin->{code}->{worker};

        # see if we need to kill off this job
        unless ($jobs{$job}->{active}) {
            debug("killing job: $job");
            if ($jobs{$job}->{pid}) {
                # still running; the record is removed in phase 1 once the
                # child has actually been reaped
                kill 15, $jobs{$job}->{pid};
            } else {
                delete $pids{$jobs{$job}->{pid}};
                delete $jobs{$job};
            }
            next;
        }

        # now, the following path does a reload of this job if necessary
        next unless $jobs{$job}->{reload} && $plugin->{registered};
        debug("reloading job: $job");

        # kill off this child if we had one
        if ($jobs{$job}->{pid}) {
            kill 15, $jobs{$job}->{pid};
            delete $pids{$jobs{$job}->{pid}};
            $todie{$jobs{$job}->{pid}} = 1;
            debug("[$job] killing child; pid = $jobs{$job}->{pid}");
            $jobs{$job}->{pid} = 0;
        }

        # bust out a child for this job
        my $pid = fork;
        fail("can't fork child: $!") if !defined $pid;
        unless ($pid) {
            # child path. first forget the parent's child bookkeeping: these
            # pids are our siblings, and fail() -- reachable from plugin code
            # through set() -- sends SIGTERM to everything listed in %pids
            %pids  = ();
            %todie = ();

            # do some basic setup and then call the worker
            $0 .= " [$jobs{$job}->{plugin}: $job]";
            $SIG{INT} = undef; # in case parent is in the foreground
            $SIG{TERM} = undef; # no special handling for this

            # call the child which should do all the work and return when it's
            # done. trap exceptions so a dying worker can never unwind out of
            # reload() and carry on running the daemon's loop in this child.
            my $finished = eval {
                $plugin->{code}->{worker}->($job, $jobs{$job}->{options});
                1;
            };
            unless ($finished) {
                warn "[$job] worker died: $@";
                exit 1;
            }

            # when the above returns, the worker is done, so we exit
            exit 0;
        }

        # if we get here we're a parent, which means we need to mark this child as
        # run and that we don't need to do anything more
        $jobs{$job}->{pid} = $pid;
        $jobs{$job}->{reload} = 0;
        $pids{$pid} = $job;
    }
}
# Called by plugins to let us know that they want to be active.
#
#   $plugin  - plugin name; must already be known in %plugins
#   $package - package the plugin's code lives in; our helper functions are
#              aliased into it
#   $opts    - hashref of code refs: 'register' is required, plus at least
#              one of 'worker' and 'helper'
#
# Returns 1 once the plugin is registered. Returns nothing (false) if an
# argument is missing, the plugin is unknown, the required code refs are not
# supplied, or the plugin's own register() returns false; in those cases the
# plugin stays flagged for reload.
sub register_plugin {
    my ($plugin, $package, $opts) = @_;
    return unless $plugin && $package && $plugins{$plugin} && $opts;

    # make sure they gave us enough functions
    unless (ref $opts->{register} && (ref $opts->{worker} || ref $opts->{helper})) {
        debug("${plugin} did not provide minimum functionality: register and either worker or helper");
        return;
    }

    # now create some aliases in their package so they can get to debug, set
    # and friends. plain glob assignment does the same job as building and
    # eval'ing source text, without executing an interpolated package name.
    {
        no strict 'refs';
        foreach my $name (qw(debug set add_job get_var mark_inactive_by_plugin)) {
            *{"${package}::${name}"} = *{"main::${name}"};
        }
    }

    # call the plugin's register function so that it knows we've acknowledged its presence
    unless ($opts->{register}->()) {
        debug("${plugin}::register() didn't return true");
        return;
    }

    # done reloading, mark as reloaded (so we don't reload next time)
    $plugins{$plugin}->{code} = $opts;
    $plugins{$plugin}->{reload} = 0;
    $plugins{$plugin}->{registered} = 1;
    return 1;
}
# Called by us and by helpers to add jobs to the list, or to refresh a job
# that is already on it.
#
#   $job          - job name (required)
#   $plugin       - name of the plugin that runs the job (required, must be
#                   known in %plugins)
#   $options      - arrayref of options for the job; defaults to []
#   $respawn_with - when called by a helper plugin this must be that plugin's
#                   name; otherwise the job will die the next time the config
#                   file is changed
#
# The job is marked active. It is flagged for reload when it has no running
# child, when its plugin changed, or when its options changed. Returns 1;
# bad input or an unknown plugin ends in fail(), which does not return.
sub add_job {
    my ($job, $plugin, $options, $respawn_with) = @_;
    fail("Bad input to add_job: job = $job, plugin = $plugin")
        unless $job && $plugin;
    $options ||= [];

    # now print out debugging info
    #debug("found config: $job: $plugin(" . join(', ', @$options) . ")");

    # make sure this plugin exists
    fail("Plugin $plugin not defined")
        unless $plugins{$plugin};

    # default %jobs setup; keep our own copy of the options so later changes
    # to the caller's array cannot silently alter the stored job
    my $entry = $jobs{$job} ||= {
        plugin       => $plugin,
        pid          => 0,
        reload       => 1,
        options      => [ @$options ],
        respawn_with => $respawn_with,
    };
    $entry->{active} = 1; # on unconditionally

    # now determine if this job needs reloading
    $entry->{reload} = 1 unless $entry->{pid};

    # an existing job that has been pointed at a different plugin must switch
    # over to it; without this the old plugin would keep running the job
    if ($entry->{plugin} ne $plugin) {
        $entry->{plugin} = $plugin;
        $entry->{reload} = 1;
    }

    if (scalar(@$options) == scalar(@{$entry->{options}})) {
        # compare options one by one, reload if any have changed
        for my $i (0 .. $#{$options}) {
            $entry->{reload} = 1
                if $options->[$i] ne $entry->{options}->[$i];
        }
    } else {
        # number of options changed, reload them all
        $entry->{reload} = 1;
    }

    # if reload, copy in new options just in case
    if ($entry->{reload}) {
        @{$entry->{options}} = @$options;
    }
    return 1;
}
# Called by helpers to mark everything they've spawned as inactive before
# they begin another round of adding jobs. This is basically a way to say to
# the gatherer that a process is dead: if it's not re-added immediately by
# the helper, it gets killed off in the next round of reaping.
#
#   $plugin - name of the helper plugin; only jobs whose respawn_with equals
#             this name are touched
#
# Does nothing when no plugin name is given. Returns nothing.
sub mark_inactive_by_plugin {
    my $plugin = shift;

    # without this guard a missing name compares equal to every job that has
    # no respawn_with at all, which would deactivate all config-file jobs
    return unless defined $plugin && length $plugin;

    foreach my $job (values %jobs) {
        next unless defined $job->{respawn_with};
        $job->{active} = 0
            if $job->{respawn_with} eq $plugin;
    }
    return;
}
# Gives plugins read access to the settings in %opts (command line options
# and "key = value" lines from the config file). A missing or false name is
# looked up as the empty string.
sub get_var {
    my ($name) = @_;
    $name = '' unless $name;
    return $opts{$name};
}