This commit is contained in:
2019-02-06 00:49:12 +03:00
commit 8dbb1bb605
4796 changed files with 506072 additions and 0 deletions

77
wcmtools/gearman/dev/DMap.pm Executable file
View File

@@ -0,0 +1,77 @@
#!/usr/bin/perl
package DMap;
use strict;
use Exporter;
use Storable;
use IO::Socket::INET;
use Gearman::Util;
use vars qw(@ISA @EXPORT);
@ISA = qw(Exporter);
@EXPORT = qw(dmap);
$Storable::Deparse = 1;
$Storable::Eval = 1;
our @js;
sub set_job_servers {
@js = @_;
}
sub dmap (&@) {
my $code = shift;
my $fz = Storable::freeze($code);
my $sock;
foreach (@js) {
$_ .= ":7003" unless /:/;
$sock = IO::Socket::INET->new(PeerAddr => $js[0]);
last if $sock;
}
die "No jobserver available" unless $sock;
my $send = sub {
print $sock Gearman::Util::pack_req_command(@_);
};
my $err;
my $get = sub {
return Gearman::Util::read_res_packet($sock, \$err);;
};
my $argc = scalar @_;
ARG:
foreach (@_) {
$send->("submit_job", join("\0", "dmap", "", Storable::freeze([ $code, $_ ])));
}
my $waiting = $argc;
my %handle; # n -> handle
my $hct = 0;
my %partial_res;
while ($waiting) {
my $res = $get->()
or die "Failure: $err";
if ($res->{type} eq "job_created") {
$handle{$hct} = ${$res->{blobref}};
$hct++;
next;
}
if ($res->{type} eq "work_complete") {
my $br = $res->{blobref};
$$br =~ s/^(.+?)\0//;
my $handle = $1;
$partial_res{$handle} = Storable::thaw($$br);
$waiting--;
}
}
return map { @{ $partial_res{$handle{$_}} } } (0..$argc-1);
}
1;

40
wcmtools/gearman/dev/client.pl Executable file
View File

@@ -0,0 +1,40 @@
#!/usr/bin/perl
use strict;
use Gearman::Util;
use IO::Socket::INET;
use Data::Dumper;
my $sock = IO::Socket::INET->new(PeerAddr => "localhost:7003")
or die "no socket.";
my $send = sub {
print $sock Gearman::Util::pack_req_command(@_);
};
my $err;
my $get = sub {
return Gearman::Util::read_res_packet($sock, \$err);;
};
#$send->("submit_job_bg", join("\0", "add", "", "5,3"));
$send->("get_status", "FOO");
my $res = $get->() or die "no handle";
die "not a status_res packet" unless $res->{type} eq "status_res";
while (1) {
$send->("submit_job", join("\0", "add", "-", "5,3"));
$res = $get->() or die "no handle";
print Dumper($res);
die "not a job_created res" unless $res->{type} eq "job_created";
while ($res = $get->()) {
print "New packet: " . Dumper($res);
}
print "Error: $err\n";
exit 0;
}

View File

@@ -0,0 +1,76 @@
#!/usr/bin/perl
use strict;
use Gearman::Util;
use IO::Socket::INET;
use Data::Dumper;
use Storable;
$Storable::Eval = 1;
my $server = shift;
$server ||= "localhost";
my $sock = IO::Socket::INET->new(PeerAddr => "$server:7003")
or die "no socket.";
my $send = sub {
print $sock Gearman::Util::pack_req_command(@_);
};
my $err;
my $get = sub {
my $res;
while (1) {
$res = Gearman::Util::read_res_packet($sock, \$err);
return undef unless $res;
return $res unless $res->{type} eq "noop";
}
};
$send->("can_do", "dmap");
while (1) {
$send->("grab_job");
my $res = $get->();
die "ERROR: $err\n" unless $res;
print " res.type = $res->{type}\n";
if ($res->{type} eq "error") {
print "ERROR: " . Dumper($res);
exit 0;
}
if ($res->{type} eq "no_job") {
$send->("pre_sleep");
print "Sleeping.\n";
my $rin;
vec($rin, fileno($sock), 1) = 1;
my $nfound = select($rin, undef, undef, 2.0);
print " select returned = $nfound\n";
next;
}
if ($res->{type} eq "job_assign") {
my $ar = $res->{blobref};
die "uh, bogus res" unless
$$ar =~ s/^(.+?)\0(.+?)\0//;
my ($handle, $func) = ($1, $2);
print "GOT JOB: $handle -- $func\n";
if ($func eq "dmap") {
my $rq = Storable::thaw($$ar);
my $code = $rq->[0];
my @val = map { &$code; } $rq->[1];
print "VALS: [@val]\n";
$send->("work_complete", join("\0", $handle, Storable::freeze(\@val)));
}
next;
}
print "RES: ", Dumper($res);
}

13
wcmtools/gearman/dev/dmap.pl Executable file
View File

@@ -0,0 +1,13 @@
#!/usr/bin/perl
use strict;
use DMap;
DMap::set_job_servers("localhost", "sammy", "kenny");
my @foo = dmap { "$_ = " . `hostname` } (1..10);
print "dmap says:\n @foo";

View File

@@ -0,0 +1,23 @@
#!/usr/bin/perl
use strict;
use Gearman::Util;
use IO::Socket::INET;
my $sock = IO::Socket::INET->new(PeerAddr => "localhost:7003")
or die "no socket.";
print $sock "gibberish_cmd\r\n";
my $res = <$sock>;
die "bogus response" unless $res =~ /^ERR unknown_command /;
my $cmd;
my $echo_val = "The time is " . time() . " \r\n and a null\0 is fun.";
print $sock Gearman::Util::pack_req_command("echo_req", $echo_val);
my $err;
my $res = Gearman::Util::read_res_packet($sock, \$err);
use Data::Dumper;
print "ERROR: $err\n";
print Dumper($res);

76
wcmtools/gearman/dev/worker.pl Executable file
View File

@@ -0,0 +1,76 @@
#!/usr/bin/perl
use strict;
use Gearman::Util;
use IO::Socket::INET;
use Data::Dumper;
my $sock = IO::Socket::INET->new(PeerAddr => "localhost:7003")
or die "no socket.";
my $send = sub {
print $sock Gearman::Util::pack_req_command(@_);
};
my $err;
my $get = sub {
my $res;
while (1) {
$res = Gearman::Util::read_res_packet($sock, \$err);
return undef unless $res;
return $res unless $res->{type} eq "noop";
}
};
$send->("can_do", "frob");
$send->("cant_do", "frob");
$send->("can_do", "bar");
$send->("reset_abilities");
$send->("can_do", "add");
while (1) {
$send->("grab_job");
my $res = $get->();
die "ERROR: $err\n" unless $res;
print " res.type = $res->{type}\n";
if ($res->{type} eq "error") {
print "ERROR: " . Dumper($res);
exit 0;
}
if ($res->{type} eq "no_job") {
$send->("pre_sleep");
print "Sleeping.\n";
my $rin;
vec($rin, fileno($sock), 1) = 1;
my $nfound = select($rin, undef, undef, 2.0);
print " select returned = $nfound\n";
next;
}
if ($res->{type} eq "job_assign") {
my $ar = $res->{blobref};
die "uh, bogus res" unless
$$ar =~ s/^(.+)\0(.+)\0//;
my ($handle, $func) = ($1, $2);
print " GOT: handle=$handle, func=$func, args=($$ar)\n";
if ($func eq "add") {
for (1..10) {
$send->("work_status", join("\0", $handle, $_, 10));
select undef, undef, undef, 0.5;
}
my ($n1, $n2) = split(/,/, $$ar);
$send->("work_complete", join("\0", $handle, $n1+$n2));
}
next;
}
print "RES: ", Dumper($res);
}

225
wcmtools/gearman/doc/overview.txt Executable file
View File

@@ -0,0 +1,225 @@
[ WARNING: EXTREMELY PRELIMINARY! ]
GearMan: A distributed job system
Brad Whitaker <whitaker@danga.com>
==================================
TODO: error responses to malformed/unexpected packets?
priorities, expirations of old/irrelevant jobs
upper-layer handling of async system going down + reopulation of jobs
Architecture:
[ job_server_1 ] \
[ job_server_2 ] | <====> application
[ job_server_n ] /
\ | /
-*- persistent tcp connections between job servers/workers
/ | \
[ worker_1 ]
[ worker_2 ]
[ worker_n ]
Guarantees:
1) Each job server will kill dups within that job server (not global)
2) Jobs will be retried as specified, as long as the job server is running
3) Each worker will have exactly one task at a time
... ?
Non-Guarantees:
1) Global duplicate checking is not provided
2) No job is guaranteed to complete
3) Loss of any job server will lose any jobs registered with it
... ?
Work flow:
1) Job server starts up, all workers connect and announce their
2) Application sends job to random job server, noting the job record so it can
be recreated in the future if necessary.
a) Synchronous: Application stays connected and waits for response
b) Asynchronous:
3) Job server handles job:
Possible messages:
[worker => job server]
"here's what i can do."
"goodbye."
"i'm about to sleep."
"got a job for me?"
"i'm 1/100 complete now."
"i've completed my job."
[job server => worker]
"noop." / "wake up."
"here's a job to do."
[application => job server]
"create this new job."
"how far along is this job?"
"is this job finished?"
[job server => application]
"okay (here is its handle)."
"job is 1/100 complete."
"job completed as follows: ..."
Request/Response cycles:
[ worker <=> job server ]
"here's what i can do" => (announcement)
"goodbye" => (announcement)
"i'm about to sleep" => (announcement)
"i'm 1/100 complete now" => (announcement)
"i've completed my job" => (announcement)
"got a job for me?" => "here's a job to do."
[ application <=> job server ]
"create this new job." => "okay (here is its handle)."
"how far along is this job?" => "job is 1/100 complete."
"is this job finished?" => "job completed as follows: ..."
[ job server <=> worker ]
"wake up." => (worker wakes up from sleep)
"here is a job to do" => "i'm 1/100 complete now."
=> "i've completed my job"
[ job server <=> application ]
(only speaks in response to application requests)
Best case conversation example:
worker_n => job_server_n: "got a job for me?"
job_server_n => worker_n: "yes, here is a job i've locked for you"
worker_n => job_server_n: "here is the result"
Worse case:
while ($time < $sleep_threshold) {
for $js (1..n) {
worker => job_server_$js: "got a job for me?"
job_server_$js => worker: "no, sorry"
}
}
worker => all_job_servers: "going to sleep"
[ worker receives wakeup packet ] or [ t seconds elapse ]
worker wakes up and resumes loop
Packet types:
Generic header:
[ 4 byte magic
4 byte packet type
4 byte length ]
Magic:
4 opaque bytes to verify state machine "\0REQ" or "\0RES"
Packet type:
(see Gearman::Util)
Length:
Post-header data length
Properties of a job:
func -- what function name
opaque scalar arg (passed through end-to-end, no interpretation by libraries/server)
uniq key -- for merging (default: don't merge, "-" means merge on opaque scalar)
retry count
fail after time -- treat a timeout as a failure
do job if dequeued and no listeners ("submit_job_bg")
priority ("submit_job_high")
on_* handlers
behavior when there's no worker registered for that job type?
Notes:
-- document whether on_fail gets called on all failures, or just last one, when retry_count is in use
-- document that uniq merging isn't guaranteed, just that it's permitted. if two tasks must not run
at the same time, the task itself needs to do appropriate locking with itself / other tasks.
-- the uniq merging will generally work in practice with multiple Job servers because the client
hashes the (func + opaque_arg) onto the set of servers
Task summary:
1) mail
name => mail
dupkey => '' (don't check dups)
type => async+handle
args => storable MIME::Lite/etc
2) gal resize
name => resize
dupkey => uid-gallid-w-h
type => async+handle
args => storable of images to resize/how
3) thumb pregen
name => thumbgen
dupkey => uid-upicid-w-h
type => async+handle
args => storable of images to resize
4) LJ crons
name => pay_updateaccounts/etc
dupkey => '' (no dup checking)
type => async
args => @ARGV to pass to job?
6) Dirty Flushing
name => dirty
dupkey => friends-uid, backup-uid, etc
type => async+handle
args => none
7) CmdBuffer jobs
name => weblogscom
dupkey => uid
type => async+throw-away
args => none
8) RSS fetching
name => rss
dupkey => uid
type => async+handle
args => none
9) captcha generation
name => captcha
dupkey => dd-hh ? maybe '1' or something
type => async+throw-away
args => none
10) birthday emails
name => bday
dupkey => yyyy-mm-dd
type => async+handle
args => none
11) restart web nodes
-- ask brad about this?

View File

@@ -0,0 +1,303 @@
#!/usr/bin/perl
#TODO: fail_after_idle
package Gearman::Client;
use strict;
use IO::Socket::INET;
use Socket qw(IPPROTO_TCP TCP_NODELAY SOL_SOCKET);
use Gearman::Objects;
use Gearman::Task;
use Gearman::Taskset;
use Gearman::JobStatus;
sub new {
my ($class, %opts) = @_;
my $self = $class;
$self = fields::new($class) unless ref $self;
$self->{job_servers} = [];
$self->{js_count} = 0;
$self->{sock_cache} = {};
$self->job_servers(@{ $opts{job_servers} })
if $opts{job_servers};
return $self;
}
sub new_task_set {
my Gearman::Client $self = shift;
return Gearman::Taskset->new($self);
}
# getter/setter
sub job_servers {
my Gearman::Client $self = shift;
return $self->{job_servers} unless @_;
my $list = [ @_ ];
$self->{js_count} = scalar @$list;
foreach (@$list) {
$_ .= ":7003" unless /:/;
}
return $self->{job_servers} = $list;
}
sub _get_task_from_args {
my Gearman::Task $task;
if (ref $_[0]) {
$task = $_[0];
Carp::croak("Argument isn't a Gearman::Task") unless ref $_[0] eq "Gearman::Task";
} else {
my ($func, $arg_p, $opts) = @_;
my $argref = ref $arg_p ? $arg_p : \$arg_p;
Carp::croak("Function argument must be scalar or scalarref")
unless ref $argref eq "SCALAR";
$task = Gearman::Task->new($func, $argref, $opts);
}
return $task;
}
# given a (func, arg_p, opts?), returns either undef (on fail) or scalarref of result
sub do_task {
my Gearman::Client $self = shift;
my Gearman::Task $task = &_get_task_from_args;
my $ret = undef;
my $did_err = 0;
$task->{on_complete} = sub {
$ret = shift;
};
$task->{on_fail} = sub {
$did_err = 1;
};
my $ts = $self->new_task_set;
$ts->add_task($task);
$ts->wait;
return $did_err ? undef : $ret;
}
# given a (func, arg_p, opts?) or
# Gearman::Task, dispatches job in background. returns the handle from the jobserver, or false if any failure
sub dispatch_background {
my Gearman::Client $self = shift;
my Gearman::Task $task = &_get_task_from_args;
my ($jst, $jss) = $self->_get_random_js_sock;
return 0 unless $jss;
my $req = $task->pack_submit_packet("background");
my $len = length($req);
my $rv = $jss->write($req, $len);
my $err;
my $res = Gearman::Util::read_res_packet($jss, \$err);
return 0 unless $res && $res->{type} eq "job_created";
return "$jst//${$res->{blobref}}";
}
sub get_status {
my Gearman::Client $self = shift;
my $handle = shift;
my ($hostport, $shandle) = split(m!//!, $handle);
return undef unless grep { $hostport eq $_ } @{ $self->{job_servers} };
my $sock = $self->_get_js_sock($hostport)
or return undef;
my $req = Gearman::Util::pack_req_command("get_status",
$shandle);
my $len = length($req);
my $rv = $sock->write($req, $len);
my $err;
my $res = Gearman::Util::read_res_packet($sock, \$err);
return undef unless $res && $res->{type} eq "status_res";
my @args = split(/\0/, ${ $res->{blobref} });
return undef unless $args[0];
shift @args;
$self->_put_js_sock($hostport, $sock);
return Gearman::JobStatus->new(@args);
}
# returns a socket from the cache. it should be returned to the
# cache with _put_js_sock. the hostport isn't verified. the caller
# should verify that $hostport is in the set of jobservers.
sub _get_js_sock {
my Gearman::Client $self = shift;
my $hostport = shift;
if (my $sock = delete $self->{sock_cache}{$hostport}) {
return $sock if $sock->connected;
}
my $sock = IO::Socket::INET->new(PeerAddr => $hostport,
Timeout => 1)
or return undef;
setsockopt($sock, IPPROTO_TCP, TCP_NODELAY, pack("l", 1)) or die;
$sock->autoflush(1);
return $sock;
}
# way for a caller to give back a socket it previously requested.
# the $hostport isn't verified, so the caller should verify the
# $hostport is still in the set of jobservers.
sub _put_js_sock {
my Gearman::Client $self = shift;
my ($hostport, $sock) = @_;
$self->{sock_cache}{$hostport} ||= $sock;
}
sub _get_random_js_sock {
my Gearman::Client $self = shift;
my $getter = shift;
return undef unless $self->{js_count};
$getter ||= sub { my $hostport = shift; return $self->_get_js_sock($hostport); };
my $ridx = int(rand($self->{js_count}));
for (my $try = 0; $try < $self->{js_count}; $try++) {
my $aidx = ($ridx + $try) % $self->{js_count};
my $hostport = $self->{job_servers}[$aidx];
my $sock = $getter->($hostport) or next;
return ($hostport, $sock);
}
return ();
}
1;
__END__
=head1 NAME
Gearman::Client - Client for gearman distributed job system
=head1 SYNOPSIS
use Gearman::Client;
my $client = Gearman::Client->new;
$client->job_servers('127.0.0.1', '10.0.0.1');
# running a single task
my $result_ref = $client->do_task("add", "1+2");
print "1 + 2 = $$result_ref\n";
# waiting on a set of tasks in parallel
my $taskset = $client->new_task_set;
$taskset->add_task( "add" => "1+2", {
on_complete => sub { ... }
});
$taskset->add_task( "divide" => "5/0", {
on_fail => sub { print "divide by zero error!\n"; },
});
$taskset->wait;
=head1 DESCRIPTION
I<Gearman::Client> is a client class for the Gearman distributed job
system, providing a framework for sending jobs to one or more Gearman
servers. These jobs are then distributed out to a farm of workers.
Callers instantiate a I<Gearman::Client> object and from it dispatch
single tasks, sets of tasks, or check on the status of tasks.
=head1 USAGE
=head2 Gearman::Client->new(\%options)
Creates a new I<Gearman::Client> object, and returns the object.
If I<%options> is provided, initializes the new client object with the
settings in I<%options>, which can contain:
=over 4
=item * job_servers
Calls I<job_servers> (see below) to initialize the list of job
servers. Value in this case should be an arrayref.
=back
=head2 $client->job_servers(@servers)
Initializes the client I<$client> with the list of job servers in I<@servers>.
I<@servers> should contain a list of IP addresses, with optional port
numbers. For example:
$client->job_servers('127.0.0.1', '192.168.1.100:7003');
If the port number is not provided, C<7003> is used as the default.
=head2 $client-E<gt>do_task($task)
=head2 $client-E<gt>do_task($funcname, $arg, \%options)
Dispatches a task and waits on the results. May either provide a
L<Gearman::Task> object, or the 3 arguments that the Gearman::Task
constructor takes.
Returns a scalar reference to the result, or undef on failure.
If you provide on_complete and on_fail handlers, they're ignored, as
this function currently overrides them.
=head2 $client-E<gt>dispatch_background($task)
=head2 $client-E<gt>dispatch_background($funcname, $arg, \%options)
Dispatches a task and doesn't wait for the result. Return value
is
=head2 $taskset = $client-E<gt>new_task_set
Creates and returns a new I<Gearman::Taskset> object.
=head2 $taskset-E<gt>add_task($task)
=head2 $taskset-E<gt>add_task($funcname, $arg, $uniq)
=head2 $taskset-E<gt>add_task($funcname, $arg, \%options)
Adds a task to a taskset. Three different calling conventions are
available.
=head2 $taskset-E<gt>wait
Waits for a response from the job server for any of the tasks listed
in the taskset. Will call the I<on_*> handlers for each of the tasks
that have been completed, updated, etc. Doesn't return until
everything has finished running or failing.
=head1 EXAMPLES
=head2 Summation
This is an example client that sends off a request to sum up a list of
integers.
use Gearman::Client;
use Storable qw( freeze );
my $client = Gearman::Client->new;
$client->job_servers('127.0.0.1');
my $tasks = $client->new_task_set;
my $handle = $tasks->add_task(sum => freeze([ 3, 5 ]), {
on_complete => sub { print ${ $_[0] }, "\n" }
});
$tasks->wait;
See the I<Gearman::Worker> documentation for the worker for the I<sum>
function.
=cut

View File

@@ -0,0 +1,20 @@
package Gearman::JobStatus;
use strict;
sub new {
my ($class, $known, $running, $nu, $de) = @_;
undef $nu unless length($nu);
undef $de unless length($de);
my $self = [ $known, $running, $nu, $de ];
bless $self;
return $self;
}
sub known { my $self = shift; return $self->[0]; }
sub running { my $self = shift; return $self->[1]; }
sub progress { my $self = shift; return defined $self->[2] ? [ $self->[2], $self->[3] ] : undef; }
sub percent { my $self = shift; return (defined $self->[2] && $self->[3]) ? ($self->[2] / $self->[3]) : undef; }
1;

View File

@@ -0,0 +1,52 @@
use strict;
package Gearman::Client;
use fields (
'job_servers',
'js_count',
'sock_cache', # hostport -> socket
);
package Gearman::Taskset;
use fields (
'waiting', # { handle => [Task, ...] }
'client', # Gearman::Client
'need_handle', # arrayref
'default_sock', # default socket (non-merged requests)
'default_sockaddr', # default socket's ip/port
'loaned_sock', # { hostport => socket }
);
package Gearman::Task;
use fields (
# from client:
'func',
'argref',
# opts from client:
'uniq',
'on_complete',
'on_fail',
'on_retry',
'on_status',
'retry_count',
'fail_after_idle',
'high_priority',
# from server:
'handle',
# maintained by this module:
'retries_done',
'taskset',
'jssock', # jobserver socket. shared by other tasks in the same taskset,
# but not w/ tasks in other tasksets using the same Gearman::Client
);
1;

View File

@@ -0,0 +1,210 @@
package Gearman::Task;
use strict;
use Carp ();
use String::CRC32 ();
# constructor, given: ($func, $argref, $opts);
sub new {
my $class = shift;
my $self = $class;
$self = fields::new($class) unless ref $self;
$self->{func} = shift
or Carp::croak("No function given");
$self->{argref} = shift || do { my $empty = ""; \$empty; };
Carp::croak("Argref not a scalar reference") unless ref $self->{argref} eq "SCALAR";
my $opts = shift || {};
for my $k (qw( uniq
on_complete on_fail on_retry on_status
retry_count fail_after_idle high_priority
)) {
$self->{$k} = delete $opts->{$k};
}
if (%{$opts}) {
Carp::croak("Unknown option(s): " . join(", ", sort keys %$opts));
}
$self->{retries_done} = 0;
return $self;
}
sub taskset {
my Gearman::Task $task = shift;
# getter
return $task->{taskset} unless @_;
# setter
my Gearman::Taskset $ts = shift;
$task->{taskset} = $ts;
my $merge_on = $task->{uniq} && $task->{uniq} eq "-" ?
$task->{argref} : \ $task->{uniq};
if ($$merge_on) {
my $hash_num = _hashfunc($merge_on);
$task->{jssock} = $ts->_get_hashed_sock($hash_num);
} else {
$task->{jssock} = $ts->_get_default_sock;
}
return $task->{taskset};
}
# returns number in range [0,32767] given a scalarref
sub _hashfunc {
return (String::CRC32::crc32(${ shift() }) >> 16) & 0x7fff;
}
sub pack_submit_packet {
my Gearman::Task $task = shift;
my $is_background = shift;
my $mode = $is_background ?
"submit_job_bg" :
($task->{high_priority} ?
"submit_job_high" :
"submit_job");
return Gearman::Util::pack_req_command($mode,
join("\0", $task->{func}, $task->{uniq}, ${ $task->{argref} }));
}
sub fail {
my Gearman::Task $task = shift;
# try to retry, if we can
if ($task->{retries_done} < $task->{retry_count}) {
$task->{retries_done}++;
$task->{on_retry}->($task->{retries_done}) if $task->{on_retry};
$task->handle(undef);
return $task->{taskset}->add_task($task);
}
return undef unless $task->{on_fail};
$task->{on_fail}->();
return undef;
}
sub complete {
my Gearman::Task $task = shift;
return unless $task->{on_complete};
my $result_ref = shift;
$task->{on_complete}->($result_ref);
}
sub status {
my Gearman::Task $task = shift;
return unless $task->{on_status};
my ($nu, $de) = @_;
$task->{on_status}->($nu, $de);
}
# getter/setter for the fully-qualified handle of form "IP:port//shandle" where
# shandle is an opaque handle specific to the job server running on IP:port
sub handle {
my Gearman::Task $task = shift;
return $task->{handle} unless @_;
return $task->{handle} = shift;
}
1;
__END__
=head1 NAME
Gearman::Task - a task in Gearman, from the point of view of a client
=head1 SYNOPSIS
my $task = Gearman::Task->new("add", "1+2", {
.....
};
$taskset->add_task($task);
$client->do_task($task);
$client->dispatch_background($task);
=head1 DESCRIPTION
I<Gearman::Task> is a Gearman::Client's representation of a task to be
done.
=head1 USAGE
=head2 Gearman::Task->new($func, $arg, \%options)
Creates a new I<Gearman::Task> object, and returns the object.
I<$func> is the function name to be run. (that you have a worker registered to process)
I<$arg> is an opaque scalar or scalarref representing the argument(s)
to pass to the distributed function. If you want to pass multiple
arguments, you must encode them somehow into this one. That's up to
you and your worker.
I<%options> can contain:
=over 4
=item * uniq
A key which indicates to the server that other tasks with the same
function name and key will be merged into one. That is, the task
will be run just once, but all the listeners waiting on that job
will get the response multiplexed back to them.
Uniq may also contain the magic value "-" (a single hyphen) which
means the uniq key is the contents of the args.
=item * on_complete
A subroutine reference to be invoked when the task is completed. The
subroutine will be passed a reference to the return value from the worker
process.
=item * on_fail
A subroutine reference to be invoked when the task fails (or fails for
the last time, if retries were specified). No arguments are
passed to this callback. This callback won't be called after a failure
if more retries are still possible.
=item * on_retry
A subroutine reference to be invoked when the task fails, but is about
to be retried.
Is passed one argument, what retry attempt number this is. (starts with 1)
=item * on_status
A subroutine reference to be invoked if the task emits status updates.
Arguments passed to the subref are ($numerator, $denominator), where those
are left up to the client and job to determine.
=item * retry_count
Number of times job will be retried if there are failures. Defaults to 0.
=item * high_priority
Boolean, whether this job should take priority over other jobs already
enqueued.
=item * fail_after_idle
Automatically fail after this many seconds have elapsed. Defaults to 0,
which means never.
=back
=cut

View File

@@ -0,0 +1,269 @@
package Gearman::Taskset;
use strict;
use Carp ();
use Gearman::Util;
sub new {
my $class = shift;
my Gearman::Client $client = shift;
my $self = $class;
$self = fields::new($class) unless ref $self;
$self->{waiting} = {};
$self->{need_handle} = [];
$self->{client} = $client;
$self->{loaned_sock} = {};
return $self;
}
sub DESTROY {
my Gearman::Taskset $ts = shift;
if ($ts->{default_sock}) {
$ts->{client}->_put_js_sock($ts->{default_sockaddr}, $ts->{default_sock});
}
while (my ($hp, $sock) = each %{ $ts->{loaned_sock} }) {
$ts->{client}->_put_js_sock($hp, $sock);
}
}
sub _get_loaned_sock {
my Gearman::Taskset $ts = shift;
my $hostport = shift;
if (my $sock = $ts->{loaned_sock}{$hostport}) {
return $sock if $sock->connected;
delete $ts->{loaned_sock}{$hostport};
}
my $sock = $ts->{client}->_get_js_sock($hostport);
return $ts->{loaned_sock}{$hostport} = $sock;
}
sub wait {
my Gearman::Taskset $ts = shift;
while (keys %{$ts->{waiting}}) {
$ts->_wait_for_packet();
# TODO: timeout jobs that have been running too long. the _wait_for_packet
# loop only waits 0.5 seconds.
}
}
# ->add_task($func, <$scalar | $scalarref>, <$uniq | $opts_hashref>
# opts:
# -- uniq
# -- on_complete
# -- on_fail
# -- on_status
# -- retry_count
# -- fail_after_idle
# -- high_priority
# ->add_task(Gearman::Task)
#
sub add_task {
my Gearman::Taskset $ts = shift;
my $task;
if (ref $_[0]) {
$task = shift;
} else {
my $func = shift;
my $arg_p = shift; # scalar or scalarref
my $opts = shift; # $uniq or hashref of opts
my $argref = ref $arg_p ? $arg_p : \$arg_p;
unless (ref $opts eq "HASH") {
$opts = { uniq => $opts };
}
$task = Gearman::Task->new($func, $argref, $opts);
}
$task->taskset($ts);
my $req = $task->pack_submit_packet;
my $len = length($req);
my $rv = $task->{jssock}->syswrite($req, $len);
die "Wrote $rv but expected to write $len" unless $rv == $len;
push @{ $ts->{need_handle} }, $task;
while (@{ $ts->{need_handle} }) {
my $rv = $ts->_wait_for_packet($task->{jssock});
if (! $rv) {
shift @{ $ts->{need_handle} }; # ditch it, it failed.
# this will resubmit it if it failed.
print " INITIAL SUBMIT FAILED\n";
return $task->fail;
}
}
return $task->handle;
}
sub _get_default_sock {
my Gearman::Taskset $ts = shift;
return $ts->{default_sock} if $ts->{default_sock};
my $getter = sub {
my $hostport = shift;
return
$ts->{loaned_sock}{$hostport} ||
$ts->{client}->_get_js_sock($hostport);
};
my ($jst, $jss) = $ts->{client}->_get_random_js_sock($getter);
$ts->{loaned_sock}{$jst} ||= $jss;
$ts->{default_sock} = $jss;
$ts->{default_sockaddr} = $jst;
return $jss;
}
sub _get_hashed_sock {
my Gearman::Taskset $ts = shift;
my $hv = shift;
my Gearman::Client $cl = $ts->{client};
for (my $off = 0; $off < $cl->{js_count}; $off++) {
my $idx = ($hv + $off) % ($cl->{js_count});
my $sock = $ts->_get_loaned_sock($cl->{job_servers}[$idx]);
return $sock if $sock;
}
return undef;
}
# returns boolean when given a sock to wait on.
# otherwise, return value is undefined.
sub _wait_for_packet {
my Gearman::Taskset $ts = shift;
my $sock = shift; # optional socket to singularly read from
my ($res, $err);
if ($sock) {
$res = Gearman::Util::read_res_packet($sock, \$err);
return 0 unless $res;
return $ts->_process_packet($res, $sock);
} else {
# TODO: cache this vector?
my ($rin, $rout, $eout);
my %watching;
for my $sock ($ts->{default_sock}, values %{ $ts->{loaned_sock} }) {
next unless $sock;
my $fd = $sock->fileno;
vec($rin, $fd, 1) = 1;
$watching{$fd} = $sock;
}
my $nfound = select($rout=$rin, undef, $eout=$rin, 0.5);
return 0 if ! $nfound;
foreach my $fd (keys %watching) {
next unless vec($rout, $fd, 1);
# TODO: deal with error vector
my $sock = $watching{$fd};
$res = Gearman::Util::read_res_packet($sock, \$err);
$ts->_process_packet($res, $sock) if $res;
}
return 1;
}
}
sub _ip_port {
my $sock = shift;
return undef unless $sock;
my $pn = getpeername($sock) or return undef;
my ($port, $iaddr) = Socket::sockaddr_in($pn);
return Socket::inet_ntoa($iaddr) . ":$port";
}
# note the failure of a task given by its jobserver-specific handle
sub _fail_jshandle {
my Gearman::Taskset $ts = shift;
my $shandle = shift;
my $task_list = $ts->{waiting}{$shandle} or
die "Uhhhh: got work_fail for unknown handle: $shandle\n";
my Gearman::Task $task = shift @$task_list or
die "Uhhhh: task_list is empty on work_fail for handle $shandle\n";
$task->fail;
delete $ts->{waiting}{$shandle} unless @$task_list;
}
sub _process_packet {
my Gearman::Taskset $ts = shift;
my ($res, $sock) = @_;
if ($res->{type} eq "job_created") {
my Gearman::Task $task = shift @{ $ts->{need_handle} } or
die "Um, got an unexpected job_created notification";
my $shandle = ${ $res->{'blobref'} };
my $ipport = _ip_port($sock);
# did sock become disconnected in the meantime?
if (! $ipport) {
$ts->_fail_jshandle($shandle);
return 1;
}
$task->handle("$ipport//$shandle");
push @{ $ts->{waiting}{$shandle} ||= [] }, $task;
return 1;
}
if ($res->{type} eq "work_fail") {
my $shandle = ${ $res->{'blobref'} };
$ts->_fail_jshandle($shandle);
return 1;
}
if ($res->{type} eq "work_complete") {
${ $res->{'blobref'} } =~ s/^(.+?)\0//
or die "Bogus work_complete from server";
my $shandle = $1;
my $task_list = $ts->{waiting}{$shandle} or
die "Uhhhh: got work_complete for unknown handle: $shandle\n";
my Gearman::Task $task = shift @$task_list or
die "Uhhhh: task_list is empty on work_complete for handle $shandle\n";
$task->complete($res->{'blobref'});
delete $ts->{waiting}{$shandle} unless @$task_list;
return 1;
}
if ($res->{type} eq "work_status") {
my ($shandle, $nu, $de) = split(/\0/, ${ $res->{'blobref'} });
my $task_list = $ts->{waiting}{$shandle} or
die "Uhhhh: got work_status for unknown handle: $shandle\n";
# FIXME: the server is (probably) sending a work_status packet for each
# interested client, even if the clients are the same, so probably need
# to fix the server not to do that. just put this FIXME here for now,
# though really it's a server issue.
foreach my Gearman::Task $task (@$task_list) {
$task->status($nu, $de);
}
return 1;
}
die "Unknown/unimplemented packet type: $res->{type}";
}
1;

View File

@@ -0,0 +1,128 @@
package Gearman::Util;
use strict;
# I: to jobserver
# O: out of job server
# W: worker
# C: client of job server
# J : jobserver
our %cmd = (
1 => [ 'I', "can_do" ], # from W: [FUNC]
2 => [ 'I', "cant_do" ], # from W: [FUNC]
3 => [ 'I', "reset_abilities" ], # from W: ---
22 => [ 'I', "set_client_id" ], # W->J: [RANDOM_STRING_NO_WHITESPACE]
4 => [ 'I', "pre_sleep" ], # from W: ---
6 => [ 'O', "noop" ], # J->W ---
7 => [ 'I', "submit_job" ], # C->J FUNC[0]UNIQ[0]ARGS
21 => [ 'I', "submit_job_high" ], # C->J FUNC[0]UNIQ[0]ARGS
18 => [ 'I', "submit_job_bg" ], # C->J " " " " "
8 => [ 'O', "job_created" ], # J->C HANDLE
9 => [ 'I', "grab_job" ], # W->J --
10 => [ 'O', "no_job" ], # J->W --
11 => [ 'O', "job_assign" ], # J->W HANDLE[0]FUNC[0]ARG
12 => [ 'IO', "work_status" ], # W->J/C: HANDLE[0]NUMERATOR[0]DENOMINATOR
13 => [ 'IO', "work_complete" ], # W->J/C: HANDLE[0]RES
14 => [ 'IO', "work_fail" ], # W->J/C: HANDLE
15 => [ 'I', "get_status" ], # C->J: HANDLE
20 => [ 'O', "status_res" ], # C->J: HANDLE[0]KNOWN[0]RUNNING[0]NUM[0]DENOM
16 => [ 'I', "echo_req" ], # ?->J TEXT
17 => [ 'O', "echo_res" ], # J->? TEXT
19 => [ 'O', "error" ], # J->? ERRCODE[0]ERR_TEXT
);
our %num; # name -> num
while (my ($num, $ary) = each %cmd) {
die if $num{$ary->[1]};
$num{$ary->[1]} = $num;
}
sub cmd_name {
my $num = shift;
my $c = $cmd{$num};
return $c ? $c->[1] : undef;
}
sub pack_req_command {
my $type_arg = shift;
my $type = $num{$type_arg} || $type_arg;
die "Bogus type arg of '$type_arg'" unless $type;
my $arg = $_[0] || '';
my $len = length($arg);
return "\0REQ" . pack("NN", $type, $len) . $arg;
}
sub pack_res_command {
my $type_arg = shift;
my $type = int($type_arg) || $num{$type_arg};
die "Bogus type arg of '$type_arg'" unless $type;
my $len = length($_[0]);
return "\0RES" . pack("NN", $type, $len) . $_[0];
}
# returns undef on closed socket or malformed packet
sub read_res_packet {
my $sock = shift;
my $err_ref = shift;
my $buf;
my $rv;
my $err = sub {
my $code = shift;
$$err_ref = $code if ref $err_ref;
return undef;
};
return $err->("malformed_header") unless sysread($sock, $buf, 12) == 12;
my ($magic, $type, $len) = unpack("a4NN", $buf);
return $err->("malformed_magic") unless $magic eq "\0RES";
if ($len) {
$rv = sysread($sock, $buf, $len);
return $err->("short_body") unless $rv == $len;
}
$type = $cmd{$type};
return $err->("bogus_command") unless $type;
return $err->("bogus_command_type") unless index($type->[0], "O") != -1;
return {
'type' => $type->[1],
'len' => $len,
'blobref' => \$buf,
};
}
sub send_req {
my ($sock, $reqref) = @_;
return 0 unless $sock;
my $len = length($$reqref);
#TODO: catch SIGPIPE
my $rv = $sock->syswrite($$reqref, $len);
return 0 unless $rv == $len;
return 1;
}
# given a file descriptor number and a timeout, wait for that descriptor to
# become readable; returns 0 or 1 on if it did or not
sub wait_for_readability {
my ($fileno, $timeout) = @_;
return 0 unless $fileno && $timeout;
my $rin = 0;
vec($rin, $fileno, 1) = 1;
my $nfound = select($rin, undef, undef, $timeout);
# nfound can be undef or 0, both failures, or 1, a success
return $nfound ? 1 : 0;
}
1;

View File

@@ -0,0 +1,366 @@
#!/usr/bin/perl
#TODO: retries?
use strict;
use Gearman::Util;
use Carp ();
use IO::Socket::INET;
# this is the object that's handed to the worker subrefs
package Gearman::Job;
use fields (
'func',
'argref',
'handle',
'jss', # job server's socket
);
sub new {
my ($class, $func, $argref, $handle, $jss) = @_;
my $self = $class;
$self = fields::new($class) unless ref $self;
$self->{func} = $func;
$self->{handle} = $handle;
$self->{argref} = $argref;
$self->{jss} = $jss;
return $self;
}
# ->set_status($numerator, $denominator) : $bool_sent_to_jobserver
sub set_status {
my Gearman::Job $self = shift;
my ($nu, $de) = @_;
my $req = Gearman::Util::pack_req_command("work_status",
join("\0", $self->{handle}, $nu, $de));
return Gearman::Util::send_req($self->{jss}, \$req);
}
sub argref {
my Gearman::Job $self = shift;
return $self->{argref};
}
sub arg {
my Gearman::Job $self = shift;
return ${ $self->{argref} };
}
package Gearman::Worker;
use Socket qw(IPPROTO_TCP TCP_NODELAY SOL_SOCKET PF_INET SOCK_STREAM);
use fields (
'job_servers',
'js_count',
'sock_cache', # host:port -> IO::Socket::INET
'last_connect_fail', # host:port -> unixtime
'down_since', # host:port -> unixtime
'connecting', # host:port -> unixtime connect started at
'can', # func -> subref
'client_id', # random identifer string, no whitespace
);
sub new {
my ($class, %opts) = @_;
my $self = $class;
$self = fields::new($class) unless ref $self;
$self->{job_servers} = [];
$self->{js_count} = 0;
$self->{sock_cache} = {};
$self->{last_connect_fail} = {};
$self->{down_since} = {};
$self->{can} = {};
$self->{client_id} = join("", map { chr(int(rand(26)) + 97) } (1..30));
$self->job_servers(@{ $opts{job_servers} })
if $opts{job_servers};
return $self;
}
sub _get_js_sock {
my Gearman::Worker $self = shift;
my $ipport = shift;
if (my $sock = $self->{sock_cache}{$ipport}) {
return $sock if getpeername($sock);
delete $self->{sock_cache}{$ipport};
}
my $now = time;
my $down_since = $self->{down_since}{$ipport};
if ($down_since) {
my $down_for = $now - $down_since;
my $retry_period = $down_for > 60 ? 30 : (int($down_for / 2) + 1);
if ($self->{last_connect_fail}{$ipport} > $now - $retry_period) {
return undef;
}
}
return undef unless $ipport =~ /(^\d+\..+):(\d+)/;
my ($ip, $port) = ($1, $2);
my $sock;
socket $sock, PF_INET, SOCK_STREAM, IPPROTO_TCP;
#IO::Handle::blocking($sock, 0);
connect $sock, Socket::sockaddr_in($port, Socket::inet_aton($ip));
#my $sock = IO::Socket::INET->new(PeerAddr => $ip,
# Timeout => 1);
unless ($sock) {
$self->{down_since}{$ipport} ||= $now;
$self->{last_connect_fail}{$ipport} = $now;
return undef;
}
delete $self->{last_connect_fail}{$ipport};
delete $self->{down_since}{$ipport};
$sock->autoflush(1);
setsockopt($sock, IPPROTO_TCP, TCP_NODELAY, pack("l", 1)) or die;
$self->{sock_cache}{$ipport} = $sock;
my $cid_req = Gearman::Util::pack_req_command("set_client_id", $self->{client_id});
Gearman::Util::send_req($sock, \$cid_req);
# get this socket's state caught-up
foreach my $func (keys %{$self->{can}}) {
unless (_set_capability($sock, $func, 1)) {
delete $self->{sock_cache}{$ipport};
return undef;
}
}
return $sock;
}
sub _set_capability {
my ($sock, $func, $can) = @_;
my $req = Gearman::Util::pack_req_command($can ? "can_do" : "cant_do",
$func);
return Gearman::Util::send_req($sock, \$req);
}
# tell all the jobservers that this worker can't do anything
sub reset_abilities {
my Gearman::Worker $self = shift;
my $req = Gearman::Util::pack_req_command("reset_abilities");
foreach my $js (@{ $self->{job_servers} }) {
my $jss = $self->_get_js_sock($js);
unless (Gearman::Util::send_req($jss, \$req)) {
delete $self->{sock_cache}{$js};
}
}
$self->{can} = {};
}
# does one job and returns. no return value.
sub work {
my Gearman::Worker $self = shift;
my $grab_req = Gearman::Util::pack_req_command("grab_job");
my $presleep_req = Gearman::Util::pack_req_command("pre_sleep");
my %fd_map;
while (1) {
my @jss;
my $need_sleep = 1;
foreach my $js (@{ $self->{job_servers} }) {
my $jss = $self->_get_js_sock($js)
or next;
unless (Gearman::Util::send_req($jss, \$grab_req) &&
Gearman::Util::wait_for_readability($jss->fileno, 0.50)) {
delete $self->{sock_cache}{$js};
next;
}
push @jss, [$js, $jss];
my ($res, $err);
do {
$res = Gearman::Util::read_res_packet($jss, \$err);
} while ($res && $res->{type} eq "noop");
next unless $res;
if ($res->{type} eq "no_job") {
next;
}
die "Uh, wasn't expecting a $res->{type} packet" unless $res->{type} eq "job_assign";
${ $res->{'blobref'} } =~ s/^(.+?)\0(.+?)\0//
or die "Uh, regexp on job_assign failed";
my ($handle, $func) = ($1, $2);
my $job = Gearman::Job->new($func, $res->{'blobref'}, $handle, $jss);
my $handler = $self->{can}{$func};
my $ret = eval { $handler->($job); };
my $work_req;
if (defined $ret) {
$work_req = Gearman::Util::pack_req_command("work_complete", "$handle\0" . (ref $ret ? $$ret : $ret));
} else {
$work_req = Gearman::Util::pack_req_command("work_fail", $handle);
}
unless (Gearman::Util::send_req($jss, \$work_req)) {
delete $self->{sock_cache}{$js};
}
return;
}
if ($need_sleep) {
my $wake_vec = 0;
foreach my $j (@jss) {
my ($js, $jss) = @$j;
unless (Gearman::Util::send_req($jss, \$presleep_req)) {
delete $self->{sock_cache}{$js};
next;
}
my $fd = $jss->fileno;
vec($wake_vec, $fd, 1) = 1;
}
# chill for some arbitrary time until we're woken up again
select($wake_vec, undef, undef, 10);
}
}
}
sub register_function {
my Gearman::Worker $self = shift;
my $func = shift;
my $subref = shift;
my $req = Gearman::Util::pack_req_command("can_do", $func);
foreach my $js (@{ $self->{job_servers} }) {
my $jss = $self->_get_js_sock($js);
unless (Gearman::Util::send_req($jss, \$req)) {
delete $self->{sock_cache}{$js};
}
}
$self->{can}{$func} = $subref;
}
# getter/setter
sub job_servers {
my Gearman::Worker $self = shift;
return $self->{job_servers} unless @_;
my $list = [ @_ ];
$self->{js_count} = scalar @$list;
foreach (@$list) {
$_ .= ":7003" unless /:/;
}
return $self->{job_servers} = $list;
}
1;
__END__
=head1 NAME
Gearman::Worker - Worker for gearman distributed job system
=head1 SYNOPSIS
use Gearman::Worker;
my $worker = Gearman::Worker->new;
$worker->job_servers('127.0.0.1');
$worker->register_function($funcname => $subref);
$worker->work while 1;
=head1 DESCRIPTION
I<Gearman::Worker> is a worker class for the Gearman distributed job system,
providing a framework for receiving and serving jobs from a Gearman server.
Callers instantiate a I<Gearman::Worker> object, register a list of functions
and capabilities that they can handle, then enter an event loop, waiting
for the server to send jobs.
The worker can send a return value back to the server, which then gets
sent back to the client that requested the job; or it can simply execute
silently.
=head1 USAGE
=head2 Gearman::Worker->new(\%options)
Creates a new I<Gearman::Worker> object, and returns the object.
If I<%options> is provided, initializes the new worker object with the
settings in I<%options>, which can contain:
=over 4
=item * job_servers
Calls I<job_servers> (see below) to initialize the list of job servers.
=back
=head2 $worker->job_servers(@servers)
Initializes the worker I<$worker> with the list of job servers in I<@servers>.
I<@servers> should contain a list of IP addresses, with optional port numbers.
For example:
$worker->job_servers('127.0.0.1', '192.168.1.100:7003');
If the port number is not provided, 7003 is used as the default.
=head2 $worker->register_function($funcname, $subref)
Registers the function I<$funcname> as being provided by the worker
I<$worker>, and advertises these capabilities to all of the job servers
defined in this worker.
I<$subref> must be a subroutine reference that will be invoked when the
worker receives a request for this function. It will be passed a
I<Gearman::Job> object representing the job that has been received by the
worker.
The subroutine reference can return a return value, which will be sent back
to the job server.
=head2 Gearman::Job->arg
Returns the scalar argument that the client sent to the job server.
=head2 Gearman::Job->set_status($numerator, $denominator)
Updates the status of the job (most likely, a long-running job) and sends
it back to the job server. I<$numerator> and I<$denominator> should
represent the percentage completion of the job.
=head1 EXAMPLES
=head2 Summation
This is an example worker that receives a request to sum up a list of
integers.
use Gearman::Worker;
use Storable qw( thaw );
use List::Util qw( sum );
my $worker = Gearman::Worker->new;
$worker->job_servers('127.0.0.1');
$worker->register_function(sum => sub { sum @{ thaw($_[0]->arg) } });
$worker->work while 1;
See the I<Gearman::Client> documentation for a sample client sending the
I<sum> job.
=cut

694
wcmtools/gearman/server/gearmand Executable file
View File

@@ -0,0 +1,694 @@
#!/usr/bin/perl
#
# Gearman
#
# Status: 2005-04-13
#
# Copyright 2005, Danga Interactive
#
# Authors:
# Brad Fitzpatrick <brad@danga.com>
# Brad Whitaker <whitaker@danga.com>
#
# License:
# terms of Perl itself.
#
use strict;
use Getopt::Long;
use Carp;
use Danga::Socket;
use IO::Socket::INET;
use POSIX ();
use lib '../lib';
use Gearman::Util;
use vars qw($DEBUG);
$DEBUG = 0;
my (
$daemonize,
$nokeepalive,
);
my $conf_port = 7003;
Getopt::Long::GetOptions(
'd|daemon' => \$daemonize,
'p|port=i' => \$conf_port,
'debug=i' => \$DEBUG,
);
daemonize() if $daemonize;
use Socket qw(IPPROTO_TCP TCP_NODELAY SOL_SOCKET);
$SIG{'PIPE'} = "IGNORE"; # handled manually
# establish SERVER socket, bind and listen.
my $server = IO::Socket::INET->new(LocalPort => $conf_port,
Type => SOCK_STREAM,
Proto => IPPROTO_TCP,
Blocking => 0,
Reuse => 1,
Listen => 10 )
or die "Error creating socket: $@\n";
# Not sure if I'm crazy or not, but I can't see in strace where/how
# Perl 5.6 sets blocking to 0 without this. In Perl 5.8, IO::Socket::INET
# obviously sets it from watching strace.
IO::Handle::blocking($server, 0);
my $accept_handler = sub {
my $csock = $server->accept();
return unless $csock;
printf("Listen child making a Client for %d.\n", fileno($csock))
if $DEBUG;
IO::Handle::blocking($csock, 0);
setsockopt($csock, IPPROTO_TCP, TCP_NODELAY, pack("l", 1)) or die;
my $client = Client->new($csock);
$client->watch_read(1);
};
Client->OtherFds(fileno($server) => $accept_handler);
sub daemonize {
my($pid, $sess_id, $i);
## Fork and exit parent
if ($pid = fork) { exit 0; }
## Detach ourselves from the terminal
croak "Cannot detach from controlling terminal"
unless $sess_id = POSIX::setsid();
## Prevent possibility of acquiring a controling terminal
$SIG{'HUP'} = 'IGNORE';
if ($pid = fork) { exit 0; }
## Change working directory
chdir "/";
## Clear file creation mask
umask 0;
## Close open file descriptors
close(STDIN);
close(STDOUT);
close(STDERR);
## Reopen stderr, stdout, stdin to /dev/null
open(STDIN, "+>/dev/null");
open(STDOUT, "+>&STDIN");
open(STDERR, "+>&STDIN");
}
#####################################################################
### Job definition
package Job;
use Sys::Hostname;
use fields (
'func',
'uniq',
'argref',
'listeners', # arrayref of interested Clients
'worker',
'handle',
'status', # [1, 100]
'require_listener',
);
our $handle_ct = 0;
our $handle_base = "H:" . hostname() . ":";
our %job_queue; # job_name -> [Job, Job*] (key only exists if non-empty)
our %jobOfHandle; # handle -> Job
our %jobOfUniq; # func -> uniq -> Job
#####################################################################
### Client definition
package Client;
use Danga::Socket;
use base 'Danga::Socket';
use fields (
'can_do', # { $job_name => 1 }
'can_do_list',
'can_do_iter',
'read_buf',
'sleeping', # 0/1: they've said they're sleeping and we haven't woken them up
'doing', # { $job_handle => Job }
'client_id', # opaque string, no whitespace. workers give this so checker scripts
# can tell apart the same worker connected to multiple jobservers.
);
#####################################################################
### J O B C L A S S
#####################################################################
package Job;
sub new {
my Job $self = shift;
my ($func, $uniq, $argref, $highpri) = @_;
$self = fields::new($self) unless ref $self;
# if they specified a uniq, see if we have a dup job running already
# to merge with
if (length($uniq)) {
# a unique value of "-" means "use my args as my unique key"
$uniq = $$argref if $uniq eq "-";
if ($jobOfUniq{$func} && $jobOfUniq{$func}{$uniq}) {
# found a match
return $jobOfUniq{$func}{$uniq};
} else {
# create a new key
$jobOfUniq{$func} ||= {};
$jobOfUniq{$func}{$uniq} = $self;
}
}
$self->{'func'} = $func;
$self->{'uniq'} = $uniq;
$self->{'require_listener'} = 1;
$self->{'argref'} = $argref;
$self->{'listeners'} = [];
$handle_ct++;
$self->{'handle'} = $handle_base . $handle_ct;
my $jq = ($job_queue{$func} ||= []);
if ($highpri) {
unshift @$jq, $self;
} else {
push @$jq, $self;
}
$jobOfHandle{$self->{'handle'}} = $self;
return $self;
}
sub Grab {
my ($class, $func) = @_;
return undef unless $job_queue{$func};
my $empty = sub {
delete $job_queue{$func};
return undef;
};
my Job $job;
while (1) {
$job = shift @{$job_queue{$func}};
return $empty->() unless $job;
return $job unless $job->{require_listener};
foreach my Client $c (@{$job->{listeners}}) {
return $job unless $c->{closed};
}
$job->note_finished(0);
}
}
sub GetByHandle {
my ($class, $handle) = @_;
return $jobOfHandle{$handle};
}
sub add_listener {
my Job $self = shift;
my Client $li = shift;
push @{$self->{listeners}}, $li;
}
sub relay_to_listeners {
my Job $self = shift;
foreach my Client $c (@{$self->{listeners}}) {
next if $c->{closed};
$c->write($_[0]);
}
}
sub note_finished {
my Job $self = shift;
my $success = shift;
if (length($self->{uniq})) {
delete $jobOfUniq{$self->{func}}{$self->{uniq}};
}
delete $jobOfHandle{$self->{handle}};
}
# accessors:
sub worker {
my Job $self = shift;
return $self->{'worker'} unless @_;
return $self->{'worker'} = shift;
}
sub require_listener {
my Job $self = shift;
return $self->{'require_listener'} unless @_;
return $self->{'require_listener'} = shift;
}
# takes arrayref of [numerator,denominator]
sub status {
my Job $self = shift;
return $self->{'status'} unless @_;
return $self->{'status'} = shift;
}
sub handle {
my Job $self = shift;
return $self->{'handle'};
}
sub func {
my Job $self = shift;
return $self->{'func'};
}
sub argref {
my Job $self = shift;
return $self->{'argref'};
}
#####################################################################
### C L I E N T C L A S S
#####################################################################
package Client;
our %sleepers; # func -> [ sleepers ] (wiped on wakeup)
our %client_map; # fd -> Client object
# Class Method:
sub new {
my Client $self = shift;
$self = fields::new($self) unless ref $self;
$self->SUPER::new( @_ );
$self->{read_buf} = '';
$self->{sleeping} = 0;
$self->{can_do} = {};
$self->{doing} = {}; # handle -> Job
$self->{can_do_list} = [];
$self->{can_do_iter} = 0; # numeric iterator for where we start looking for jobs
$self->{client_id} = "-";
$client_map{$self->{fd}} = $self;
return $self;
}
# Class Method:
sub WakeUpSleepers {
my ($class, $func) = @_;
return unless $sleepers{$func};
my Client $c;
foreach $c (@{$sleepers{$func}}) {
next if $c->{closed} || ! $c->{sleeping};
$c->res_packet("noop");
$c->{sleeping} = 0;
}
delete $sleepers{$func};
return;
}
sub close {
my Client $self = shift;
while (my ($handle, $job) = each %{$self->{doing}}) {
my $msg = Gearman::Util::pack_res_command("work_fail", $handle);
$job->relay_to_listeners($msg);
$job->note_finished(0);
}
delete $client_map{$self->{fd}};
$self->CMD_reset_abilities;
$self->SUPER::close;
}
# Client
sub event_read {
my Client $self = shift;
my $bref = $self->read(1024);
return $self->close unless defined $bref;
$self->{read_buf} .= $$bref;
my $found_cmd;
do {
$found_cmd = 1;
my $blen = length($self->{read_buf});
if ($self->{read_buf} =~ /^\0REQ(.{8,8})/s) {
my ($cmd, $len) = unpack("NN", $1);
if ($blen < $len + 12) {
# not here yet.
$found_cmd = 0;
return;
}
$self->process_cmd($cmd, substr($self->{read_buf}, 12, $len));
# and slide down buf:
$self->{read_buf} = substr($self->{read_buf}, 12+$len);
} elsif ($self->{read_buf} =~ s/^(\w.+?)?\r?\n//) {
# ASCII command case (useful for telnetting in)
my $line = $1;
$self->process_line($line);
} else {
$found_cmd = 0;
}
} while ($found_cmd);
}
# line-based commands
sub process_line {
my Client $self = shift;
my $line = shift;
if ($line =~ /^(\w+)\s*(.*)/) {
my ($cmd, $args) = ($1, $2);
$cmd = lc($cmd);
no strict 'refs';
my $cmd_handler = *{"TXTCMD_$cmd"}{CODE};
if ($cmd_handler) {
my $args = decode_url_args(\$args);
$cmd_handler->($self, $args);
next;
}
}
return $self->err_line('unknown_command');
}
sub TXTCMD_workers {
my Client $self = shift;
my $args = shift;
foreach my $fd (sort { $a <=> $b } keys %client_map) {
my Client $cl = $client_map{$fd};
$self->write("$fd " . $cl->peer_ip_string . " $cl->{client_id} : @{$cl->{can_do_list}}\n");
}
$self->write(".\n");
}
sub CMD_echo_req {
my Client $self = shift;
my $blobref = shift;
return $self->res_packet("echo_res", $$blobref);
}
sub CMD_work_status {
my Client $self = shift;
my $ar = shift;
my ($handle, $nu, $de) = split(/\0/, $$ar);
my $job = $self->{doing}{$handle};
return $self->error_packet("not_worker") unless $job && $job->worker == $self;
my $msg = Gearman::Util::pack_res_command("work_status", $$ar);
$job->relay_to_listeners($msg);
$job->status([$nu, $de]);
return 1;
}
sub CMD_work_complete {
my Client $self = shift;
my $ar = shift;
$$ar =~ s/^(.+?)\0//;
my $handle = $1;
my $job = delete $self->{doing}{$handle};
return $self->error_packet("not_worker") unless $job && $job->worker == $self;
my $msg = Gearman::Util::pack_res_command("work_complete", join("\0", $handle, $$ar));
$job->relay_to_listeners($msg);
$job->note_finished(1);
return 1;
}
sub CMD_work_fail {
my Client $self = shift;
my $ar = shift;
my $handle = $$ar;
my $job = delete $self->{doing}{$handle};
return $self->error_packet("not_worker") unless $job && $job->worker == $self;
my $msg = Gearman::Util::pack_res_command("work_fail", $handle);
$job->relay_to_listeners($msg);
$job->note_finished(1);
return 1;
}
sub CMD_pre_sleep {
my Client $self = shift;
$self->{'sleeping'} = 1;
foreach my $cd (@{$self->{can_do_list}}) {
# immediately wake the sleeper up if there are things to be done
if ($job_queue{$cd}) {
$self->res_packet("noop");
$self->{sleeping} = 0;
return;
}
push @{$sleepers{$cd} ||= []}, $self;
}
return 1;
}
sub CMD_grab_job {
my Client $self = shift;
my $job;
my $can_do_size = scalar @{$self->{can_do_list}};
unless ($can_do_size) {
$self->res_packet("no_job");
return;
}
# the offset where we start asking for jobs, to prevent starvation
# of some job types.
$self->{can_do_iter} = ($self->{can_do_iter} + 1) % $can_do_size;
my $tried = 0;
while ($tried < $can_do_size) {
my $idx = ($tried + $self->{can_do_iter}) % $can_do_size;
$tried++;
my $job_to_grab = $self->{can_do_list}->[$idx];
$job = Job->Grab($job_to_grab);
if ($job) {
$job->worker($self);
$self->{doing}{$job->handle} = $job;
return $self->res_packet("job_assign",
join("\0",
$job->handle,
$job->func,
${$job->argref},
));
}
}
$self->res_packet("no_job");
}
sub CMD_can_do {
my Client $self = shift;
my $ar = shift;
$self->{can_do}->{$$ar} = 1;
$self->_setup_can_do_list;
}
sub CMD_set_client_id {
my Client $self = shift;
my $ar = shift;
$self->{client_id} = $$ar;
$self->{client_id} =~ s/\s+//g;
$self->{client_id} = "-" unless length $self->{client_id};
}
sub CMD_cant_do {
my Client $self = shift;
my $ar = shift;
delete $self->{can_do}->{$$ar};
$self->_setup_can_do_list;
}
sub CMD_get_status {
my Client $self = shift;
my $ar = shift;
my $job = Job->GetByHandle($$ar);
# handles can't contain nulls
return if $$ar =~ /\0/;
my ($known, $running, $num, $den);
$known = 0;
$running = 0;
if ($job) {
$known = 1;
$running = $job->worker ? 1 : 0;
if (my $stat = $job->status) {
($num, $den) = @$stat;
}
}
$self->res_packet("status_res", join("\0",
$$ar,
$known,
$running,
$num,
$den));
}
sub CMD_reset_abilities {
my Client $self = shift;
$self->{can_do} = {};
$self->_setup_can_do_list;
}
sub _setup_can_do_list {
my Client $self = shift;
$self->{can_do_list} = [ keys %{$self->{can_do}} ];
$self->{can_do_iter} = 0;
}
sub CMD_submit_job { push @_, 1; &_cmd_submit_job; }
sub CMD_submit_job_bg { push @_, 0; &_cmd_submit_job; }
sub CMD_submit_job_high { push @_, 1, 1; &_cmd_submit_job; }
sub _cmd_submit_job {
my Client $self = shift;
my $ar = shift;
my $subscribe = shift;
my $high_pri = shift;
return $self->error_packet("invalid_args", "No func/uniq header [$$ar].")
unless $$ar =~ s/^(.+?)\0(.*?)\0//;
my ($func, $uniq) = ($1, $2);
my $job = Job->new($func, $uniq, $ar, $high_pri);
if ($subscribe) {
$job->add_listener($self);
} else {
# background mode
$job->require_listener(0);
}
$self->res_packet("job_created", $job->handle);
Client->WakeUpSleepers($func);
}
sub res_packet {
my Client $self = shift;
my ($code, $arg) = @_;
$self->write(Gearman::Util::pack_res_command($code, $arg));
return 1;
}
sub error_packet {
my Client $self = shift;
my ($code, $msg) = @_;
$self->write(Gearman::Util::pack_res_command("error", "$code\0$msg"));
return 0;
}
sub process_cmd {
my Client $self = shift;
my $cmd = shift;
my $blob = shift;
my $cmd_name = "CMD_" . Gearman::Util::cmd_name($cmd);
my $ret = eval {
$self->$cmd_name(\$blob);
};
return $ret unless $@;
print "Error: $@\n";
return $self->error_packet("server_error", $@);
}
# Client
sub event_err { my $self = shift; $self->close; }
sub event_hup { my $self = shift; $self->close; }
sub err_line {
my Client $self = shift;
my $err_code = shift;
my $err_text = {
'unknown_command' => "Unknown server command",
}->{$err_code};
$self->write("ERR $err_code " . eurl($err_text) . "\r\n");
return 0;
}
sub eurl
{
my $a = $_[0];
$a =~ s/([^a-zA-Z0-9_\,\-.\/\\\: ])/uc sprintf("%%%02x",ord($1))/eg;
$a =~ tr/ /+/;
return $a;
}
sub durl
{
my ($a) = @_;
$a =~ tr/+/ /;
$a =~ s/%([a-fA-F0-9][a-fA-F0-9])/pack("C", hex($1))/eg;
return $a;
}
sub decode_url_args
{
my $a = shift;
my $buffer = ref $a ? $a : \$a;
my $ret = {};
my $pair;
my @pairs = split(/&/, $$buffer);
my ($name, $value);
foreach $pair (@pairs)
{
($name, $value) = split(/=/, $pair);
$value =~ tr/+/ /;
$value =~ s/%([a-fA-F0-9][a-fA-F0-9])/pack("C", hex($1))/eg;
$name =~ tr/+/ /;
$name =~ s/%([a-fA-F0-9][a-fA-F0-9])/pack("C", hex($1))/eg;
$ret->{$name} .= $ret->{$name} ? "\0$value" : $value;
}
return $ret;
}
package main;
Client->EventLoop();
# Local Variables:
# mode: perl
# c-basic-indent: 4
# indent-tabs-mode: nil
# End:

6
wcmtools/gearman/t/00-use.t Executable file
View File

@@ -0,0 +1,6 @@
use strict;
use Test::More tests => 3;
use_ok('Gearman::Util');
use_ok('Gearman::Worker');
use_ok('Gearman::Client');

198
wcmtools/gearman/t/10-all.t Executable file
View File

@@ -0,0 +1,198 @@
use strict;
our $Bin;
use FindBin qw( $Bin );
use File::Spec;
use Gearman::Client;
use Storable qw( freeze );
use Test::More tests => 20;
use IO::Socket::INET;
use POSIX qw( :sys_wait_h );
use constant PORT => 9000;
our %Children;
END { kill_children() }
start_server(PORT);
start_server(PORT + 1);
## Sleep, wait for servers to start up before connecting workers.
wait_for_port(PORT);
wait_for_port(PORT + 1);
## Look for 2 job servers, starting at port number PORT.
start_worker(PORT, 2);
start_worker(PORT, 2);
my $client = Gearman::Client->new;
isa_ok($client, 'Gearman::Client');
$client->job_servers('127.0.0.1:' . PORT, '127.0.0.1:' . (PORT + 1));
eval { $client->do_task(sum => []) };
like($@, qr/scalar or scalarref/, 'do_task does not accept arrayref argument');
my $out = $client->do_task(sum => freeze([ 3, 5 ]));
is($$out, 8, 'do_task returned 8 for sum');
my $tasks = $client->new_task_set;
isa_ok($tasks, 'Gearman::Taskset');
my $sum;
my $failed = 0;
my $completed = 0;
my $handle = $tasks->add_task(sum => freeze([ 3, 5 ]), {
on_complete => sub { $sum = ${ $_[0] } },
on_fail => sub { $failed = 1 }
});
$tasks->wait;
is($sum, 8, 'add_task/wait returned 8 for sum');
is($failed, 0, 'on_fail not called on a successful result');
## Now try a task set with 2 tasks, and make sure they are both completed.
$tasks = $client->new_task_set;
my @sums;
$tasks->add_task(sum => freeze([ 1, 1 ]), {
on_complete => sub { $sums[0] = ${ $_[0] } },
});
$tasks->add_task(sum => freeze([ 2, 2 ]), {
on_complete => sub { $sums[1] = ${ $_[0] } },
});
$tasks->wait;
is($sums[0], 2, 'First task completed (sum is 2)');
is($sums[1], 4, 'Second task completed (sum is 4)');
## Test some failure conditions:
## Normal failure (worker returns undef or dies within eval).
is($client->do_task('fail'), undef, 'Job that failed naturally returned undef');
## Worker process exits.
is($client->do_task('fail_exit'), undef,
'Job that failed via exit returned undef');
pid_is_dead(wait());
## Worker process times out (takes longer than fail_after_idle seconds).
TODO: {
todo_skip 'fail_after_idle is not yet implemented', 1;
is($client->do_task('sleep', 5, { fail_after_idle => 3 }), undef,
'Job that timed out after 3 seconds returns failure (fail_after_idle)');
}
## Test retry_count.
my $retried = 0;
is($client->do_task('fail' => '', {
on_retry => sub { $retried++ },
retry_count => 3,
}), undef, 'Failure response is still failure, even after retrying');
is($retried, 3, 'Retried 3 times');
my $tasks = $client->new_task_set;
$completed = 0;
$failed = 0;
$tasks->add_task(fail => '', {
on_complete => sub { $completed = 1 },
on_fail => sub { $failed = 1 },
});
$tasks->wait;
is($completed, 0, 'on_complete not called on failed result');
is($failed, 1, 'on_fail called on failed result');
## Test high_priority.
## Create a taskset with 4 tasks, and have the 3rd fail.
## In on_fail, add a new task with high priority set, and make sure it
## gets executed before task 4. To make this reliable, we need to first
## kill off all but one of the worker processes.
my @worker_pids = grep $Children{$_} eq 'W', keys %Children;
kill INT => @worker_pids[1..$#worker_pids];
$tasks = $client->new_task_set;
$out = '';
$tasks->add_task(echo_ws => 1, { on_complete => sub { $out .= ${ $_[0] } } });
$tasks->add_task(echo_ws => 2, { on_complete => sub { $out .= ${ $_[0] } } });
$tasks->add_task(echo_ws => 'x', {
on_fail => sub {
$tasks->add_task(echo_ws => 'p', {
on_complete => sub { $out .= ${ $_[0] } },
high_priority => 1
});
},
});
$tasks->add_task(echo_ws => 3, { on_complete => sub { $out .= ${ $_[0] } } });
$tasks->add_task(echo_ws => 4, { on_complete => sub { $out .= ${ $_[0] } } });
$tasks->add_task(echo_ws => 5, { on_complete => sub { $out .= ${ $_[0] } } });
$tasks->add_task(echo_ws => 6, { on_complete => sub { $out .= ${ $_[0] } } });
$tasks->wait;
like($out, qr/p.+6/, 'High priority tasks executed in priority order.');
## We just killed off all but one worker--make sure they get respawned.
respawn_children();
## Test dispatch_background and get_status.
my $out;
my $handle = $client->dispatch_background(long => undef, {
on_complete => sub { $out = ${ $_[0] } },
});
ok($handle, 'Got a handle back from dispatching background job');
my $status = $client->get_status($handle);
isa_ok($status, 'Gearman::JobStatus');
ok($status->running, 'Job is still running');
is($status->percent, .5, 'Job is 50 percent complete');
do {
sleep 1;
$status = $client->get_status($handle);
} until $status->percent == 1;
sub pid_is_dead {
my($pid) = @_;
return if $pid == -1;
my $type = delete $Children{$pid};
if ($type eq 'W') {
## Right now we can only restart workers.
start_worker(PORT, 2);
}
}
sub respawn_children {
for my $pid (keys %Children) {
if (waitpid($pid, WNOHANG) > 0) {
pid_is_dead($pid);
}
}
}
sub start_server {
my($port) = @_;
my $server = File::Spec->catfile($Bin, '..', 'server', 'gearmand');
my $pid = start_child([ $server, '-p', $port ]);
$Children{$pid} = 'S';
}
sub start_worker {
my($port, $num) = @_;
my $worker = File::Spec->catfile($Bin, 'worker.pl');
my $servers = join ',',
map '127.0.0.1:' . (PORT + $_),
0..$num-1;
my $pid = start_child([ $worker, '-s', $servers ]);
$Children{$pid} = 'W';
}
sub start_child {
my($cmd) = @_;
my $pid = fork();
die $! unless defined $pid;
unless ($pid) {
exec 'perl', '-Iblib/lib', '-Ilib', @$cmd or die $!;
}
$pid;
}
sub kill_children {
kill INT => keys %Children;
}
sub wait_for_port {
my($port) = @_;
my $start = time;
while (1) {
my $sock = IO::Socket::INET->new(PeerAddr => "127.0.0.1:$port");
return 1 if $sock;
select undef, undef, undef, 0.25;
die "Timeout waiting for port $port to startup" if time > $start + 5;
}
}

41
wcmtools/gearman/t/worker.pl Executable file
View File

@@ -0,0 +1,41 @@
#!/usr/bin/perl -w
use strict;
use Gearman::Worker;
use Storable qw( thaw );
use Getopt::Long qw( GetOptions );
GetOptions(
's|servers=s', \my($servers),
);
die "usage: $0 -s <servers>" unless $servers;
my @servers = split /,/, $servers;
my $worker = Gearman::Worker->new;
$worker->job_servers(@servers);
$worker->register_function(sum => sub {
my $sum = 0;
$sum += $_ for @{ thaw($_[0]->arg) };
$sum;
});
$worker->register_function(fail => sub { undef });
$worker->register_function(fail_exit => sub { exit 255 });
$worker->register_function(sleep => sub { sleep $_[0]->arg });
$worker->register_function(echo_ws => sub {
select undef, undef, undef, 0.25;
$_[0]->arg eq 'x' ? undef : $_[0]->arg;
});
$worker->register_function(long => sub {
my($job) = @_;
$job->set_status(50, 100);
sleep 2;
$job->set_status(100, 100);
sleep 2;
});
$worker->work while 1;