340 lines
9.4 KiB
Perl
340 lines
9.4 KiB
Perl
|
#!/usr/bin/perl
|
||
|
#
|
||
|
|
||
|
use strict;
|
||
|
use DBI;
|
||
|
use Getopt::Long;
|
||
|
|
||
|
my $help = 0;
|
||
|
my $opt_fh = 0;
|
||
|
my $opt_fix = 0;
|
||
|
my $opt_start = 0;
|
||
|
my $opt_stop = 0;
|
||
|
my $opt_err = 0;
|
||
|
my $opt_all = 0;
|
||
|
my $opt_tablestatus;
|
||
|
my $opt_checkreport = 0;
|
||
|
my $opt_rates;
|
||
|
my @opt_run;
|
||
|
exit 1 unless GetOptions('help' => \$help,
|
||
|
'flushhosts' => \$opt_fh,
|
||
|
'start' => \$opt_start,
|
||
|
'stop' => \$opt_stop,
|
||
|
'checkreport' => \$opt_checkreport,
|
||
|
'rates' => \$opt_rates,
|
||
|
'fix' => \$opt_fix,
|
||
|
'run=s' => \@opt_run,
|
||
|
'onlyerrors' => \$opt_err,
|
||
|
'all' => \$opt_all,
|
||
|
'tablestatus' => \$opt_tablestatus,
|
||
|
);
|
||
|
|
||
|
unless (-d $ENV{'LJHOME'}) {
|
||
|
die "\$LJHOME not set.\n";
|
||
|
}
|
||
|
|
||
|
if ($help) {
|
||
|
die ("Usage: dbcheck.pl [opts] [[cmd] args...]\n" .
|
||
|
" --all Check all hosts, even those with no weight assigned.\n" .
|
||
|
" --help Get this help\n" .
|
||
|
" --flushhosts Send 'FLUSH HOSTS' to each db as root.\n".
|
||
|
" --fix Fix (once) common problems.\n".
|
||
|
" --checkreport Show tables that haven't been checked in a while.\n".
|
||
|
" --stop Stop replication.\n".
|
||
|
" --start Start replication.\n".
|
||
|
" --run <sql> Run arbitrary SQL.\n".
|
||
|
" --onlyerrors Will be silent unless there are errors.\n".
|
||
|
" --tablestatus Show warnings about full/sparse tables.\n".
|
||
|
"\n".
|
||
|
"Commands\n".
|
||
|
" (none) Shows replication status.\n".
|
||
|
" queries <host> Shows active queries on host, sorted by running time.\n"
|
||
|
);
|
||
|
}
|
||
|
|
||
|
require "$ENV{'LJHOME'}/cgi-bin/ljdb.pl";
|
||
|
|
||
|
my $dbh = LJ::DB::dbh_by_role("master");
|
||
|
die "Can't get master db handle\n" unless $dbh;
|
||
|
|
||
|
my %dbinfo; # dbid -> hashref
|
||
|
my %name2id; # name -> dbid
|
||
|
my $sth;
|
||
|
my $masterid = 0;
|
||
|
|
||
|
my %subclust; # id -> name of parent (pork-85 -> "pork")
|
||
|
|
||
|
$sth = $dbh->prepare("SELECT dbid, name, masterid, rootfdsn FROM dbinfo");
|
||
|
$sth->execute;
|
||
|
while ($_ = $sth->fetchrow_hashref) {
|
||
|
if ($_->{name} =~ /(.+)\-\d\d$/) {
|
||
|
$subclust{$_->{dbid}} = $1;
|
||
|
next;
|
||
|
}
|
||
|
next unless $_->{'dbid'};
|
||
|
$dbinfo{$_->{'dbid'}} = $_;
|
||
|
$name2id{$_->{'name'}} = $_->{'dbid'};
|
||
|
}
|
||
|
|
||
|
my %role; # rolename -> dbid -> [ norm, curr ]
|
||
|
my %rolebyid; # dbid -> rolename -> [ norm, curr ]
|
||
|
$sth = $dbh->prepare("SELECT dbid, role, norm, curr FROM dbweights");
|
||
|
$sth->execute;
|
||
|
while ($_ = $sth->fetchrow_hashref) {
|
||
|
my $id = $_->{dbid};
|
||
|
if ($subclust{$id}) {
|
||
|
$id = $name2id{$subclust{$id}};
|
||
|
}
|
||
|
next unless defined $dbinfo{$id};
|
||
|
$dbinfo{$id}->{'totalweight'} += $_->{'curr'};
|
||
|
$role{$_->{role}}->{$id} = [ $_->{norm}, $_->{curr} ];
|
||
|
$rolebyid{$id}->{$_->{role}} = [ $_->{norm}, $_->{curr} ];
|
||
|
}
|
||
|
|
||
|
check_report() if $opt_checkreport;
|
||
|
rate_report() if $opt_rates;
|
||
|
|
||
|
my @errors;
|
||
|
my %master_status; # dbid -> [ $file, $pos ]
|
||
|
|
||
|
my $check_master_status = sub {
|
||
|
my $dbid = shift;
|
||
|
my $d = $dbinfo{$dbid};
|
||
|
die "Bogus DB: $dbid" unless $d;
|
||
|
my $db = LJ::DB::root_dbh_by_name($d->{name});
|
||
|
next unless $db;
|
||
|
|
||
|
my ($masterfile, $masterpos) = $db->selectrow_array("SHOW MASTER STATUS");
|
||
|
$master_status{$dbid} = [ $masterfile, $masterpos ];
|
||
|
};
|
||
|
|
||
|
my $check = sub {
|
||
|
my $dbid = shift;
|
||
|
my $d = $dbinfo{$dbid};
|
||
|
die "Bogus DB: $dbid" unless $d;
|
||
|
|
||
|
# calculate roles to show
|
||
|
my $roles;
|
||
|
{
|
||
|
my %drole; # display role -> 1
|
||
|
foreach my $role (grep { $role{$_}{$dbid}[1] } keys %{$rolebyid{$dbid}}) {
|
||
|
my $drole = $role;
|
||
|
$drole =~ s/cluster(\d+)\d/cluster${1}0/;
|
||
|
$drole{$drole} = 1;
|
||
|
}
|
||
|
$roles = join(", ", sort keys %drole);
|
||
|
}
|
||
|
|
||
|
my $db = LJ::DB::root_dbh_by_name($d->{name});
|
||
|
unless ($db) {
|
||
|
printf("%4d %-15s %4s %16s %14s ($roles)\n",
|
||
|
$dbid,
|
||
|
$d->{name},
|
||
|
$d->{masterid} ? $d->{masterid} : "",
|
||
|
) unless $opt_err;
|
||
|
push @errors, "Can't connect to $d->{'name'}";
|
||
|
return 0;
|
||
|
}
|
||
|
|
||
|
my $tzone;
|
||
|
(undef, $tzone) = $db->selectrow_array("show variables like 'timezone'");
|
||
|
|
||
|
$sth = $db->prepare("SHOW PROCESSLIST");
|
||
|
$sth->execute;
|
||
|
my $pcount_total = 0;
|
||
|
my $pcount_busy = 0;
|
||
|
while (my $r = $sth->fetchrow_hashref) {
|
||
|
next if $r->{'State'} =~ /waiting for/i;
|
||
|
next if $r->{'State'} eq "Reading master update";
|
||
|
next if $r->{'State'} =~ /^(Has (sent|read) all)|(Sending binlog)/;
|
||
|
$pcount_total++;
|
||
|
$pcount_busy++ if $r->{'State'};
|
||
|
}
|
||
|
|
||
|
my @master_logs;
|
||
|
my $log_count = 0;
|
||
|
if ($master_status{$dbid} && $master_status{$dbid}->[1]) {
|
||
|
$sth = $db->prepare("SHOW MASTER LOGS");
|
||
|
$sth->execute;
|
||
|
while (my ($log) = $sth->fetchrow_array) {
|
||
|
push @master_logs, $log;
|
||
|
$log_count++;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
my $ss = $db->selectrow_hashref("show slave status");
|
||
|
if ($ss) {
|
||
|
foreach my $k (sort keys %$ss) {
|
||
|
$ss->{lc $k} = $ss->{$k};
|
||
|
}
|
||
|
}
|
||
|
|
||
|
my $diff;
|
||
|
if ($ss) {
|
||
|
if ($ss->{'slave_io_running'} eq "Yes" && $ss->{'slave_sql_running'} eq "Yes") {
|
||
|
if ($ss->{'master_log_file'} eq $ss->{'relay_master_log_file'}) {
|
||
|
$diff = $ss->{'read_master_log_pos'} - $ss->{'exec_master_log_pos'};
|
||
|
} else {
|
||
|
$diff = "XXXXXXX";
|
||
|
push @errors, "Wrong log file: $d->{name}";
|
||
|
}
|
||
|
} else {
|
||
|
$diff = "XXXXXXX";
|
||
|
$ss->{last_error} =~ s/[^\n\r\t\x20-\x7e]//g;
|
||
|
push @errors, "Slave not running: $d->{name}: $ss->{last_error}";
|
||
|
}
|
||
|
|
||
|
my $ms = $master_status{$d->{masterid}} || [];
|
||
|
#print " master: [@$ms], slave at: [$ss->{master_log_file}, $ss->{read_master_log_pos}]\n";
|
||
|
if ($ss->{master_log_file} ne $ms->[0] || $ss->{read_master_log_pos} < $ms->[1] - 20_000) {
|
||
|
push @errors, "$d->{name}: Relay log behind: master=[@$ms], $d->{name}=[$ss->{master_log_file}, $ss->{read_master_log_pos}]";
|
||
|
}
|
||
|
|
||
|
} else {
|
||
|
$diff = "-"; # not applicable
|
||
|
}
|
||
|
|
||
|
#print "$dbid of $d->{masterid}: $d->{name} ($roles)\n";
|
||
|
printf("%4d %-15s %4s repl:%7s %4s conn:%4d/%4d $tzone ($roles)\n",
|
||
|
$dbid,
|
||
|
$d->{name},
|
||
|
$d->{masterid} ? $d->{masterid} : "",
|
||
|
$diff,
|
||
|
$log_count ? sprintf("<%2s>", $log_count) : "",
|
||
|
$pcount_busy, $pcount_total) unless $opt_err;
|
||
|
};
|
||
|
|
||
|
$check_master_status->($_) foreach (sorted_dbids());
|
||
|
|
||
|
$check->($_) foreach (sorted_dbids());
|
||
|
|
||
|
if (@errors) {
|
||
|
if ($opt_err) {
|
||
|
my %ignore;
|
||
|
open(EX, "$ENV{'HOME'}/.dbcheck.ignore");
|
||
|
while (<EX>) {
|
||
|
s/\s+$//;
|
||
|
$ignore{$_} = 1;
|
||
|
}
|
||
|
close EX;
|
||
|
@errors = grep { ! $ignore{$_} } @errors;
|
||
|
}
|
||
|
print STDERR "\nERRORS:\n" if @errors;
|
||
|
foreach (@errors) {
|
||
|
print STDERR " * $_\n";
|
||
|
}
|
||
|
}
|
||
|
|
||
|
my $sorted_cache;
|
||
|
sub sorted_dbids {
|
||
|
return @$sorted_cache if $sorted_cache;
|
||
|
$sorted_cache = [ _sorted_dbids() ];
|
||
|
return @$sorted_cache;
|
||
|
}
|
||
|
|
||
|
sub _sorted_dbids {
|
||
|
my @ids;
|
||
|
my %added; # dbid -> 1
|
||
|
|
||
|
my $add = sub {
|
||
|
my $dbid = shift;
|
||
|
$added{$dbid} = 1;
|
||
|
push @ids, $dbid;
|
||
|
};
|
||
|
|
||
|
my $masterid = (keys %{$role{'master'}})[0];
|
||
|
$add->($masterid);
|
||
|
|
||
|
# then slaves
|
||
|
foreach my $id (sort { $dbinfo{$a}->{name} cmp $dbinfo{$b}->{name} }
|
||
|
grep { ! $added{$_} && $rolebyid{$_}->{slave} } keys %dbinfo) {
|
||
|
$add->($id);
|
||
|
}
|
||
|
|
||
|
# now, figure out which remaining are associated with cluster roles (user clusters)
|
||
|
my %minclust; # dbid -> minimum cluster number associated
|
||
|
my %is_master; # dbid -> bool (is cluster master)
|
||
|
foreach my $dbid (grep { ! $added{$_} } keys %dbinfo) {
|
||
|
foreach my $role (keys %{ $rolebyid{$dbid} || {} }) {
|
||
|
next unless $role =~ /^cluster(\d+)(.*)/;
|
||
|
$minclust{$dbid} = $1 if ! $minclust{$dbid} || $1 < $minclust{$dbid};
|
||
|
$is_master{$dbid} ||= $2 eq "" || $2 eq "a" || $2 eq "b";
|
||
|
}
|
||
|
}
|
||
|
|
||
|
# then misc
|
||
|
foreach my $id (sort { $dbinfo{$a}->{name} cmp $dbinfo{$b}->{name} }
|
||
|
grep { ! $added{$_} && ! $minclust{$_} } keys %dbinfo) {
|
||
|
$add->($id);
|
||
|
}
|
||
|
|
||
|
|
||
|
# then clusters, in order
|
||
|
foreach my $id (sort { $minclust{$a} <=> $minclust{$b} ||
|
||
|
$is_master{$b} <=> $is_master{$a} }
|
||
|
grep { ! $added{$_} && $minclust{$_} } keys %dbinfo) {
|
||
|
$add->($id);
|
||
|
}
|
||
|
return @ids;
|
||
|
}
|
||
|
|
||
|
sub check_report {
|
||
|
foreach my $dbid (sort { $dbinfo{$a}->{name} cmp $dbinfo{$b}->{name} }
|
||
|
keys %dbinfo) {
|
||
|
my $d = $dbinfo{$dbid};
|
||
|
die "Bogus DB: $dbid" unless $d;
|
||
|
my $db = LJ::DB::root_dbh_by_name($d->{name});
|
||
|
|
||
|
unless ($db) {
|
||
|
print "$d->{name}\t?\t?\t?\n";
|
||
|
next;
|
||
|
}
|
||
|
|
||
|
my $dbs = $db->selectcol_arrayref("SHOW DATABASES");
|
||
|
foreach my $dbname (@$dbs) {
|
||
|
$db->do("USE $dbname");
|
||
|
my $ts = $db->selectall_hashref("SHOW TABLE STATUS", "Name");
|
||
|
foreach my $tn (sort keys %$ts) {
|
||
|
my $v = $ts->{$tn};
|
||
|
my $ut = $v->{Check_time} || "0000-00-00 00:00:00";
|
||
|
$ut =~ s/ /,/;
|
||
|
print "$d->{name}\t$dbname\t$tn\t$ut\t$v->{Type}-$v->{Row_format}\t$v->{Rows}\n";
|
||
|
}
|
||
|
|
||
|
}
|
||
|
}
|
||
|
exit 0;
|
||
|
}
|
||
|
|
||
|
use Time::HiRes ();
|
||
|
|
||
|
sub rate_report {
|
||
|
my %prev; # dbid -> [ time, questions ]
|
||
|
|
||
|
while (1) {
|
||
|
print "\n";
|
||
|
my $sum = 0;
|
||
|
foreach my $dbid (sorted_dbids()) {
|
||
|
my $d = $dbinfo{$dbid};
|
||
|
die "Bogus DB: $dbid" unless $d;
|
||
|
my $db = LJ::DB::root_dbh_by_name($d->{name});
|
||
|
|
||
|
next unless $db;
|
||
|
my (undef, $qs) = $db->selectrow_array("SHOW STATUS LIKE 'Questions'");
|
||
|
my $now = Time::HiRes::time();
|
||
|
my $cur = [ $now, $qs ];
|
||
|
if (my $old = $prev{$dbid}) {
|
||
|
my $dt = $now - $old->[0];
|
||
|
my $qnew = $qs - $old->[1];
|
||
|
my $rate = ($qnew / $dt);
|
||
|
$sum += $rate;
|
||
|
printf "%20s: %7.01f q/s\n", $d->{name}, $rate;
|
||
|
}
|
||
|
$prev{$dbid} ||= $cur;
|
||
|
}
|
||
|
printf "%20s: %7.01f q/s\n", "SUM", $sum;
|
||
|
|
||
|
sleep 1;
|
||
|
}
|
||
|
}
|