#!/usr/bin/perl
#
# dbcheck.pl -- report connection and replication status of the databases
# listed in the master's `dbinfo` / `dbweights` tables.
#
# With no options, prints one line per database (id, name, master id,
# replication lag, binlog count, busy/total connections, timezone, roles)
# and then a list of detected errors on STDERR.  See --help for the modes.
#

use strict;
use DBI;
use Getopt::Long;
use Time::HiRes ();

# Command-line flags.  Several of these are accepted but not acted upon in
# this file; they are kept so existing invocations continue to parse.
my $help = 0;
my $opt_fh = 0;
my $opt_fix = 0;
my $opt_start = 0;
my $opt_stop = 0;
my $opt_err = 0;
my $opt_all = 0;
my $opt_tablestatus;
my $opt_checkreport = 0;
my $opt_rates;
my @opt_run;

exit 1 unless GetOptions(
    'help'        => \$help,
    'flushhosts'  => \$opt_fh,
    'start'       => \$opt_start,
    'stop'        => \$opt_stop,
    'checkreport' => \$opt_checkreport,
    'rates'       => \$opt_rates,
    'fix'         => \$opt_fix,
    'run=s'       => \@opt_run,
    'onlyerrors'  => \$opt_err,
    'all'         => \$opt_all,
    'tablestatus' => \$opt_tablestatus,
);

unless (-d $ENV{'LJHOME'}) {
    die "\$LJHOME not set.\n";
}

if ($help) {
    die ("Usage: dbcheck.pl [opts] [[cmd] args...]\n" .
         " --all Check all hosts, even those with no weight assigned.\n" .
         " --help Get this help\n" .
         " --flushhosts Send 'FLUSH HOSTS' to each db as root.\n".
         " --fix Fix (once) common problems.\n".
         " --checkreport Show tables that haven't been checked in a while.\n".
         " --rates Continuously show queries-per-second for each host.\n".
         " --stop Stop replication.\n".
         " --start Start replication.\n".
         " --run Run arbitrary SQL.\n".
         " --onlyerrors Will be silent unless there are errors.\n".
         " --tablestatus Show warnings about full/sparse tables.\n".
         "\n".
         "Commands\n".
         " (none) Shows replication status.\n".
         " queries Shows active queries on host, sorted by running time.\n"
    );
}

require "$ENV{'LJHOME'}/cgi-bin/ljdb.pl";

my $dbh = LJ::DB::dbh_by_role("master");
die "Can't get master db handle\n" unless $dbh;

my %dbinfo;    # dbid -> hashref
my %name2id;   # name -> dbid
my $sth;
my $masterid = 0;
my %subclust;  # id -> name of parent  (pork-85 -> "pork")

# Load the list of databases.  Rows whose name ends in "-NN" are treated as
# sub-clusters: only their parent's name is remembered and they get no
# %dbinfo entry of their own.
$sth = $dbh->prepare("SELECT dbid, name, masterid, rootfdsn FROM dbinfo");
$sth->execute;
while (my $row = $sth->fetchrow_hashref) {
    if ($row->{name} =~ /(.+)\-\d\d$/) {
        $subclust{$row->{dbid}} = $1;
        next;
    }
    next unless $row->{'dbid'};
    $dbinfo{$row->{'dbid'}} = $row;
    $name2id{$row->{'name'}} = $row->{'dbid'};
}

my %role;      # rolename -> dbid -> [ norm, curr ]
my %rolebyid;  # dbid -> rolename -> [ norm, curr ]

# Load role weights.  Weights recorded against a sub-cluster id are folded
# into the parent database's entry.
$sth = $dbh->prepare("SELECT dbid, role, norm, curr FROM dbweights");
$sth->execute;
while (my $row = $sth->fetchrow_hashref) {
    my $id = $row->{dbid};
    if ($subclust{$id}) {
        $id = $name2id{$subclust{$id}};
    }
    next unless defined $dbinfo{$id};
    $dbinfo{$id}->{'totalweight'} += $row->{'curr'};
    $role{$row->{role}}->{$id} = [ $row->{norm}, $row->{curr} ];
    $rolebyid{$id}->{$row->{role}} = [ $row->{norm}, $row->{curr} ];
}

# Both of these report modes terminate the program themselves (check_report
# calls exit; rate_report loops forever).
check_report() if $opt_checkreport;
rate_report() if $opt_rates;

my @errors;
my %master_status;  # dbid -> [ $file, $pos ]

# Record the binlog file/position reported by "SHOW MASTER STATUS" for the
# given dbid in %master_status.  Databases we cannot connect to are skipped
# silently here; $check reports the connection failure.
my $check_master_status = sub {
    my $dbid = shift;
    my $d = $dbinfo{$dbid};
    die "Bogus DB: $dbid" unless $d;

    my $db = LJ::DB::root_dbh_by_name($d->{name});
    # 'return', not 'next': we are inside a sub, not directly in a loop body.
    return unless $db;

    my ($masterfile, $masterpos) = $db->selectrow_array("SHOW MASTER STATUS");
    $master_status{$dbid} = [ $masterfile, $masterpos ];
};

# Inspect one database: count connections, count master logs, examine slave
# status, append any problems to @errors, and (unless --onlyerrors) print a
# one-line summary.  Returns 0 if the database could not be reached.
my $check = sub {
    my $dbid = shift;
    my $d = $dbinfo{$dbid};
    die "Bogus DB: $dbid" unless $d;

    # calculate roles to show
    my $roles;
    {
        my %drole;  # display role -> 1
        foreach my $role (grep { $role{$_}{$dbid}[1] } keys %{$rolebyid{$dbid}}) {
            my $drole = $role;
            # collapse the last digit of a cluster number so that related
            # cluster roles are displayed once
            $drole =~ s/cluster(\d+)\d/cluster${1}0/;
            $drole{$drole} = 1;
        }
        $roles = join(", ", sort keys %drole);
    }

    my $db = LJ::DB::root_dbh_by_name($d->{name});
    unless ($db) {
        # The two empty strings fill the replication and connection columns,
        # which are unknown for an unreachable host.  $roles is passed as an
        # argument so that a '%' in a role name cannot corrupt the format.
        printf("%4d %-15s %4s %16s %14s (%s)\n",
               $dbid,
               $d->{name},
               $d->{masterid} ? $d->{masterid} : "",
               "",
               "",
               $roles) unless $opt_err;
        push @errors, "Can't connect to $d->{'name'}";
        return 0;
    }

    my $tzone;
    (undef, $tzone) = $db->selectrow_array("show variables like 'timezone'");

    # Count connections, skipping idle/waiting and replication threads.
    $sth = $db->prepare("SHOW PROCESSLIST");
    $sth->execute;
    my $pcount_total = 0;
    my $pcount_busy = 0;
    while (my $r = $sth->fetchrow_hashref) {
        next if $r->{'State'} =~ /waiting for/i;
        next if $r->{'State'} eq "Reading master update";
        next if $r->{'State'} =~ /^(Has (sent|read) all)|(Sending binlog)/;
        $pcount_total++;
        $pcount_busy++ if $r->{'State'};
    }

    # Only hosts that reported a master position are asked for their logs.
    my $log_count = 0;
    if ($master_status{$dbid} && $master_status{$dbid}->[1]) {
        $sth = $db->prepare("SHOW MASTER LOGS");
        $sth->execute;
        while (my ($log) = $sth->fetchrow_array) {
            $log_count++;
        }
    }

    my $ss = $db->selectrow_hashref("show slave status");
    if ($ss) {
        # add lower-cased aliases of every column so the lookups below work
        # regardless of the case the server uses for column names
        foreach my $k (sort keys %$ss) {
            $ss->{lc $k} = $ss->{$k};
        }
    }

    my $diff;
    if ($ss) {
        if ($ss->{'slave_io_running'} eq "Yes" && $ss->{'slave_sql_running'} eq "Yes") {
            if ($ss->{'master_log_file'} eq $ss->{'relay_master_log_file'}) {
                $diff = $ss->{'read_master_log_pos'} - $ss->{'exec_master_log_pos'};
            } else {
                $diff = "XXXXXXX";
                push @errors, "Wrong log file: $d->{name}";
            }
        } else {
            $diff = "XXXXXXX";
            # strip non-printable characters from the server's error text
            $ss->{last_error} =~ s/[^\n\r\t\x20-\x7e]//g;
            push @errors, "Slave not running: $d->{name}: $ss->{last_error}";
        }

        my $ms = $master_status{$d->{masterid}} || [];
        #print " master: [@$ms], slave at: [$ss->{master_log_file}, $ss->{read_master_log_pos}]\n";

        # Flag the slave if it is reading a different log file than its
        # master is writing, or is more than 20_000 behind the master's
        # position (presumably bytes of binlog -- TODO confirm).
        if ($ss->{master_log_file} ne $ms->[0] || $ss->{read_master_log_pos} < $ms->[1] - 20_000) {
            push @errors, "$d->{name}: Relay log behind: master=[@$ms], $d->{name}=[$ss->{master_log_file}, $ss->{read_master_log_pos}]";
        }
    } else {
        $diff = "-";  # not applicable
    }

    #print "$dbid of $d->{masterid}: $d->{name} ($roles)\n";
    # $tzone and $roles are passed as arguments rather than interpolated
    # into the format, so a literal '%' in either is printed verbatim.
    printf("%4d %-15s %4s repl:%7s %4s conn:%4d/%4d %s (%s)\n",
           $dbid,
           $d->{name},
           $d->{masterid} ? $d->{masterid} : "",
           $diff,
           $log_count ? sprintf("<%2s>", $log_count) : "",
           $pcount_busy,
           $pcount_total,
           $tzone,
           $roles) unless $opt_err;
};

# Gather every master position first: $check compares each slave against
# its master's entry in %master_status.
$check_master_status->($_) foreach (sorted_dbids());
$check->($_) foreach (sorted_dbids());

if (@errors) {
    if ($opt_err) {
        # With --onlyerrors, drop any error whose exact text appears as a
        # line of ~/.dbcheck.ignore.  A missing or unreadable file simply
        # means nothing is ignored.
        my %ignore;
        my $ignore_file = "$ENV{'HOME'}/.dbcheck.ignore";
        if (open(my $ignore_fh, '<', $ignore_file)) {
            while (my $line = <$ignore_fh>) {
                $line =~ s/\s+$//;
                $ignore{$line} = 1;
            }
            close $ignore_fh;
        }
        @errors = grep { ! $ignore{$_} } @errors;
    }
    print STDERR "\nERRORS:\n" if @errors;
    foreach (@errors) {
        print STDERR " * $_\n";
    }
}

my $sorted_cache;

# Return the list of dbids in display order, computing it on first use and
# reusing the cached result afterwards.
sub sorted_dbids {
    return @$sorted_cache if $sorted_cache;
    $sorted_cache = [ _sorted_dbids() ];
    return @$sorted_cache;
}

# Build the display order: the database holding the 'master' role, then
# databases with a 'slave' role (by name), then databases with no cluster
# role (by name), then cluster databases by lowest cluster number with
# cluster masters listed before the others.
sub _sorted_dbids {
    my @ids;
    my %added;  # dbid -> 1

    my $add = sub {
        my $dbid = shift;
        $added{$dbid} = 1;
        push @ids, $dbid;
    };

    my $masterid = (keys %{$role{'master'}})[0];
    $add->($masterid);

    # then slaves
    foreach my $id (sort { $dbinfo{$a}->{name} cmp $dbinfo{$b}->{name} }
                    grep { ! $added{$_} && $rolebyid{$_}->{slave} }
                    keys %dbinfo) {
        $add->($id);
    }

    # now, figure out which remaining are associated with cluster roles (user clusters)
    my %minclust;   # dbid -> minimum cluster number associated
    my %is_master;  # dbid -> bool (is cluster master)
    foreach my $dbid (grep { ! $added{$_} } keys %dbinfo) {
        foreach my $role (keys %{ $rolebyid{$dbid} || {} }) {
            next unless $role =~ /^cluster(\d+)(.*)/;
            $minclust{$dbid} = $1 if ! $minclust{$dbid} || $1 < $minclust{$dbid};
            # a bare "clusterN" role, or one suffixed "a" or "b", marks a master
            $is_master{$dbid} ||= $2 eq "" || $2 eq "a" || $2 eq "b";
        }
    }

    # then misc
    foreach my $id (sort { $dbinfo{$a}->{name} cmp $dbinfo{$b}->{name} }
                    grep { ! $added{$_} && ! $minclust{$_} }
                    keys %dbinfo) {
        $add->($id);
    }

    # then clusters, in order
    foreach my $id (sort { $minclust{$a} <=> $minclust{$b} ||
                           $is_master{$b} <=> $is_master{$a} }
                    grep { ! $added{$_} && $minclust{$_} }
                    keys %dbinfo) {
        $add->($id);
    }

    return @ids;
}

# Print one tab-separated line per table on every database server:
#   host, database, table, last check time, engine-rowformat, row count
# Hosts that cannot be reached get a single line of '?' placeholders.
# Exits the program with status 0 when done.
sub check_report {
    foreach my $dbid (sort { $dbinfo{$a}->{name} cmp $dbinfo{$b}->{name} } keys %dbinfo) {
        my $d = $dbinfo{$dbid};
        die "Bogus DB: $dbid" unless $d;

        my $db = LJ::DB::root_dbh_by_name($d->{name});
        unless ($db) {
            print "$d->{name}\t?\t?\t?\n";
            next;
        }

        my $dbs = $db->selectcol_arrayref("SHOW DATABASES");
        foreach my $dbname (@$dbs) {
            $db->do("USE $dbname");
            my $ts = $db->selectall_hashref("SHOW TABLE STATUS", "Name");
            foreach my $tn (sort keys %$ts) {
                my $v = $ts->{$tn};
                my $ut = $v->{Check_time} || "0000-00-00 00:00:00";
                # only the first space (between date and time) is replaced
                $ut =~ s/ /,/;
                # Older servers label the storage-engine column "Type",
                # newer ones "Engine"; accept either.
                my $engine = defined $v->{Type} ? $v->{Type} : $v->{Engine};
                print "$d->{name}\t$dbname\t$tn\t$ut\t$engine-$v->{Row_format}\t$v->{Rows}\n";
            }
        }
    }
    exit 0;
}

# Once per second, print the queries-per-second rate of every reachable
# database (derived from the change in its 'Questions' status counter since
# the previous sample) followed by the sum.  Never returns.
sub rate_report {
    my %prev;  # dbid -> [ time, questions ] from the previous pass
    while (1) {
        print "\n";
        my $sum = 0;
        foreach my $dbid (sorted_dbids()) {
            my $d = $dbinfo{$dbid};
            die "Bogus DB: $dbid" unless $d;

            my $db = LJ::DB::root_dbh_by_name($d->{name});
            next unless $db;

            my (undef, $qs) = $db->selectrow_array("SHOW STATUS LIKE 'Questions'");
            my $now = Time::HiRes::time();
            my $cur = [ $now, $qs ];

            if (my $old = $prev{$dbid}) {
                my $dt = $now - $old->[0];
                my $qnew = $qs - $old->[1];
                # guard against a zero (or negative, after a clock step)
                # interval rather than dividing by it
                if ($dt > 0) {
                    my $rate = ($qnew / $dt);
                    $sum += $rate;
                    printf "%20s: %7.01f q/s\n", $d->{name}, $rate;
                }
            }

            # Always remember the latest sample so the next pass reports the
            # rate over the last interval, not the average since start-up.
            $prev{$dbid} = $cur;
        }
        printf "%20s: %7.01f q/s\n", "SUM", $sum;
        sleep 1;
    }
}