ljr/livejournal/bin/logsummarize.pl

111 lines
2.8 KiB
Perl
Raw Normal View History

2019-02-05 21:49:12 +00:00
#!/usr/bin/perl
#
# Setup: load the LiveJournal library, obtain the "logs" database handle,
# and build the ordered list of daily access tables to be summarized.
use strict;

# ljlib.pl is loaded relative to $LJHOME, so it must name a real directory.
die "\$LJHOME not set.\n" unless -d $ENV{'LJHOME'};

require "$ENV{'LJHOME'}/cgi-bin/ljlib.pl";

my $db = LJ::get_dbh("logs");
die "No 'logs' db handle found.\n" unless $db;

# Set DBI's InactiveDestroy attribute on the handle.
# NOTE(review): presumably so destroying the handle does not tear down a
# connection owned elsewhere -- confirm against LJ::get_dbh.
$db->{'InactiveDestroy'} = 1;

my $sth;
my %table;

# Record the name of every table that starts with "access".
$sth = $db->prepare("SHOW TABLES LIKE 'access%'");
$sth->execute;
for (;;) {
    my $name = $sth->fetchrow_array or last;
    $table{$name} = 1;
}

# Keep only names of the form "access" + 8 digits, in string-sorted order.
my @daily_tables = grep { /^access\d{8,8}$/ } keys %table;
my @need_summary = sort @daily_tables;

# The last table in sort order is treated as the current day, which is not
# summarized yet, so it is removed from the work list.
pop @need_summary;

# Progress counters used while summarizing: total tables and tables started.
my $nsum_total = scalar @need_summary;
my $nsum_ct    = 0;
# Positions of the columns within each row fetched from an access table.
# They follow the order in which the summarizing SELECT lists its columns:
# server, langpref, method, vhost, uri, status, ctype, bytes, browser, ref.
use constant {
    F_SERVER  => 0,
    F_LANG    => 1,
    F_METHOD  => 2,
    F_VHOST   => 3,
    F_URI     => 4,
    F_STATUS  => 5,
    F_CTYPE   => 6,
    F_BYTES   => 7,
    F_BROWSER => 8,
    F_REF     => 9,
};
# Summarize each table in @need_summary into a gzipped, tab-separated stats
# file at $LJHOME/var/stats-<date>.gz, then drop the table.
#
# Per table:
#   1. fetch every row and tally counters in %st, keyed by category
#      (count, http_meth, http_status, browser, host, uri, referer);
#   2. write one "category<TAB>key<TAB>count" line per counter through a
#      pipe to gzip, highest count first within each category;
#   3. drop the table -- only after the pipe to gzip has closed cleanly,
#      so a failed stats write never destroys the source data.
#
# Dies if the pipe cannot be opened, a write fails, or gzip fails on close.
foreach my $table (@need_summary)
{
    $nsum_ct++;
    print "Summarizing $table ($nsum_ct/$nsum_total)\n";

    my $row_total = $db->selectrow_array("SELECT COUNT(*) FROM $table");
    print " rows = $row_total\n";

    my $sth = $db->prepare("SELECT server, langpref, method, vhost, uri, ".
                           " status, ctype, bytes, browser, ref ".
                           "FROM $table");
    # DBD::mysql attribute: fetch rows with mysql_use_result instead of
    # storing the whole result set on the client first.
    $sth->{'mysql_use_result'} = 1;
    $sth->execute;

    my ($r, $row_ct);
    my %st;    # category => { key => count }
    while ($r = $sth->fetchrow_arrayref) {
        $row_ct++;

        # Progress line every 10,000 rows.
        if ($row_ct % 10000 == 0) {
            printf " $row_ct/$row_total (%%%.02f)\n", 100*$row_ct/$row_total;
        }

        # Userpic requests are excluded from every statistic.
        next if ($r->[F_URI] =~ m!^/userpic!);

        $st{'count'}->{'total'}++;
        $st{'count'}->{'bytes'} += $r->[F_BYTES];
        $st{'http_meth'}->{$r->[F_METHOD]}++;
        $st{'http_status'}->{$r->[F_STATUS]}++;
        $st{'browser'}->{$r->[F_BROWSER]}++;
        $st{'host'}->{$r->[F_VHOST]}++;

        # Collapse per-user and per-date path components so that URIs which
        # differ only by user name or date share a single counter.
        if ($r->[F_URI] =~ s!^/(users/|~)\w+/?!/users/*/!) {
            $r->[F_URI] =~ s!day/\d\d\d\d/\d\d/\d\d!day!;
            $r->[F_URI] =~ s!calendar/\d\d\d\d!calendar!;
        }

        # NOTE(review): "(www\.)" is not optional, so the bare host
        # "livejournal.com" falls into the "user:" branch below. That may
        # be intended or a missing "?" -- confirm before changing, since it
        # alters the generated statistics.
        if ($r->[F_VHOST] =~ /^(www\.)livejournal\.com$/) {
            $st{'uri'}->{$r->[F_URI]}++;
        } else {
            # Any other vhost: collapse dates and prefix the key with "user:".
            $r->[F_URI] =~ s!day/\d\d\d\d/\d\d/\d\d!day!;
            $r->[F_URI] =~ s!calendar/\d\d\d\d!calendar!;
            $st{'uri'}->{"user:" . $r->[F_URI]}++;
        }

        # Count referers by host name, skipping hosts ending in
        # livejournal.com.
        my $ref = $r->[F_REF];
        if ($ref =~ m!^http://([^/]+)!) {
            $ref = $1;
            $st{'referer'}->{$ref}++ unless ($ref =~ /livejournal\.com$/);
        }
    }

    # "access20030115" -> "20030115", used in the stats file name.
    my $tabledate = $table;
    $tabledate =~ s/^access//;

    print " Writing stats file.\n";
    my $stats_path = "$ENV{'LJHOME'}/var/stats-$tabledate.gz";

    # The command still goes through the shell for the output redirection.
    # A successful open only means the fork worked; gzip's own failures
    # surface at close() below.
    open(my $stats_fh, '|-', "gzip -c > $stats_path")
        or die "Can't open stats file: $!";

    foreach my $cat (sort keys %st) {
        print "Writing cat: $cat\n";
        my $counts = $st{$cat};
        foreach my $k (sort { $counts->{$b} <=> $counts->{$a} } keys %$counts) {
            print {$stats_fh} "$cat\t$k\t$counts->{$k}\n"
                or die "Failed writing to stats-$tabledate.gz. Device full?\n";
        }
    }

    # close() on a piped handle waits for gzip and returns false if gzip
    # exited non-zero ($! is then 0 and $? holds the wait status) or if a
    # syscall failed ($! set). The table must not be dropped in either case.
    close($stats_fh)
        or die "Failed closing stats-$tabledate.gz ("
             . ($! ? "error: $!" : "gzip wait status $?")
             . "); $table not dropped.\n";

    $db->do("DROP TABLE $table");
}