ljr/ljcom/bin/maint/interests.pl

85 lines
2.7 KiB
Perl

#!/usr/bin/perl
#
# Maintenance task: merge duplicate rows in the `interests` table.
#
# Pass 1 looks at interests whose text starts with a-z or 0-9 and collects
# every interest string that is stored in more than one row.  Interests
# starting with any other character are not examined.
#
# Pass 2 handles each collected string: the lowest intid is kept, every
# other intid for that string is folded into it in `userinterests`, the
# kept row's intcount is recomputed, and the redundant `interests` rows are
# removed.
#
# Takes no arguments and returns nothing useful; progress is printed to
# STDOUT.  Dies if no master database handle can be obtained.
$maint{'clean_intdups'} = sub
{
    my $dbh = LJ::get_dbh("master")
        or die "clean_intdups: unable to get master database handle\n";

    my @dups;

    print "-I- Cleaning duplicates.\n";

    # The query is identical for every prefix, so prepare it once and bind
    # the LIKE pattern on each execute.
    my $find_dups = $dbh->prepare("SELECT interest, COUNT(*) AS 'count' FROM interests WHERE interest LIKE ? GROUP BY 1 HAVING count > 1");
    foreach my $let ('a'..'z', '0'..'9')
    {
        print "-I- Letter $let\n";
        $find_dups->execute("$let%");
        while (my ($interest, $count) = $find_dups->fetchrow_array)
        {
            print " $interest has $count\n";
            push @dups, $interest;
        }
    }

    my $find_ids = $dbh->prepare("SELECT intid FROM interests WHERE interest=? ORDER BY intid");
    foreach my $dup (@dups) {
        print "Fixing: $dup\n";

        # Rows arrive in ascending intid order: the first one is the
        # survivor ($min), all later ones are duplicates to fold into it.
        my $min = 0;
        my @fix = ();
        $find_ids->execute($dup);
        while (my ($id) = $find_ids->fetchrow_array) {
            if ($min) { push @fix, $id; }
            else { $min = $id; }
        }

        if (@fix) {
            my $in   = join(",", @fix);
            my $qdup = $dbh->quote($dup);

            # These statements depend on each other and must run in order.
            # All of them address `userinterests`, the table name used by
            # the rest of this file.
            my @steps = (
                # change duplicate interests to the minimum, ignoring duplicates.
                "UPDATE IGNORE userinterests SET intid=$min WHERE intid IN ($in)",
                # delete ones that had duplicate key conflicts and didn't change
                "DELETE FROM userinterests WHERE intid IN ($in)",
                # update the intcount column
                "REPLACE INTO interests (intid, interest, intcount) SELECT intid, $qdup, COUNT(*) FROM userinterests WHERE intid=$min GROUP BY 1, 2",
                # delete from interests table
                "DELETE FROM interests WHERE intid IN ($in)",
            );

            # Stop at the first failure: running the later DELETEs after a
            # failed repoint would leave userinterests rows pointing at
            # interests that no longer exist.
            foreach my $sql (@steps) {
                next if defined $dbh->do($sql);
                warn "-E- clean_intdups: '$sql' failed ("
                    . ($dbh->errstr || 'unknown error')
                    . "); leaving '$dup' partially merged\n";
                last;
            }
        }
        print " @fix --> $min\n";
    }
};
# Maintenance task: bring interests.intcount back in line with the actual
# number of rows in `userinterests`.
#
# The intid range 1..MAX(userinterests.intid) is processed in windows of
# 5000 ids.  Within each window, every interest whose stored intcount
# differs from COUNT(*) of its userinterests rows is reported on STDOUT and
# updated.  Because the comparison joins the two tables, interests that
# have no userinterests rows at all are not touched here.
#
# Takes no arguments and returns nothing useful.  Dies if no master
# database handle can be obtained.
$maint{'clean_intcounts'} = sub
{
    my $dbh = LJ::get_dbh("master")
        or die "clean_intcounts: unable to get master database handle\n";

    my $window = 5000;

    # MAX() yields NULL (undef) on an empty table; treat that as "no ids".
    my ($max) = $dbh->selectrow_array("SELECT MAX(intid) FROM userinterests");
    $max = 0 unless defined $max;
    print "Fixing intcounts, up to intid=$max\n";

    my $find_wrong = $dbh->prepare("SELECT ui.intid, i.intcount, COUNT(*) AS 'count' FROM userinterests ui, interests i WHERE i.intid=ui.intid AND ui.intid BETWEEN ? AND ? GROUP BY 1, 2 HAVING i.intcount<>COUNT(*)");
    my $set_count  = $dbh->prepare("UPDATE interests SET intcount=? WHERE intid=?");

    # "<=" rather than "<": the window that starts exactly at $max must
    # still be processed, otherwise the highest intid is never checked
    # whenever $max is 1, 5001, 10001, ...
    for (my $low = 1; $low <= $max; $low += $window)
    {
        my $high = $low + $window - 1;
        print "$low..$high:\n";

        # Pull the (at most $window) mismatches into memory first so the
        # UPDATEs are not issued while a result set is still being read.
        $find_wrong->execute($low, $high);
        my $rows = $find_wrong->fetchall_arrayref || [];

        foreach my $row (@$rows) {
            my ($intid, $wrong, $count) = @$row;
            print " $intid: $count, not $wrong\n";
            $set_count->execute($count, $intid);
        }
    }
};
1;