ljr/local/cgi-bin/CSS/Cleaner.pm

153 lines
4.5 KiB
Perl

#
# Note: this is a very early version of a CSS cleaner. The plan is to eventually
# make it a white-listing CSS cleaner (deny by default) with a nice
# interface where you can build policy about what's allowed, like
# HTML::Sanitize/::Scrub/etc, but for now this is almost a null cleaner,
# just parsing and reserializing the CSS, removing two trivial ways to
# inject javascript.
#
# The plan now is to integrate this interface into LiveJournal, then improve
# this module over time.
#
# Note2: we tried 4 different CSS parsers for this module to use, and all 4 sucked.
# so for now this module sucks, until we can find a suitable parser. for the
# record, CSS::Tiny, CSS, and CSS::SAC all didn't work. and csstidy wasn't
# incredibly hot either. CSS.pm's grammar was buggy, and CSS::SAC had the
# best interface (SAC) but terrible parsing of selectors. we'll probably
# have to write our own, based on the Mozilla CSS parsing code.
package CSS::Cleaner;
use strict;
use vars qw($VERSION);
$VERSION = '0.01';
sub new {
my $class = shift;
my %opts = @_;
my $self = bless {}, $class;
if (defined( $opts{rule_handler} )) {
my $rule_handler = $opts{rule_handler};
die "rule_handler needs to be a coderef if supplied" unless ref( $rule_handler ) eq 'CODE';
$self->{rule_handler} = $rule_handler;
}
if (defined( $opts{pre_hook} )) {
my $pre_hook = $opts{pre_hook};
die "pre_hook needs to be a coderef if supplied" unless ref( $pre_hook ) eq 'CODE';
$self->{pre_hook} = $pre_hook;
}
return $self;
}
# cleans CSS
sub clean {
my ($self, $target) = @_;
$self->_stupid_clean(\$target);
return $target;
}
# cleans CSS properties, as if it were in a style="" attribute
sub clean_property {
my ($self, $target) = @_;
$self->_stupid_clean(\$target);
return $target;
}
# this is so stupid. see notes at top.
# returns 1 if it was okay, 0 if possibly malicious
sub _stupid_clean {
my ($self, $ref) = @_;
my $reduced = $$ref;
if (defined( $self->{pre_hook} )) {
$self->{pre_hook}->( \$reduced );
}
$reduced =~ s/&\#(\d+);?/chr($1)/eg;
$reduced =~ s/&\#x(\w+);?/chr(hex($1))/eg;
if ($reduced =~ /[\x00-\x08\x0B\x0C\x0E-\x1F]/) {
$$ref = "/* suspect CSS: low bytes */";
return;
}
if ($reduced =~ /[\x7f-\xff]/) {
$$ref = "/* suspect CSS: high bytes */";
return;
}
# returns 1 if something bad was found
my $check_for_bad = sub {
if ($reduced =~ m!<\w!) {
$$ref = "/* suspect CSS: start HTML tag? */";
return 1;
}
my $with_white = $reduced;
$reduced =~ s/[\s\x0b]+//g;
if ($reduced =~ m!\\[a-f0-9]!i) {
$$ref = "/* suspect CSS: backslash hex */";
return;
}
$reduced =~ s/\\//g;
if ($reduced =~ /\@(import|charset)([\s\x0A\x0D]*[^\x0A\x0D]*)/i) {
my $what = $1;
my $value = $2;
if (defined( $self->{rule_handler} )) {
return $self->{rule_handler}->( $ref, $what, $value );
} else {
$$ref = "/* suspect CSS: $what rule */";
return;
}
}
if ($reduced =~ /&\#/) {
$$ref = "/* suspect CSS: found irregular &# */";
return;
}
if ($reduced =~ m!</!) {
$$ref = "/* suspect CSS: close HTML tag */";
return;
}
# returns 1 if bad phrases found
my $check_phrases = sub {
my $str = shift;
if ($$str =~ m/(\bdata:\b|javascript|jscript|livescript|vbscript|expression|eval|cookie
|\bwindow\b|\bparent\b|\bthis\b|behaviou?r|moz-binding)/ix) {
my $what = lc $1;
$$ref = "/* suspect CSS: potential scripting: $what */";
return 1;
}
return 0;
};
return 1 if $check_phrases->(\$reduced);
# restore whitespace
$reduced = $with_white;
$reduced =~ s!/\*.*?\*/!!sg;
$reduced =~ s!\<\!--.*?--\>!!sg;
$reduced =~ s/[\s\x0b]+//g;
$reduced =~ s/\\//g;
return 1 if $check_phrases->(\$reduced);
return 0;
};
# check for bad stuff before/after removing comment lines
return 0 if $check_for_bad->();
$reduced =~ s!//.*!!g;
return 0 if $check_for_bad->();
return 1;
}
1;