' onmouseover="javascript:alert(document/**/.cookie)" >
# is returned by HTML::Parser as P_tag("='" => "='") Text( onmouseover...)
# which leads to reconstruction of valid HTML. Clever!
# detect this, and fail.
$total_fail->("$tag $attr");
last TOKEN;
}
# ignore attributes that do not fit this strict scheme
unless ($attr =~ /^[\w_:-]+$/) {
$total_fail->("$tag " . (%$hash > 1 ? "[...] " : "") . "$attr");
last TOKEN;
}
$hash->{$attr} =~ s/[\t\n]//g;
# IE ignores the null character, so strip it out
$hash->{$attr} =~ s/\x0//g;
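# IE sucks: look for script-like URL schemes on a copy of the value with all
# whitespace (including vertical tabs, \x0b) removed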
my $nowhite = $hash->{$attr};
$nowhite =~ s/[\s\x0b]+//g;
if ($nowhite =~ /(?:jscript|livescript|javascript|vbscript|about):/ix) {
delete $hash->{$attr};
next;
}
if ($attr eq 'style') {
if ($opts->{'cleancss'}) {
# css2 spec, section 4.1.3
# position === p\osition :(
# strip all slashes no matter what.
$hash->{style} =~ s/\\//g;
# and catch the obvious ones ("[" is for things like document["coo"+"kie"])
foreach my $css ("/*", "[", qw(margin absolute fixed expression eval behavior cookie document window javascript -moz-binding)) {
if ($hash->{style} =~ /\Q$css\E/i) {
delete $hash->{style};
next ATTR;
}
}
# remove specific CSS definitions
if ($remove_colors) {
$hash->{style} =~ s/(?:background-)?color:.*?(?:;|$)//gi;
}
if ($remove_sizes) {
$hash->{style} =~ s/font-size:.*?(?:;|$)//gi;
}
if ($remove_fonts) {
$hash->{style} =~ s/font-family:.*?(?:;|$)//gi;
}
# Added to prevent the new div exploit (August 2008) - M. V.
$hash->{style} =~s/(content|background-image|background|position|top|left|width|height):.*?(?:;|$)//gi;
#modified March 2014 to remove backgrounds and content
# and in July 2014 to remove background-image
}
if ($opts->{'clean_js_css'} && ! $LJ::DISABLED{'css_cleaner'}) {
# and then run it through a harder CSS cleaner that does a full parse
my $css = LJ::CSS::Cleaner->new;
$hash->{style} = $css->clean_property($hash->{style});
}
}
# reserve ljs_* ids for divs, etc so users can't override them to replace content
if ($attr eq 'id' && $hash->{$attr} =~ /^ljs_/i) {
delete $hash->{$attr};
next;
}
if ($s1var) {
if ($attr =~ /%%/) {
delete $hash->{$attr};
next ATTR;
}
my $props = $LJ::S1::PROPS->{$s1var};
if ($hash->{$attr} =~ /^%%([\w:]+:)?(\S+?)%%$/ && $props->{$2} =~ /[aud]/) {
# don't change it.
} elsif ($hash->{$attr} =~ /^%%cons:\w+%%[^\%]*$/) {
# a site constant with something appended is also fine.
} elsif ($hash->{$attr} =~ /%%/) {
my $clean_var = sub {
my ($mods, $prop) = @_;
# HTML escape and kill line breaks
$mods = "attr:$mods" unless
$mods =~ /^(color|cons|siteroot|sitename|img):/ ||
$props->{$prop} =~ /[ud]/;
return '%%' . $mods . $prop . '%%';
};
$hash->{$attr} =~ s/[\n\r]//g;
$hash->{$attr} =~ s/%%([\w:]+:)?(\S+?)%%/$clean_var->(lc($1), $2)/eg;
if ($attr =~ /^(href|src|lowsrc|style)$/) {
$hash->{$attr} = "\%\%[attr[$hash->{$attr}]]\%\%";
}
}
}
# remove specific attributes
if (($remove_colors && ($attr eq "color" || $attr eq "bgcolor" || $attr eq "fgcolor" || $attr eq "text")) ||
($remove_sizes && $attr eq "size") ||
($remove_fonts && $attr eq "face")) {
delete $hash->{$attr};
next ATTR;
}
}
if (exists $hash->{href}) {
## links to some resources will be completely blocked
## and replaced by value of 'blocked_link_substitute' param
if ($blocked_links) {
foreach my $re (@$blocked_links) {
if ($hash->{href} =~ $re) {
$hash->{href} = sprintf($blocked_link_substitute, LJ::eurl($hash->{href}));
last;
}
}
}
unless ($hash->{href} =~ s/^lj:(?:\/\/)?(.*)$/ExpandLJURL($1)/ei) {
$hash->{href} = canonical_url($hash->{href}, 1);
}
}
if ($tag eq "img")
{
$imagecount++;
my $img_bad = 0;
if ((defined $opts->{'maximgwidth'} &&
(! defined $hash->{'width'} ||
$hash->{'width'} > $opts->{'maximgwidth'}))
# I temporarily replaced 1 with 33
# as an anti-macaque measure. Really stupid, in fact. - MV, Sept 2014
|| (defined $hash->{'width'} && $hash->{'width'} <= 33))
# to avoid bombing with billion 1px images
{ $img_bad = 1; }
if ((defined $opts->{'maximgheight'} &&
(! defined $hash->{'height'} ||
$hash->{'height'} > $opts->{'maximgheight'}))
|| (defined $hash->{'height'} && $hash->{'height'} <= 33))
{ $img_bad = 1; }
if ($opts->{'extractimages'}) { $img_bad = 1; }
# anti-makaka: prohibit putting more than $MAXIMAGES images in comments
my $MAXIMAGES = 5; # maximal number of images in comments
# we should move this into ljconfig.pl!
if (($imagecount > $MAXIMAGES) && $opts->{'maximages'})
{ $img_bad = 1; }
# remove img src="data:image/..." images
$hash->{src} = canonical_url($hash->{src}, 1);
if ("$hash->{src}" =~ "^data:") {
$img_bad=1;
$hash->{src} = "data:image is not allowed";
}
# Anon and OpenID commenters are not allowed to post images
if ($img_bad) {
$newdata .= "{'src'}) . "\">" .
LJ::img('placeholder') . '';
$alt_output = 1;
$opencount{"img"}++;
}
}
if ($tag eq "a" && $extractlinks)
{
push @canonical_urls, canonical_url($token->[2]->{href}, 1);
$newdata .= "";
next;
}
# Through the xsl namespace in XML, it is possible to embed scripting languages
# as elements which will then be executed by the browser. Combining this with
# customview.cgi makes it very easy for someone to replace their entire journal
# in S1 with a page that embeds scripting as well. One example is an AJAX
# six degrees tool: while cool, it should not be allowed.
#
# Example syntax:
#
and