54 lines
1.1 KiB
Perl
54 lines
1.1 KiB
Perl
|
use strict;
|
||
|
|
||
|
package LJR::unicode;
|
||
|
|
||
|
use XML::Parser;
|
||
|
use Unicode::MapUTF8 qw(to_utf8 from_utf8 utf8_supported_charset);
|
||
|
|
||
|
|
||
|
sub utf8ize {
|
||
|
my $text_in = shift;
|
||
|
$$text_in = pack("C*", unpack("C*", $$text_in)) if $$text_in;
|
||
|
}
|
||
|
|
||
|
|
||
|
sub force_utf8 {
|
||
|
my $xdata = shift;
|
||
|
my %error_lines;
|
||
|
my $finished = 0;
|
||
|
my @xlines;
|
||
|
my $orig_xdata = $$xdata;
|
||
|
|
||
|
my $p1 = new XML::Parser ();
|
||
|
|
||
|
while (!$finished) {
|
||
|
eval { $p1->parse($$xdata); };
|
||
|
|
||
|
if ($@ && $@ =~ /not\ well\-formed\ \(invalid\ token\)\ at\ line\ (\d+)\,/) {
|
||
|
my $error_line = $1;
|
||
|
$error_lines{$error_line} ++;
|
||
|
|
||
|
if ($error_lines{$error_line} > 1) {
|
||
|
$$xdata = $orig_xdata;
|
||
|
$finished = 1;
|
||
|
}
|
||
|
else {
|
||
|
@xlines = split(/\n/, $$xdata);
|
||
|
my $output = to_utf8({ -string => $xlines[$error_line - 1], -charset => 'latin1' });
|
||
|
$xlines[$error_line - 1] = $output;
|
||
|
$$xdata = join("\n", @xlines);
|
||
|
}
|
||
|
}
|
||
|
# unknown error or no error, doesn't matter
|
||
|
elsif ($@) {
|
||
|
$$xdata = $orig_xdata;
|
||
|
$finished = 1;
|
||
|
}
|
||
|
else {
|
||
|
$finished = 1;
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
|
||
|
return 1;
|