#!/usr/bin/perl

use strict;
use warnings;

package LJR::Links;

# Helpers for rewriting links that point at journal entries on the server
# we are importing from.
#
# NOTE(review): this file was recovered from a copy in which all line breaks
# and all literal HTML markup had been stripped (the anchor-matching regex
# was unbalanced, every replacement string was reduced to the bare link text,
# and $server_full was never used).  The <ljr-href url="..." site="...">
# output format below is a reconstruction based on the sub name and the
# otherwise unused $server_full parameter -- confirm it against whatever
# consumes this markup before relying on it.

# Known source servers: canonical URL => { kind => value }.
my %SERVER_URLS = (
    "http://www.livejournal.com" => {
        base         => "livejournal.com",
        userpic_base => "http://userpic.livejournal.com",
    },
);

# get_server_url($canonical_url, $type)
#
# Looks up a per-server value for a canonical server URL.
#   $canonical_url - e.g. "http://www.livejournal.com"
#   $type          - "base" or "userpic_base"
# Returns the configured string, or an empty (false) string when the server
# or the type is unknown or undefined.
sub get_server_url {
    my ($canonical_url, $type) = @_;

    return '' unless defined $canonical_url && defined $type;

    my $urls = $SERVER_URLS{$canonical_url};
    return '' unless $urls && exists $urls->{$type};

    return $urls->{$type};
}

# _entry_path($server_patt, $url, $allow_relative)
#
# Classifies $url and returns the site-relative path of the journal entry it
# points to ("/users/NAME/ID.html" plus any trailing query/fragment), or
# nothing when $url does not look like an entry link.
#   $server_patt    - regex fragment matching the server's base host name; it
#                     is interpolated as a pattern (not quoted) and must not
#                     contain capture groups, because the numbered captures
#                     below are counted around it
#   $allow_relative - true to also accept server-relative links
sub _entry_path {
    my ($server_patt, $url, $allow_relative) = @_;

    if ($allow_relative) {
        # relative link (to the server from which we're importing)
        return $1
            if $url =~ m{^(/users/.*?/\d*?\.html(.*?$))};

        # relative link to oldstyle talkread.bml
        # (the optional "l" in "bml?" is kept from the original pattern)
        return "/users/$1/$2.html"
            if $url =~ m{^/talkread.bml?\?journal=(.*?)\&itemid=(\d+)};
    }

    # absolute link to oldstyle talkread.bml
    return "/users/$2/$3.html"
        if $url =~ m{^http://(www\.|)$server_patt/talkread.bml\?journal=(.*?)\&itemid=(\d+)};

    # free users own two types of urls (first is canonical)
    #   http://www.livejournal.com/users/free_user/123456.html
    #   http://www.livejournal.com/~free_user/123456.html
    if ($url =~ m{^http://(www\.|)$server_patt(((/~(\w*?)/)|(/users/.*?/))(\d*?\.html(.*?$)))}) {
        # $5 is only set for the "~user" form, which is normalised to the
        # canonical /users/ form; otherwise the matched path is used as is.
        return $5 ? "/users/$5/$7" : $2;
    }

    # paid users might own http://paiduser.livejournal.com/123456.html urls
    return "/users/$1/$2"
        if $url =~ m{^http://(\w*?)\.$server_patt/(\d*?\.html(.*?$))};

    return;
}

# _rewrite_link($server_patt, $server_full, $url, $link_text, $original,
#               $allow_relative)
#
# Returns the ljr-href markup for $url when it is an entry link, otherwise
# returns $original unchanged.
#
# NOTE(review): $path and $link_text come from imported text and are
# interpolated without HTML escaping -- verify the consumer sanitises them.
sub _rewrite_link {
    my ($server_patt, $server_full, $url, $link_text, $original,
        $allow_relative) = @_;

    my $path = _entry_path($server_patt, $url, $allow_relative);
    return $original unless defined $path;

    return "<ljr-href url=\"$path\" site=\"$server_full\">$link_text</ljr-href>";
}

# _rewrite_anchor($server_patt, $server_full, $anchor, $dq, $sq, $bare,
#                 $link_text)
#
# Picks the href value out of whichever quoting style matched (double-quoted,
# single-quoted or unquoted), trims surrounding whitespace and rewrites the
# whole anchor if the href is an entry link.
sub _rewrite_anchor {
    # Copy the arguments first: callers pass $1..$5, which would change
    # under us as soon as another regex runs.
    my ($server_patt, $server_full, $anchor, $dq, $sq, $bare, $link_text) = @_;

    my $url = defined $dq ? $dq
            : defined $sq ? $sq
            :               $bare;
    $url =~ s/^\s+//;
    $url =~ s/\s+$//;

    return _rewrite_link($server_patt, $server_full, $url, $link_text,
                         $anchor, 1);
}

# make_ljr_hrefs($server_patt, $server_full, $text)
#
# Rewrites, in place, links in $$text that point to journal entries on the
# source server.
#   $server_patt - regex fragment matching the server's base host name
#   $server_full - full URL of the source server (written to the site attr)
#   $text        - reference to the string to rewrite
# Two passes are made:
#   1. HTML anchors (<a href=...>text</a>) whose href is an entry link;
#   2. bare http:// URLs delimited by whitespace or the string boundaries.
# Anything that is not recognised as an entry link is left untouched.
# Returns the rewritten text, or nothing when the input was empty.
sub make_ljr_hrefs {
    my ($server_patt, $server_full, $text) = @_;

    # Nothing to do for empty input.  Tested with length() rather than
    # truthiness so that the text "0" is not wiped out.
    unless (defined $$text && length $$text) {
        $$text = "" unless defined $$text;
        return;
    }

    my $content = $$text;

    # Pass 1: replace valid html hyperlinks with ljr-href markup.
    $content =~ s{
        (                               # $1: the whole anchor
            <a\b [^>]*? \bhref \s*=\s*
            (?: "([^"]*)"               # $2: double-quoted href
              | '([^']*)'               # $3: single-quoted href
              | ([^\s>]+)               # $4: unquoted href
            )
            [^>]* >
            (.*?)                       # $5: link text
            </a>
        )
    }{
        _rewrite_anchor($server_patt, $server_full, $1, $2, $3, $4, $5)
    }gsixe;

    # Pass 2: replace bare urls such as
    #   http://www.livejournal.com/users/lookslikeentry/123456.html
    # These can only be absolute links starting with http://, and must be
    # preceded by whitespace (or the start of the text) and run up to the
    # next whitespace (or the end of the text).
    $content =~ s{
        (?<![^\ \t\r\n\f])
        (http://[^\ \t\r\n\f]*)
    }{
        _rewrite_link($server_patt, $server_full, $1, $1, $1, 0)
    }gxe;

    $$text = $content;
    return $$text;
}

1;