2016-12-15 21:24:41 +00:00
|
|
|
<?php
|
|
|
|
/**
 * mySQL image replacer - https://gitlab.com/zlax/mysqlimagereplacer
 * adapted for PHP 8.2
 * v.0.3 beta - under DWTWL license https://soundragon.su/license/license.html
 * exec script
 *
 * !MAKE A BACKUP OF YOUR SQL TABLE BEFORE YOU TRY THIS SCRIPT!
 */
|
|
|
|
// Keep running even if the client disconnects: the job rewrites database rows
// and should not stop half-way through.
ignore_user_abort(true);

// Ask for generous time limits (6000 seconds each).
// NOTE(review): max_input_time is documented as a per-directory setting, so
// changing it at runtime may have no effect - confirm against the PHP manual.
foreach (['max_execution_time', 'max_input_time'] as $timelimitkey) {
    ini_set($timelimitkey, '6000');
}
|
|
|
|
|
|
|
|
/**
 * Publish or clear the progress counters of the running job.
 *
 * With $status "started" the four counters are written as one CSV row to
 * $execdir/outdata.csv, replacing whatever the file held before. With
 * $status "finished" that file is removed. Any other status does nothing.
 *
 * Failures are tolerated silently: if the directory is missing or the file
 * cannot be opened the call simply returns, so a status-file problem never
 * aborts the job itself.
 *
 * @param string $status  "started" to write the counters, "finished" to delete the file
 * @param int    $raws    first CSV field (the caller passes the number of rows processed)
 * @param int    $updated second CSV field (URLs downloaded and rewritten)
 * @param int    $failed  third CSV field (URLs that could not be updated)
 * @param int    $deleted fourth CSV field (tags deleted)
 * @param string $execdir directory that holds outdata.csv
 * @return void
 */
function SaveStatus ($status,$raws,$updated,$failed,$deleted,$execdir) {
    $statusfile = $execdir."/outdata.csv";

    if ($status == "started") {
        // fopen() on a missing directory would warn and return false, and
        // fputcsv(false, ...) is a TypeError on PHP 8 - bail out instead.
        if (!is_dir($execdir)) {
            return;
        }
        $fhandle = fopen($statusfile, "w");
        if ($fhandle === false) {
            return;
        }
        // Separator/enclosure/escape are the historical defaults, spelled out
        // because relying on the default escape is deprecated since PHP 8.4.
        fputcsv($fhandle, [$raws, $updated, $failed, $deleted], ",", '"', "\\");
        fclose($fhandle);
        return;
    }

    // Only unlink when there is something to remove, so a repeated
    // "finished" call does not raise a warning.
    if ($status == "finished" && file_exists($statusfile)) {
        unlink($statusfile);
    }
}
|
|
|
|
|
|
|
|
/**
 * Download $URL into the current working directory and return the local name.
 *
 * The local file name is the URL's basename, percent-decoded twice (as the
 * original code did) and stripped of any directory part that the decoding
 * may have revealed (e.g. "%2F"), so the file can only be created inside the
 * current directory.
 *
 * - No file of that name exists yet: the download is stored under that name
 *   and kept only if exif_imagetype() recognises it as an image.
 * - A file of that name exists with identical content (MD5): nothing is
 *   written and the existing name is returned.
 * - A file of that name exists with different content: the download is
 *   stored under a random 7-digit prefix + name and kept only if it is an
 *   image.
 *
 * @param string $URL address (anything file_get_contents() can read) of the image
 * @return string|false local file name relative to the current directory,
 *                      or false if the download failed or is not an image
 */
function DownloadFopen($URL) {
    // Decode once instead of recomputing the same expression at every use.
    $decodedname = rawurldecode(urldecode(pathinfo($URL)['basename']));
    // Drop everything up to the last slash/backslash: decoding can turn
    // "..%2F..%2Fx" into a path that escapes the download directory.
    $localname = preg_replace('~^.*[/\\\\]~s', '', $decodedname);
    if ($localname === null || $localname === '' || $localname === '.' || $localname === '..') {
        return false;
    }

    // Fetch first; a failed download must not leave an empty file behind.
    $payload = file_get_contents($URL);
    if ($payload === false) {
        return false;
    }

    if (!file_exists($localname)) {
        $targetname = $localname;
    } else {
        // Same bytes as the file we already have: reuse it.
        if (md5($payload) === md5_file($localname)) {
            return $localname;
        }
        // Different content under the same name: store it next to the
        // existing file under a random prefix that is not taken yet.
        do {
            $targetname = mt_rand(1000000,9999999).$localname;
        } while (file_exists($targetname));
    }

    if (file_put_contents($targetname, $payload) === false) {
        return false;
    }

    // Keep the file only when it really is an image.
    if (exif_imagetype($targetname) > 0) {
        return $targetname;
    }
    unlink($targetname);
    return false;
}
|
|
|
|
|
|
|
|
// Directory of this script; the progress file (outdata.csv) is written here.
$execdir=__DIR__;

// The job parameters arrive as a single CSV row in indata.csv (looked up in
// the current working directory). Without them nothing sensible can be done,
// so report the problem and stop instead of continuing with no configuration
// (passing a failed handle to fgetcsv() is a TypeError on PHP 8).
$indatacsv=false;
$fhandle=is_readable("indata.csv") ? fopen("indata.csv","r") : false;
if ($fhandle!==false) {
    // No length limit: a long password or host list must not truncate the row.
    $indatacsv=fgetcsv($fhandle, null, ",", '"', "\\");
    fclose($fhandle);
}
if (!is_array($indatacsv) || $indatacsv===[null]) {
    echo "Can't read data file, please check write pirivlegies for ",$execdir," directory";
    exit;
}

// Rows written without the trailing (newer) columns are padded with empty
// strings so every index below exists.
$indatacsv=array_pad($indatacsv, 20, '');

$dbhost=$indatacsv[0];              // MySQL host
$dbuser=$indatacsv[1];              // MySQL user
$dbpass=$indatacsv[2];              // MySQL password
$dbname=$indatacsv[3];              // database name
$dbtable=$indatacsv[4];             // table to process
$dbcolumn=$indatacsv[5];            // column holding the HTML content
$servername=$indatacsv[6];          // own host name; images already on it are left alone
$fixbroken=$indatacsv[7];           // truthy: drop <img> tags without an absolute http(s) src
$dirname=$indatacsv[8];             // download directory, relative to the parent of the working directory
$subdir=$indatacsv[9];              // optional sub-directory inside $dirname
$downloadtype=$indatacsv[10];       // not read anywhere else in the visible code
$htmlprefix=$indatacsv[11];         // "http" / "https" for absolute new URLs, anything else for root-relative
$fixalready=$indatacsv[12];         // not read anywhere else in the visible code
$fixundownload=$indatacsv[13];      // truthy: count undownloadable images as deleted instead of failed
$correctimagenumber=$indatacsv[14]; // download pass runs only when this is > 0
$filtcolumn=$indatacsv[15];         // optional filter column ...
$filtvalue=$indatacsv[16];          // ... and the value a row must have in it
$ahrefparents=$indatacsv[17];       // truthy: prefer the href of the image's parent element over its src
$webarchived=explode (",", (string)$indatacsv[18]); // hosts to fetch through web.archive.org
$fromnum=(int)$indatacsv[19];       // number of leading rows to skip (0 when absent)

// The row has been read into variables (it includes the database
// password); remove the file.
unlink ("indata.csv");

// Downloads land in the current directory, so move into the target directory.
chdir("../".$dirname);
if ($subdir)
    chdir ($subdir);

$dbconnection=mysqli_connect($dbhost,$dbuser,$dbpass,$dbname);
mysqli_set_charset($dbconnection,'utf8');
// Table name quoted the same way as in the UPDATE statement further down.
$resulttable=mysqli_query($dbconnection,"SELECT * FROM `".$dbtable."`");

// Progress counters, published through SaveStatus() while the job runs.
$totalstatus="started";
$updatedurls=0;
$failedtoupdate=0;
$deletedurls=0;
$rawsedit=0;
SaveStatus ($totalstatus,$rawsedit,$updatedurls,$failedtoupdate,$deletedurls,$execdir);

$filteredpostnumber=0;
$postnumber=0;
|
2023-07-27 18:34:46 +00:00
|
|
|
// Main pass over the selected rows.
//
// For every row (after skipping the first $fromnum rows and, when a filter is
// configured, rows whose $filtcolumn differs from $filtvalue) the HTML in
// $dbcolumn is optionally cleaned of <img> tags without an absolute http(s)
// src ($fixbroken), then every remote image is downloaded with
// DownloadFopen() and its URL replaced by the local one
// ($correctimagenumber > 0). The result is written back with an UPDATE and
// the progress counters are published through SaveStatus().

// Tells DOMDocument::loadHTML() that the fragment is UTF-8; removed again
// from the serialised HTML.
$xmlEncoding = '<?xml version="1.0" encoding="UTF-8"?>';

// parse_url() parts of $url when it is an absolute http/https URL with host
// and path, null otherwise (also for unparsable URLs or a missing scheme).
$parseHttpUrl = static function ($url) {
    $parts = parse_url((string)$url);
    if (!is_array($parts) || empty($parts['host']) || empty($parts['path'])) {
        return null;
    }
    $scheme = $parts['scheme'] ?? '';
    if ($scheme !== "http" && $scheme !== "https") {
        return null;
    }
    return $parts;
};

// URL prefix of downloaded files; depends only on the configuration, so it
// is built once instead of once per image.
if (in_array($htmlprefix, ["http", "https"], true)) {
    $srcbase = $htmlprefix."://".$servername."/".$dirname."/";
} else {
    $srcbase = "/".$dirname."/";
}
if ($subdir) $srcbase = $srcbase.rawurlencode($subdir)."/";

while ($row = mysqli_fetch_assoc($resulttable)) {
    // Start every row clean: otherwise a row that is empty or fails the
    // filter would be processed with the previous row's content.
    unset($originalcontent);

    if ($postnumber >= $fromnum) {
        if (!empty($filtcolumn) && !empty($filtvalue)) {
            if (($row[$filtcolumn] ?? null) == $filtvalue) {
                if (!empty($row[$dbcolumn]))
                    $originalcontent = $row[$dbcolumn];
                $filteredpostnumber++;
            }
        } else {
            if (!empty($row[$dbcolumn]))
                $originalcontent = $row[$dbcolumn];
            $filteredpostnumber++;
        }

        if (isset($originalcontent)) {
            $newcontent = $originalcontent;

            if ($fixbroken) {
                $contentpost = new DOMDocument();
                $contentpost->loadHTML($xmlEncoding.$originalcontent, LIBXML_HTML_NOIMPLIED | LIBXML_HTML_NODEFDTD);
                $imageTags = $contentpost->getElementsByTagName('img');
                // Walk backwards: the node list is live and shrinks on removal.
                // http://php.net/manual/class.domnodelist.php#83390
                for ($i = $imageTags->length; --$i >= 0; ) {
                    $tag = $imageTags->item($i);
                    if ($parseHttpUrl($tag->getAttribute('src')) === null) {
                        $tag->parentNode->removeChild($tag);
                        $deletedurls++;
                    }
                }
                $newcontent = str_replace($xmlEncoding, "", $contentpost->saveHTML());
            }

            if ($correctimagenumber > 0) {
                // This document is only used to find the images; the content
                // itself is edited with str_replace() on $newcontent. The
                // UTF-8 hint keeps non-ASCII attribute values intact so they
                // still match the text being replaced.
                $contentpost = new DOMDocument();
                $contentpost->loadHTML($xmlEncoding.$newcontent);
                $imageTags = $contentpost->getElementsByTagName('img');
                for ($i = $imageTags->length; --$i >= 0; ) {
                    $tag = $imageTags->item($i);
                    $originalsrc = $tag->getAttribute('src');

                    // Address to download: the image's own src, or the href
                    // of its parent element when $ahrefparents is set and the
                    // parent has one.
                    $imgsrctag = $originalsrc;
                    if ($ahrefparents) {
                        $parentnode = $tag->parentNode;
                        $parenthref = $parentnode instanceof DOMElement ? $parentnode->getAttribute('href') : "";
                        if ($parenthref != "") $imgsrctag = $parenthref;
                    }

                    $parsedimgsrctag = $parseHttpUrl($imgsrctag);
                    if ($parsedimgsrctag !== null && $parsedimgsrctag['host'] != $servername) {
                        // Hosts listed in $webarchived are fetched from the
                        // Wayback Machine, always by the image's src. An
                        // empty host never matches, so an empty list
                        // disables this path.
                        $downloadurl = $imgsrctag;
                        $srchost = parse_url($originalsrc, PHP_URL_HOST);
                        if (is_string($srchost) && $srchost !== "" && in_array($srchost, $webarchived, true)) {
                            $downloadurl = "https://web.archive.org/web/20150101id_/".$originalsrc;
                        }
                        $newimgsrc = DownloadFopen($downloadurl);

                        if ($newimgsrc) {
                            $newimgsrc = $srcbase.$newimgsrc;
                            $newcontent = str_replace($originalsrc, $newimgsrc, $newcontent);
                            if ($ahrefparents && $imgsrctag !== $originalsrc)
                                $newcontent = str_replace($imgsrctag, $newimgsrc, $newcontent);
                            unset($newimgsrc);
                            $updatedurls++;
                            SaveStatus ($totalstatus,$rawsedit,$updatedurls,$failedtoupdate,$deletedurls,$execdir);
                        } else if ($fixundownload == true) {
                            // NOTE(review): this removes the tag from the
                            // helper DOM only; $newcontent is not rebuilt
                            // from it, so the tag stays in the stored HTML
                            // although it is counted as deleted.
                            $tag->parentNode->removeChild($tag);
                            $deletedurls++;
                            SaveStatus ($totalstatus,$rawsedit,$updatedurls,$failedtoupdate,$deletedurls,$execdir);
                        } else $failedtoupdate++;
                    }
                }
            }

            // Rows are matched by their previous content, so every row
            // holding exactly this content receives the new value.
            $stmt = mysqli_prepare($dbconnection, "UPDATE `".$dbtable."` SET `".$dbcolumn."` = ? WHERE `".$dbtable."`.`".$dbcolumn."`=?");
            mysqli_stmt_bind_param($stmt, 'ss', $newcontent, $originalcontent);
            mysqli_stmt_execute($stmt);
            $rawsedit++;
            SaveStatus ($totalstatus,$rawsedit,$updatedurls,$failedtoupdate,$deletedurls,$execdir);
        }
    }
    $postnumber++;
}
|
|
|
|
// The job is done: the "finished" status makes SaveStatus() remove the
// progress file, then the totals are printed.
$totalstatus = "finished";
SaveStatus($totalstatus, $rawsedit, $updatedurls, $failedtoupdate, $deletedurls, $execdir);

echo $updatedurls . " urls was downloaded and updated, " . $failedtoupdate . " urls was failed to update" . "<br>\n";

// The deleted-tag total is only shown when one of the deleting options was on.
if ($fixbroken || $fixundownload) {
    echo $deletedurls . " tags was deleted<br>\n";
}
|
2016-12-15 21:24:41 +00:00
|
|
|
|
2023-07-27 18:34:46 +00:00
|
|
|
?>
|