upd to 0.3

This commit is contained in:
2023-07-30 15:40:43 +03:00
parent 1225caf012
commit a14bb99b5f
3 changed files with 108 additions and 90 deletions

176
exec.php
View File

@@ -2,7 +2,7 @@
/**
* mySQL image replacer - https://gitlab.com/zlax/mysqlimagereplacer
* adapted for PHP 8.2
* v.0.2 beta - under DWTWL license https://soundragon.su/license/license.html
* v.0.3 beta - under DWTWL license https://soundragon.su/license/license.html
* exec script
*
* !MAKE BACKUP OF YOUR SQL-TABLE BEFORE YOU TRY THIS SCRIPT!
@@ -25,24 +25,24 @@ function SaveStatus ($status,$raws,$updated,$failed,$deleted,$execdir) {
}
function DownloadFopen($URL) {
if (file_exists(rawurldecode(pathinfo($URL)['basename']))) {
if (file_exists(rawurldecode(urldecode(pathinfo($URL)['basename'])))) {
$fileprefix=mt_rand(1000000,9999999);
file_put_contents($fileprefix.rawurldecode(pathinfo($URL)['basename']), file_get_contents($URL));
if (md5_file($fileprefix.rawurldecode(pathinfo($URL)['basename']))==md5_file(rawurldecode(pathinfo($URL)['basename']))) {
unlink ($fileprefix.rawurldecode(pathinfo($URL)['basename']));
$result=pathinfo($URL)['basename'];
file_put_contents($fileprefix.rawurldecode(urldecode(pathinfo($URL)['basename'])), file_get_contents($URL));
if (md5_file($fileprefix.rawurldecode(urldecode(pathinfo($URL)['basename'])))==md5_file(rawurldecode(urldecode(pathinfo($URL)['basename'])))) {
unlink ($fileprefix.rawurldecode(urldecode(pathinfo($URL)['basename'])));
$result=rawurldecode(urldecode(pathinfo($URL)['basename']));
} else {
if (exif_imagetype($fileprefix.rawurldecode(pathinfo($URL)['basename']))>0) $result=$fileprefix.pathinfo($URL)['basename'];
if (exif_imagetype($fileprefix.rawurldecodeurldecode((pathinfo($URL)['basename'])))>0) $result=$fileprefix.rawurldecode(urldecode(pathinfo($URL)['basename']));
else {
unlink ($fileprefix.rawurldecode(pathinfo($URL)['basename']));
unlink ($fileprefix.rawurldecode(urldecode(pathinfo($URL)['basename'])));
$result=false;
}
}
} else {
file_put_contents(rawurldecode(pathinfo($URL)['basename']), file_get_contents($URL));
if (exif_imagetype(rawurldecode(pathinfo($URL)['basename']))>0) $result=pathinfo($URL)['basename'];
file_put_contents(rawurldecode(urldecode(pathinfo($URL)['basename'])), file_get_contents($URL));
if (exif_imagetype(rawurldecode(urldecode(pathinfo($URL)['basename'])))>0) $result=rawurldecode(urldecode(pathinfo($URL)['basename']));
else {
unlink (rawurldecode(pathinfo($URL)['basename']));
unlink (rawurldecode(urldecode(pathinfo($URL)['basename'])));
$result=false;
}
}
@@ -71,7 +71,8 @@ $correctimagenumber=$indatacsv[14];
$filtcolumn=$indatacsv[15];
$filtvalue=$indatacsv[16];
$ahrefparents=$indatacsv[17];
$webarchived=$indatacsv[18];
$webarchived=explode (",", $indatacsv[18]);
$fromnum=$indatacsv[19];
unlink ("indata.csv");
@@ -90,88 +91,95 @@ $deletedurls=0;
$rawsedit=0;
SaveStatus ($totalstatus,$rawsedit,$updatedurls,$failedtoupdate,$deletedurls,$execdir);
$filteredpostnumber=0;
$postnumber=0;
if (!$fromnum) $fromnum=0;
while($row=mysqli_fetch_assoc($resulttable)){
if (!empty($filtcolumn) && !empty($filtvalue)) {
if ($row[$filtcolumn] == $filtvalue) {
if (!empty($row[$dbcolumn]))
if ($postnumber >= $fromnum) {
if (!empty($filtcolumn) && !empty($filtvalue)) {
if ($row[$filtcolumn] == $filtvalue) {
if (!empty($row[$dbcolumn]))
$originalcontent=$row[$dbcolumn];
$filteredpostnumber++;
}
} else { if (!empty($row[$dbcolumn]))
$originalcontent=$row[$dbcolumn];
$filteredpostnumber++;
}
} else { if (!empty($row[$dbcolumn]))
$originalcontent=$row[$dbcolumn];
$filteredpostnumber++;
}
if (isset($originalcontent)) {
$newcontent=$originalcontent;
if ($fixbroken) {
$contentpost=new DOMDocument();
$xmlEncodding = '<?xml version="1.0" encoding="UTF-8"?>';
$contentpost->loadHTML($xmlEncodding.$originalcontent, LIBXML_HTML_NOIMPLIED | LIBXML_HTML_NODEFDTD);
$imageTags=$contentpost->getElementsByTagName('img');
for ($i = $imageTags->length; --$i >= 0; ) { // http://php.net/manual/class.domnodelist.php#83390
$tag = $imageTags->item($i);
$imgsrctag=$tag->getAttribute('src');
$parsedimgsrctag=parse_url($imgsrctag);
if (!((!empty($parsedimgsrctag['host'])&&!empty($parsedimgsrctag['path']))&&(($parsedimgsrctag['scheme']=="http")||($parsedimgsrctag ['scheme']=="https")))) {
$tag->parentNode->removeChild($tag);
$deletedurls++;
}
}
$newcontent=str_replace($xmlEncodding,"",$contentpost->saveHTML());
}
if ($correctimagenumber>0) {
$contentpost=new DOMDocument();
$contentpost->loadHTML($newcontent);
$imageTags=$contentpost->getElementsByTagName('img');
for ($i = $imageTags->length; --$i >= 0; ) { // http://php.net/manual/class.domnodelist.php#83390
$tag = $imageTags->item($i);
if ($ahrefparents) {
$aofimg=$contentpost->importNode($tag->parentNode, true);
if (isset($aofimg)) {
$ahreofimg=$aofimg->getAttribute('href');
if ($ahreofimg=="") $ahreofimg=$tag->getAttribute('src');
$imgsrctag=$ahreofimg;
}
} else $imgsrctag=$tag->getAttribute('src');
$parsedimgsrctag=parse_url($imgsrctag);
if ((!empty($parsedimgsrctag['host'])&&!empty($parsedimgsrctag['path']))&&(($parsedimgsrctag['scheme']=="http")||($parsedimgsrctag ['scheme']=="https"))&&(!($parsedimgsrctag['host']==$servername))) {
if ($webarchived) {
$parsedimgsrctagarch=parse_url($tag->getAttribute('src'));
if ($parsedimgsrctagarch['host']==$webarchived) {
$webarchivelink="https://web.archive.org/web/20200101id_/".$tag->getAttribute('src');
$newimgsrc=DownloadFopen($webarchivelink);
} else
$newimgsrc=DownloadFopen($imgsrctag);
} else
$newimgsrc=DownloadFopen($imgsrctag);
if ($newimgsrc) {
if ($htmlprefix=="http") {
$srcbase="http://".$servername."/".$dirname."/";
} else if ($htmlprefix=="https") {
$srcbase="https://".$servername."/".$dirname."/";
} else {
$srcbase="/".$dirname."/";
}
if ($subdir) $srcbase=$srcbase.rawurlencode($subdir)."/";
$newimgsrc=$srcbase.$newimgsrc;
$newcontent=str_replace($tag->getAttribute('src'),$newimgsrc,$newcontent);
unset($newimgsrc);
$updatedurls++;
SaveStatus ($totalstatus,$rawsedit,$updatedurls,$failedtoupdate,$deletedurls,$execdir);
} else if ($fixundownload==true) {
if (isset($originalcontent)) {
$newcontent=$originalcontent;
if ($fixbroken) {
$contentpost=new DOMDocument();
$xmlEncodding = '<?xml version="1.0" encoding="UTF-8"?>';
$contentpost->loadHTML($xmlEncodding.$originalcontent, LIBXML_HTML_NOIMPLIED | LIBXML_HTML_NODEFDTD);
$imageTags=$contentpost->getElementsByTagName('img');
for ($i = $imageTags->length; --$i >= 0; ) { // http://php.net/manual/class.domnodelist.php#83390
$tag = $imageTags->item($i);
$imgsrctag=$tag->getAttribute('src');
$parsedimgsrctag=parse_url($imgsrctag);
if (!((!empty($parsedimgsrctag['host'])&&!empty($parsedimgsrctag['path']))&&(($parsedimgsrctag['scheme']=="http")||($parsedimgsrctag ['scheme']=="https")))) {
$tag->parentNode->removeChild($tag);
$deletedurls++;
SaveStatus ($totalstatus,$rawsedit,$updatedurls,$failedtoupdate,$deletedurls,$execdir);
} else $failedtoupdate++;
}
}
$newcontent=str_replace($xmlEncodding,"",$contentpost->saveHTML());
}
if ($correctimagenumber>0) {
$contentpost=new DOMDocument();
$contentpost->loadHTML($newcontent);
$imageTags=$contentpost->getElementsByTagName('img');
for ($i = $imageTags->length; --$i >= 0; ) { // http://php.net/manual/class.domnodelist.php#83390
$tag = $imageTags->item($i);
if ($ahrefparents) {
$aofimg=$contentpost->importNode($tag->parentNode, true);
if (isset($aofimg)) {
$ahreofimg=$aofimg->getAttribute('href');
if ($ahreofimg=="") $ahreofimg=$tag->getAttribute('src');
$imgsrctag=$ahreofimg;
}
} else $imgsrctag=$tag->getAttribute('src');
$parsedimgsrctag=parse_url($imgsrctag);
if ((!empty($parsedimgsrctag['host'])&&!empty($parsedimgsrctag['path']))&&(($parsedimgsrctag['scheme']=="http")||($parsedimgsrctag ['scheme']=="https"))&&(!($parsedimgsrctag['host']==$servername))) {
if ($webarchived) {
$parsedimgsrctagarch=parse_url($tag->getAttribute('src'));
if (in_array($parsedimgsrctagarch['host'], $webarchived)) {
$webarchivelink="https://web.archive.org/web/20150101id_/".$tag->getAttribute('src');
$newimgsrc=DownloadFopen($webarchivelink);
} else
$newimgsrc=DownloadFopen($imgsrctag);
} else
$newimgsrc=DownloadFopen($imgsrctag);
if ($newimgsrc) {
if ($htmlprefix=="http") {
$srcbase="http://".$servername."/".$dirname."/";
} else if ($htmlprefix=="https") {
$srcbase="https://".$servername."/".$dirname."/";
} else {
$srcbase="/".$dirname."/";
}
if ($subdir) $srcbase=$srcbase.rawurlencode($subdir)."/";
$newimgsrc=$srcbase.$newimgsrc;
$newcontent=str_replace($tag->getAttribute('src'),$newimgsrc,$newcontent);
if ($ahrefparents)
$newcontent=str_replace($imgsrctag,$newimgsrc,$newcontent);
unset($newimgsrc);
$updatedurls++;
SaveStatus ($totalstatus,$rawsedit,$updatedurls,$failedtoupdate,$deletedurls,$execdir);
} else if ($fixundownload==true) {
$tag->parentNode->removeChild($tag);
$deletedurls++;
SaveStatus ($totalstatus,$rawsedit,$updatedurls,$failedtoupdate,$deletedurls,$execdir);
} else $failedtoupdate++;
}
}
}
$stmt = mysqli_prepare($dbconnection, "UPDATE `".$dbtable."` SET `".$dbcolumn."` = ? WHERE `".$dbtable."`.`".$dbcolumn."`=?");
mysqli_stmt_bind_param($stmt, 'ss', $newcontent, $originalcontent);
mysqli_stmt_execute($stmt);
$rawsedit++;
SaveStatus ($totalstatus,$rawsedit,$updatedurls,$failedtoupdate,$deletedurls,$execdir);
}
$stmt = mysqli_prepare($dbconnection, "UPDATE `".$dbtable."` SET `".$dbcolumn."` = ? WHERE `".$dbtable."`.`".$dbcolumn."`=?");
mysqli_stmt_bind_param($stmt, 'ss', $newcontent, $originalcontent);
mysqli_stmt_execute($stmt);
$rawsedit++;
SaveStatus ($totalstatus,$rawsedit,$updatedurls,$failedtoupdate,$deletedurls,$execdir);
}
$postnumber++;
}
$totalstatus="finished";
SaveStatus ($totalstatus,$rawsedit,$updatedurls,$failedtoupdate,$deletedurls,$execdir);