From a14bb99b5f14530913e52945674656a894c58787 Mon Sep 17 00:00:00 2001 From: zlax Date: Sun, 30 Jul 2023 15:40:43 +0300 Subject: [PATCH] upd to 0.3 --- README.md | 2 +- exec.php | 176 ++++++++++++++++++++++++++++-------------------------- index.php | 20 +++++-- 3 files changed, 108 insertions(+), 90 deletions(-) diff --git a/README.md b/README.md index a152ed5..9ff47e0 100644 --- a/README.md +++ b/README.md @@ -2,7 +2,7 @@ mySQL image downloader / replacer wizard -v.0.2 beta adopted for php8 +v.0.3 beta adopted for php8 DWTWL license https://soundragon.su/license/license.html diff --git a/exec.php b/exec.php index 4a81a3a..0fd93e0 100644 --- a/exec.php +++ b/exec.php @@ -2,7 +2,7 @@ /** * mySQL image replacer - https://gitlab.com/zlax/mysqlimagereplacer * adopted for php8.2 -* v.0.2 beta - uder DWTWL license https://soundragon.su/license/license.html +* v.0.3 beta - uder DWTWL license https://soundragon.su/license/license.html * exec script * * !MAKE BACKUP OF YOUR SQL-TABLE BEFORE YOU TRY THIS SCRIPT! 
@@ -25,24 +25,24 @@ function SaveStatus ($status,$raws,$updated,$failed,$deleted,$execdir) { } function DownloadFopen($URL) { - if (file_exists(rawurldecode(pathinfo($URL)['basename']))) { + if (file_exists(rawurldecode(urldecode(pathinfo($URL)['basename'])))) { $fileprefix=mt_rand(1000000,9999999); - file_put_contents($fileprefix.rawurldecode(pathinfo($URL)['basename']), file_get_contents($URL)); - if (md5_file($fileprefix.rawurldecode(pathinfo($URL)['basename']))==md5_file(rawurldecode(pathinfo($URL)['basename']))) { - unlink ($fileprefix.rawurldecode(pathinfo($URL)['basename'])); - $result=pathinfo($URL)['basename']; + file_put_contents($fileprefix.rawurldecode(urldecode(pathinfo($URL)['basename'])), file_get_contents($URL)); + if (md5_file($fileprefix.rawurldecode(urldecode(pathinfo($URL)['basename'])))==md5_file(rawurldecode(urldecode(pathinfo($URL)['basename'])))) { + unlink ($fileprefix.rawurldecode(urldecode(pathinfo($URL)['basename']))); + $result=rawurldecode(urldecode(pathinfo($URL)['basename'])); } else { - if (exif_imagetype($fileprefix.rawurldecode(pathinfo($URL)['basename']))>0) $result=$fileprefix.pathinfo($URL)['basename']; + if (exif_imagetype($fileprefix.rawurldecode(urldecode(pathinfo($URL)['basename'])))>0) $result=$fileprefix.rawurldecode(urldecode(pathinfo($URL)['basename'])); else { - unlink ($fileprefix.rawurldecode(pathinfo($URL)['basename'])); + unlink ($fileprefix.rawurldecode(urldecode(pathinfo($URL)['basename']))); $result=false; } } } else { - file_put_contents(rawurldecode(pathinfo($URL)['basename']), file_get_contents($URL)); - if (exif_imagetype(rawurldecode(pathinfo($URL)['basename']))>0) $result=pathinfo($URL)['basename']; + file_put_contents(rawurldecode(urldecode(pathinfo($URL)['basename'])), file_get_contents($URL)); + if (exif_imagetype(rawurldecode(urldecode(pathinfo($URL)['basename'])))>0) $result=rawurldecode(urldecode(pathinfo($URL)['basename'])); else { - unlink (rawurldecode(pathinfo($URL)['basename'])); + unlink 
(rawurldecode(urldecode(pathinfo($URL)['basename']))); $result=false; } } @@ -71,7 +71,8 @@ $correctimagenumber=$indatacsv[14]; $filtcolumn=$indatacsv[15]; $filtvalue=$indatacsv[16]; $ahrefparents=$indatacsv[17]; -$webarchived=$indatacsv[18]; +$webarchived=explode (",", $indatacsv[18]); +$fromnum=$indatacsv[19]; unlink ("indata.csv"); @@ -90,88 +91,95 @@ $deletedurls=0; $rawsedit=0; SaveStatus ($totalstatus,$rawsedit,$updatedurls,$failedtoupdate,$deletedurls,$execdir); $filteredpostnumber=0; +$postnumber=0; +if (!$fromnum) $fromnum=0; while($row=mysqli_fetch_assoc($resulttable)){ - if (!empty($filtcolumn) && !empty($filtvalue)) { - if ($row[$filtcolumn] == $filtvalue) { - if (!empty($row[$dbcolumn])) + if ($postnumber >= $fromnum) { + if (!empty($filtcolumn) && !empty($filtvalue)) { + if ($row[$filtcolumn] == $filtvalue) { + if (!empty($row[$dbcolumn])) + $originalcontent=$row[$dbcolumn]; + $filteredpostnumber++; + } + } else { if (!empty($row[$dbcolumn])) $originalcontent=$row[$dbcolumn]; $filteredpostnumber++; } - } else { if (!empty($row[$dbcolumn])) - $originalcontent=$row[$dbcolumn]; - $filteredpostnumber++; - } - if (isset($originalcontent)) { - $newcontent=$originalcontent; - if ($fixbroken) { - $contentpost=new DOMDocument(); - $xmlEncodding = ''; - $contentpost->loadHTML($xmlEncodding.$originalcontent, LIBXML_HTML_NOIMPLIED | LIBXML_HTML_NODEFDTD); - $imageTags=$contentpost->getElementsByTagName('img'); - for ($i = $imageTags->length; --$i >= 0; ) { // http://php.net/manual/class.domnodelist.php#83390 - $tag = $imageTags->item($i); - $imgsrctag=$tag->getAttribute('src'); - $parsedimgsrctag=parse_url($imgsrctag); - if (!((!empty($parsedimgsrctag['host'])&&!empty($parsedimgsrctag['path']))&&(($parsedimgsrctag['scheme']=="http")||($parsedimgsrctag ['scheme']=="https")))) { - $tag->parentNode->removeChild($tag); - $deletedurls++; - } - } - $newcontent=str_replace($xmlEncodding,"",$contentpost->saveHTML()); - } - if ($correctimagenumber>0) { - $contentpost=new 
DOMDocument(); - $contentpost->loadHTML($newcontent); - $imageTags=$contentpost->getElementsByTagName('img'); - for ($i = $imageTags->length; --$i >= 0; ) { // http://php.net/manual/class.domnodelist.php#83390 - $tag = $imageTags->item($i); - if ($ahrefparents) { - $aofimg=$contentpost->importNode($tag->parentNode, true); - if (isset($aofimg)) { - $ahreofimg=$aofimg->getAttribute('href'); - if ($ahreofimg=="") $ahreofimg=$tag->getAttribute('src'); - $imgsrctag=$ahreofimg; - } - } else $imgsrctag=$tag->getAttribute('src'); - $parsedimgsrctag=parse_url($imgsrctag); - if ((!empty($parsedimgsrctag['host'])&&!empty($parsedimgsrctag['path']))&&(($parsedimgsrctag['scheme']=="http")||($parsedimgsrctag ['scheme']=="https"))&&(!($parsedimgsrctag['host']==$servername))) { - if ($webarchived) { - $parsedimgsrctagarch=parse_url($tag->getAttribute('src')); - if ($parsedimgsrctagarch['host']==$webarchived) { - $webarchivelink="https://web.archive.org/web/20200101id_/".$tag->getAttribute('src'); - $newimgsrc=DownloadFopen($webarchivelink); - } else - $newimgsrc=DownloadFopen($imgsrctag); - } else - $newimgsrc=DownloadFopen($imgsrctag); - if ($newimgsrc) { - if ($htmlprefix=="http") { - $srcbase="http://".$servername."/".$dirname."/"; - } else if ($htmlprefix=="https") { - $srcbase="https://".$servername."/".$dirname."/"; - } else { - $srcbase="/".$dirname."/"; - } - if ($subdir) $srcbase=$srcbase.rawurlencode($subdir)."/"; - $newimgsrc=$srcbase.$newimgsrc; - $newcontent=str_replace($tag->getAttribute('src'),$newimgsrc,$newcontent); - unset($newimgsrc); - $updatedurls++; - SaveStatus ($totalstatus,$rawsedit,$updatedurls,$failedtoupdate,$deletedurls,$execdir); - } else if ($fixundownload==true) { + if (isset($originalcontent)) { + $newcontent=$originalcontent; + if ($fixbroken) { + $contentpost=new DOMDocument(); + $xmlEncodding = ''; + $contentpost->loadHTML($xmlEncodding.$originalcontent, LIBXML_HTML_NOIMPLIED | LIBXML_HTML_NODEFDTD); + 
$imageTags=$contentpost->getElementsByTagName('img'); + for ($i = $imageTags->length; --$i >= 0; ) { // http://php.net/manual/class.domnodelist.php#83390 + $tag = $imageTags->item($i); + $imgsrctag=$tag->getAttribute('src'); + $parsedimgsrctag=parse_url($imgsrctag); + if (!((!empty($parsedimgsrctag['host'])&&!empty($parsedimgsrctag['path']))&&(($parsedimgsrctag['scheme']=="http")||($parsedimgsrctag ['scheme']=="https")))) { $tag->parentNode->removeChild($tag); $deletedurls++; - SaveStatus ($totalstatus,$rawsedit,$updatedurls,$failedtoupdate,$deletedurls,$execdir); - } else $failedtoupdate++; + } + } + $newcontent=str_replace($xmlEncodding,"",$contentpost->saveHTML()); + } + if ($correctimagenumber>0) { + $contentpost=new DOMDocument(); + $contentpost->loadHTML($newcontent); + $imageTags=$contentpost->getElementsByTagName('img'); + for ($i = $imageTags->length; --$i >= 0; ) { // http://php.net/manual/class.domnodelist.php#83390 + $tag = $imageTags->item($i); + if ($ahrefparents) { + $aofimg=$contentpost->importNode($tag->parentNode, true); + if (isset($aofimg)) { + $ahreofimg=$aofimg->getAttribute('href'); + if ($ahreofimg=="") $ahreofimg=$tag->getAttribute('src'); + $imgsrctag=$ahreofimg; + } + } else $imgsrctag=$tag->getAttribute('src'); + $parsedimgsrctag=parse_url($imgsrctag); + if ((!empty($parsedimgsrctag['host'])&&!empty($parsedimgsrctag['path']))&&(($parsedimgsrctag['scheme']=="http")||($parsedimgsrctag ['scheme']=="https"))&&(!($parsedimgsrctag['host']==$servername))) { + if ($webarchived) { + $parsedimgsrctagarch=parse_url($tag->getAttribute('src')); + if (in_array($parsedimgsrctagarch['host'], $webarchived)) { + $webarchivelink="https://web.archive.org/web/20150101id_/".$tag->getAttribute('src'); + $newimgsrc=DownloadFopen($webarchivelink); + } else + $newimgsrc=DownloadFopen($imgsrctag); + } else + $newimgsrc=DownloadFopen($imgsrctag); + if ($newimgsrc) { + if ($htmlprefix=="http") { + $srcbase="http://".$servername."/".$dirname."/"; + } else if 
($htmlprefix=="https") { + $srcbase="https://".$servername."/".$dirname."/"; + } else { + $srcbase="/".$dirname."/"; + } + if ($subdir) $srcbase=$srcbase.rawurlencode($subdir)."/"; + $newimgsrc=$srcbase.$newimgsrc; + $newcontent=str_replace($tag->getAttribute('src'),$newimgsrc,$newcontent); + if ($ahrefparents) + $newcontent=str_replace($imgsrctag,$newimgsrc,$newcontent); + unset($newimgsrc); + $updatedurls++; + SaveStatus ($totalstatus,$rawsedit,$updatedurls,$failedtoupdate,$deletedurls,$execdir); + } else if ($fixundownload==true) { + $tag->parentNode->removeChild($tag); + $deletedurls++; + SaveStatus ($totalstatus,$rawsedit,$updatedurls,$failedtoupdate,$deletedurls,$execdir); + } else $failedtoupdate++; + } } } + $stmt = mysqli_prepare($dbconnection, "UPDATE `".$dbtable."` SET `".$dbcolumn."` = ? WHERE `".$dbtable."`.`".$dbcolumn."`=?"); + mysqli_stmt_bind_param($stmt, 'ss', $newcontent, $originalcontent); + mysqli_stmt_execute($stmt); + $rawsedit++; + SaveStatus ($totalstatus,$rawsedit,$updatedurls,$failedtoupdate,$deletedurls,$execdir); } - $stmt = mysqli_prepare($dbconnection, "UPDATE `".$dbtable."` SET `".$dbcolumn."` = ? WHERE `".$dbtable."`.`".$dbcolumn."`=?"); - mysqli_stmt_bind_param($stmt, 'ss', $newcontent, $originalcontent); - mysqli_stmt_execute($stmt); - $rawsedit++; - SaveStatus ($totalstatus,$rawsedit,$updatedurls,$failedtoupdate,$deletedurls,$execdir); } + $postnumber++; } $totalstatus="finished"; SaveStatus ($totalstatus,$rawsedit,$updatedurls,$failedtoupdate,$deletedurls,$execdir); diff --git a/index.php b/index.php index 3f169b8..360ff5e 100644 --- a/index.php +++ b/index.php @@ -2,7 +2,7 @@ /** * mySQL image replacer - https://gitlab.com/zlax/mysqlimagereplacer * adopted for php8.2 -* v.0.2 beta - uder DWTWL license https://soundragon.su/license/license.html +* v.0.3 beta - uder DWTWL license https://soundragon.su/license/license.html * interface script * * !MAKE BACKUP OF YOUR SQL-TABLE BEFORE YOU TRY THIS SCRIPT! 
@@ -80,6 +80,7 @@ function PageView() { // -- Checking table while($row=mysqli_fetch_assoc($resulttable)) { $contentpost=new DOMDocument(); + libxml_use_internal_errors(true); if (!empty($_SESSION["filtcolumn"]) && !empty($_SESSION["filtvalue"])) { if ($row[$_SESSION["filtcolumn"]] == $_SESSION["filtvalue"]) { if (!empty($row[$_SESSION["dbcolumn"]])) @@ -115,11 +116,13 @@ function PageView() { $_SESSION["alreadyimagenumber"]++; else $_SESSION["brokenimagenumber"]++; } + libxml_clear_errors(); $postnumber++; } echo "
\n"; echo " directory to upload new images:
\n"; echo " ",getcwd(),"/ (leave blank if you don't need subdir)
\n"; + echo " start processing from raw number: (leave blank if all)
\n"; echo " type of html-prefix:
\n"; echo " download parent a hrefs if exists?
\n"; - echo "Domain for webarchive download: (leave blank if you don't need to download images from webarchive)
\n"; + echo "Domains for webarchive download, comma separated: (leave blank if you don't need to download images from webarchive)
\n"; echo " "; else echo " "; - echo $imgs,", ("; + echo rawurldecode(urldecode($imgs)),", ("; if ($_SESSION["imgsrcahrefs"][$postnum][$imgnum]==$_SESSION["imgsrcs"][$postnum][$imgnum]) echo ""; else echo ""; - echo $_SESSION["imgsrcahrefs"][$postnum][$imgnum]; + echo rawurldecode(urldecode($_SESSION["imgsrcahrefs"][$postnum][$imgnum])); if ($_SESSION["imgsrcahrefs"][$postnum][$imgnum]==$_SESSION["imgsrcs"][$postnum][$imgnum]) echo ""; else echo ""; @@ -217,7 +220,7 @@ function PageView() { if ($_SESSION["ahrefparents"]) echo "parent a hrefs will be downloaded (if exists)
\n"; if ($_SESSION["webarchived"]) - echo "this domain will be downloaded via webarchive: ", $_SESSION["webarchived"], "
\n"; + echo "this domains will be downloaded via webarchive: ", $_SESSION["webarchived"], "
\n"; if ($_SESSION["fixbroken"]) echo $_SESSION["brokenimagenumber"]," tags will be deleted
\n"; echo " are you sure to proceed?
\n"; @@ -261,6 +264,7 @@ function PageView() { $csvsessiondata[]=$_SESSION["filtvalue"]; $csvsessiondata[]=$_SESSION["ahrefparents"]; $csvsessiondata[]=$_SESSION["webarchived"]; + $csvsessiondata[]=$_SESSION["fromnum"]; $fhandle=fopen("indata.csv","w"); fputcsv($fhandle,$csvsessiondata); fclose($fhandle); @@ -377,6 +381,9 @@ function PageView() { $_SESSION["subdir"]=$_POST["subdir"]; $_SESSION["htmlprefix"]=$_POST["htmlprefix"]; $_SESSION["downloadtype"]=$_POST["dwnldtype"]; + if (isset($_POST['fromnum'])) + $_SESSION["fromnum"]=$_POST["fromnum"]; + else $_SESSION["fromnum"]=false; if (isset($_POST['ahrefparents']) && ($_POST['ahrefparents'] == 'yes')) $_SESSION["ahrefparents"]=true; else $_SESSION["ahrefparents"]=false; @@ -399,6 +406,9 @@ function PageView() { $_SESSION["subdir"]=$_POST["subdir"]; $_SESSION["htmlprefix"]=$_POST["htmlprefix"]; $_SESSION["downloadtype"]=$_POST["dwnldtype"]; + if (isset($_POST['fromnum'])) + $_SESSION["fromnum"]=$_POST["fromnum"]; + else $_SESSION["fromnum"]=false; if (isset($_POST['ahrefparents']) && ($_POST['ahrefparents'] == 'yes')) $_SESSION["ahrefparents"]=true; else $_SESSION["ahrefparents"]=false;