upd to 0.3
This commit is contained in:
parent
1225caf012
commit
a14bb99b5f
|
@ -2,7 +2,7 @@
|
|||
|
||||
mySQL image downloader / replacer wizard
|
||||
|
||||
v.0.2 beta adopted for php8
|
||||
v.0.3 beta adapted for php8
|
||||
|
||||
DWTWL license https://soundragon.su/license/license.html
|
||||
|
||||
|
|
176
exec.php
176
exec.php
|
@ -2,7 +2,7 @@
|
|||
/**
|
||||
* mySQL image replacer - https://gitlab.com/zlax/mysqlimagereplacer
|
||||
* adopted for php8.2
|
||||
* v.0.2 beta - uder DWTWL license https://soundragon.su/license/license.html
|
||||
* v.0.3 beta - under DWTWL license https://soundragon.su/license/license.html
|
||||
* exec script
|
||||
*
|
||||
* !MAKE BACKUP OF YOUR SQL-TABLE BEFORE YOU TRY THIS SCRIPT!
|
||||
|
@ -25,24 +25,24 @@ function SaveStatus ($status,$raws,$updated,$failed,$deleted,$execdir) {
|
|||
}
|
||||
|
||||
function DownloadFopen($URL) {
|
||||
if (file_exists(rawurldecode(pathinfo($URL)['basename']))) {
|
||||
if (file_exists(rawurldecode(urldecode(pathinfo($URL)['basename'])))) {
|
||||
$fileprefix=mt_rand(1000000,9999999);
|
||||
file_put_contents($fileprefix.rawurldecode(pathinfo($URL)['basename']), file_get_contents($URL));
|
||||
if (md5_file($fileprefix.rawurldecode(pathinfo($URL)['basename']))==md5_file(rawurldecode(pathinfo($URL)['basename']))) {
|
||||
unlink ($fileprefix.rawurldecode(pathinfo($URL)['basename']));
|
||||
$result=pathinfo($URL)['basename'];
|
||||
file_put_contents($fileprefix.rawurldecode(urldecode(pathinfo($URL)['basename'])), file_get_contents($URL));
|
||||
if (md5_file($fileprefix.rawurldecode(urldecode(pathinfo($URL)['basename'])))==md5_file(rawurldecode(urldecode(pathinfo($URL)['basename'])))) {
|
||||
unlink ($fileprefix.rawurldecode(urldecode(pathinfo($URL)['basename'])));
|
||||
$result=rawurldecode(urldecode(pathinfo($URL)['basename']));
|
||||
} else {
|
||||
if (exif_imagetype($fileprefix.rawurldecode(pathinfo($URL)['basename']))>0) $result=$fileprefix.pathinfo($URL)['basename'];
|
||||
if (exif_imagetype($fileprefix.rawurldecode(urldecode(pathinfo($URL)['basename'])))>0) $result=$fileprefix.rawurldecode(urldecode(pathinfo($URL)['basename']));
|
||||
else {
|
||||
unlink ($fileprefix.rawurldecode(pathinfo($URL)['basename']));
|
||||
unlink ($fileprefix.rawurldecode(urldecode(pathinfo($URL)['basename'])));
|
||||
$result=false;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
file_put_contents(rawurldecode(pathinfo($URL)['basename']), file_get_contents($URL));
|
||||
if (exif_imagetype(rawurldecode(pathinfo($URL)['basename']))>0) $result=pathinfo($URL)['basename'];
|
||||
file_put_contents(rawurldecode(urldecode(pathinfo($URL)['basename'])), file_get_contents($URL));
|
||||
if (exif_imagetype(rawurldecode(urldecode(pathinfo($URL)['basename'])))>0) $result=rawurldecode(urldecode(pathinfo($URL)['basename']));
|
||||
else {
|
||||
unlink (rawurldecode(pathinfo($URL)['basename']));
|
||||
unlink (rawurldecode(urldecode(pathinfo($URL)['basename'])));
|
||||
$result=false;
|
||||
}
|
||||
}
|
||||
|
@ -71,7 +71,8 @@ $correctimagenumber=$indatacsv[14];
|
|||
$filtcolumn=$indatacsv[15];
|
||||
$filtvalue=$indatacsv[16];
|
||||
$ahrefparents=$indatacsv[17];
|
||||
$webarchived=$indatacsv[18];
|
||||
$webarchived=explode (",", $indatacsv[18]);
|
||||
$fromnum=$indatacsv[19];
|
||||
|
||||
unlink ("indata.csv");
|
||||
|
||||
|
@ -90,88 +91,95 @@ $deletedurls=0;
|
|||
$rawsedit=0;
|
||||
SaveStatus ($totalstatus,$rawsedit,$updatedurls,$failedtoupdate,$deletedurls,$execdir);
|
||||
$filteredpostnumber=0;
|
||||
$postnumber=0;
|
||||
if (!$fromnum) $fromnum=0;
|
||||
while($row=mysqli_fetch_assoc($resulttable)){
|
||||
if (!empty($filtcolumn) && !empty($filtvalue)) {
|
||||
if ($row[$filtcolumn] == $filtvalue) {
|
||||
if (!empty($row[$dbcolumn]))
|
||||
if ($postnumber >= $fromnum) {
|
||||
if (!empty($filtcolumn) && !empty($filtvalue)) {
|
||||
if ($row[$filtcolumn] == $filtvalue) {
|
||||
if (!empty($row[$dbcolumn]))
|
||||
$originalcontent=$row[$dbcolumn];
|
||||
$filteredpostnumber++;
|
||||
}
|
||||
} else { if (!empty($row[$dbcolumn]))
|
||||
$originalcontent=$row[$dbcolumn];
|
||||
$filteredpostnumber++;
|
||||
}
|
||||
} else { if (!empty($row[$dbcolumn]))
|
||||
$originalcontent=$row[$dbcolumn];
|
||||
$filteredpostnumber++;
|
||||
}
|
||||
if (isset($originalcontent)) {
|
||||
$newcontent=$originalcontent;
|
||||
if ($fixbroken) {
|
||||
$contentpost=new DOMDocument();
|
||||
$xmlEncodding = '<?xml version="1.0" encoding="UTF-8"?>';
|
||||
$contentpost->loadHTML($xmlEncodding.$originalcontent, LIBXML_HTML_NOIMPLIED | LIBXML_HTML_NODEFDTD);
|
||||
$imageTags=$contentpost->getElementsByTagName('img');
|
||||
for ($i = $imageTags->length; --$i >= 0; ) { // http://php.net/manual/class.domnodelist.php#83390
|
||||
$tag = $imageTags->item($i);
|
||||
$imgsrctag=$tag->getAttribute('src');
|
||||
$parsedimgsrctag=parse_url($imgsrctag);
|
||||
if (!((!empty($parsedimgsrctag['host'])&&!empty($parsedimgsrctag['path']))&&(($parsedimgsrctag['scheme']=="http")||($parsedimgsrctag ['scheme']=="https")))) {
|
||||
$tag->parentNode->removeChild($tag);
|
||||
$deletedurls++;
|
||||
}
|
||||
}
|
||||
$newcontent=str_replace($xmlEncodding,"",$contentpost->saveHTML());
|
||||
}
|
||||
if ($correctimagenumber>0) {
|
||||
$contentpost=new DOMDocument();
|
||||
$contentpost->loadHTML($newcontent);
|
||||
$imageTags=$contentpost->getElementsByTagName('img');
|
||||
for ($i = $imageTags->length; --$i >= 0; ) { // http://php.net/manual/class.domnodelist.php#83390
|
||||
$tag = $imageTags->item($i);
|
||||
if ($ahrefparents) {
|
||||
$aofimg=$contentpost->importNode($tag->parentNode, true);
|
||||
if (isset($aofimg)) {
|
||||
$ahreofimg=$aofimg->getAttribute('href');
|
||||
if ($ahreofimg=="") $ahreofimg=$tag->getAttribute('src');
|
||||
$imgsrctag=$ahreofimg;
|
||||
}
|
||||
} else $imgsrctag=$tag->getAttribute('src');
|
||||
$parsedimgsrctag=parse_url($imgsrctag);
|
||||
if ((!empty($parsedimgsrctag['host'])&&!empty($parsedimgsrctag['path']))&&(($parsedimgsrctag['scheme']=="http")||($parsedimgsrctag ['scheme']=="https"))&&(!($parsedimgsrctag['host']==$servername))) {
|
||||
if ($webarchived) {
|
||||
$parsedimgsrctagarch=parse_url($tag->getAttribute('src'));
|
||||
if ($parsedimgsrctagarch['host']==$webarchived) {
|
||||
$webarchivelink="https://web.archive.org/web/20200101id_/".$tag->getAttribute('src');
|
||||
$newimgsrc=DownloadFopen($webarchivelink);
|
||||
} else
|
||||
$newimgsrc=DownloadFopen($imgsrctag);
|
||||
} else
|
||||
$newimgsrc=DownloadFopen($imgsrctag);
|
||||
if ($newimgsrc) {
|
||||
if ($htmlprefix=="http") {
|
||||
$srcbase="http://".$servername."/".$dirname."/";
|
||||
} else if ($htmlprefix=="https") {
|
||||
$srcbase="https://".$servername."/".$dirname."/";
|
||||
} else {
|
||||
$srcbase="/".$dirname."/";
|
||||
}
|
||||
if ($subdir) $srcbase=$srcbase.rawurlencode($subdir)."/";
|
||||
$newimgsrc=$srcbase.$newimgsrc;
|
||||
$newcontent=str_replace($tag->getAttribute('src'),$newimgsrc,$newcontent);
|
||||
unset($newimgsrc);
|
||||
$updatedurls++;
|
||||
SaveStatus ($totalstatus,$rawsedit,$updatedurls,$failedtoupdate,$deletedurls,$execdir);
|
||||
} else if ($fixundownload==true) {
|
||||
if (isset($originalcontent)) {
|
||||
$newcontent=$originalcontent;
|
||||
if ($fixbroken) {
|
||||
$contentpost=new DOMDocument();
|
||||
$xmlEncodding = '<?xml version="1.0" encoding="UTF-8"?>';
|
||||
$contentpost->loadHTML($xmlEncodding.$originalcontent, LIBXML_HTML_NOIMPLIED | LIBXML_HTML_NODEFDTD);
|
||||
$imageTags=$contentpost->getElementsByTagName('img');
|
||||
for ($i = $imageTags->length; --$i >= 0; ) { // http://php.net/manual/class.domnodelist.php#83390
|
||||
$tag = $imageTags->item($i);
|
||||
$imgsrctag=$tag->getAttribute('src');
|
||||
$parsedimgsrctag=parse_url($imgsrctag);
|
||||
if (!((!empty($parsedimgsrctag['host'])&&!empty($parsedimgsrctag['path']))&&(($parsedimgsrctag['scheme']=="http")||($parsedimgsrctag ['scheme']=="https")))) {
|
||||
$tag->parentNode->removeChild($tag);
|
||||
$deletedurls++;
|
||||
SaveStatus ($totalstatus,$rawsedit,$updatedurls,$failedtoupdate,$deletedurls,$execdir);
|
||||
} else $failedtoupdate++;
|
||||
}
|
||||
}
|
||||
$newcontent=str_replace($xmlEncodding,"",$contentpost->saveHTML());
|
||||
}
|
||||
if ($correctimagenumber>0) {
|
||||
$contentpost=new DOMDocument();
|
||||
$contentpost->loadHTML($newcontent);
|
||||
$imageTags=$contentpost->getElementsByTagName('img');
|
||||
for ($i = $imageTags->length; --$i >= 0; ) { // http://php.net/manual/class.domnodelist.php#83390
|
||||
$tag = $imageTags->item($i);
|
||||
if ($ahrefparents) {
|
||||
$aofimg=$contentpost->importNode($tag->parentNode, true);
|
||||
if (isset($aofimg)) {
|
||||
$ahreofimg=$aofimg->getAttribute('href');
|
||||
if ($ahreofimg=="") $ahreofimg=$tag->getAttribute('src');
|
||||
$imgsrctag=$ahreofimg;
|
||||
}
|
||||
} else $imgsrctag=$tag->getAttribute('src');
|
||||
$parsedimgsrctag=parse_url($imgsrctag);
|
||||
if ((!empty($parsedimgsrctag['host'])&&!empty($parsedimgsrctag['path']))&&(($parsedimgsrctag['scheme']=="http")||($parsedimgsrctag ['scheme']=="https"))&&(!($parsedimgsrctag['host']==$servername))) {
|
||||
if ($webarchived) {
|
||||
$parsedimgsrctagarch=parse_url($tag->getAttribute('src'));
|
||||
if (in_array($parsedimgsrctagarch['host'], $webarchived)) {
|
||||
$webarchivelink="https://web.archive.org/web/20150101id_/".$tag->getAttribute('src');
|
||||
$newimgsrc=DownloadFopen($webarchivelink);
|
||||
} else
|
||||
$newimgsrc=DownloadFopen($imgsrctag);
|
||||
} else
|
||||
$newimgsrc=DownloadFopen($imgsrctag);
|
||||
if ($newimgsrc) {
|
||||
if ($htmlprefix=="http") {
|
||||
$srcbase="http://".$servername."/".$dirname."/";
|
||||
} else if ($htmlprefix=="https") {
|
||||
$srcbase="https://".$servername."/".$dirname."/";
|
||||
} else {
|
||||
$srcbase="/".$dirname."/";
|
||||
}
|
||||
if ($subdir) $srcbase=$srcbase.rawurlencode($subdir)."/";
|
||||
$newimgsrc=$srcbase.$newimgsrc;
|
||||
$newcontent=str_replace($tag->getAttribute('src'),$newimgsrc,$newcontent);
|
||||
if ($ahrefparents)
|
||||
$newcontent=str_replace($imgsrctag,$newimgsrc,$newcontent);
|
||||
unset($newimgsrc);
|
||||
$updatedurls++;
|
||||
SaveStatus ($totalstatus,$rawsedit,$updatedurls,$failedtoupdate,$deletedurls,$execdir);
|
||||
} else if ($fixundownload==true) {
|
||||
$tag->parentNode->removeChild($tag);
|
||||
$deletedurls++;
|
||||
SaveStatus ($totalstatus,$rawsedit,$updatedurls,$failedtoupdate,$deletedurls,$execdir);
|
||||
} else $failedtoupdate++;
|
||||
}
|
||||
}
|
||||
}
|
||||
$stmt = mysqli_prepare($dbconnection, "UPDATE `".$dbtable."` SET `".$dbcolumn."` = ? WHERE `".$dbtable."`.`".$dbcolumn."`=?");
|
||||
mysqli_stmt_bind_param($stmt, 'ss', $newcontent, $originalcontent);
|
||||
mysqli_stmt_execute($stmt);
|
||||
$rawsedit++;
|
||||
SaveStatus ($totalstatus,$rawsedit,$updatedurls,$failedtoupdate,$deletedurls,$execdir);
|
||||
}
|
||||
$stmt = mysqli_prepare($dbconnection, "UPDATE `".$dbtable."` SET `".$dbcolumn."` = ? WHERE `".$dbtable."`.`".$dbcolumn."`=?");
|
||||
mysqli_stmt_bind_param($stmt, 'ss', $newcontent, $originalcontent);
|
||||
mysqli_stmt_execute($stmt);
|
||||
$rawsedit++;
|
||||
SaveStatus ($totalstatus,$rawsedit,$updatedurls,$failedtoupdate,$deletedurls,$execdir);
|
||||
}
|
||||
$postnumber++;
|
||||
}
|
||||
$totalstatus="finished";
|
||||
SaveStatus ($totalstatus,$rawsedit,$updatedurls,$failedtoupdate,$deletedurls,$execdir);
|
||||
|
|
20
index.php
20
index.php
|
@ -2,7 +2,7 @@
|
|||
/**
|
||||
* mySQL image replacer - https://gitlab.com/zlax/mysqlimagereplacer
|
||||
* adopted for php8.2
|
||||
* v.0.2 beta - uder DWTWL license https://soundragon.su/license/license.html
|
||||
* v.0.3 beta - under DWTWL license https://soundragon.su/license/license.html
|
||||
* interface script
|
||||
*
|
||||
* !MAKE BACKUP OF YOUR SQL-TABLE BEFORE YOU TRY THIS SCRIPT!
|
||||
|
@ -80,6 +80,7 @@ function PageView() {
|
|||
// -- Checking table
|
||||
while($row=mysqli_fetch_assoc($resulttable)) {
|
||||
$contentpost=new DOMDocument();
|
||||
libxml_use_internal_errors(true);
|
||||
if (!empty($_SESSION["filtcolumn"]) && !empty($_SESSION["filtvalue"])) {
|
||||
if ($row[$_SESSION["filtcolumn"]] == $_SESSION["filtvalue"]) {
|
||||
if (!empty($row[$_SESSION["dbcolumn"]]))
|
||||
|
@ -115,11 +116,13 @@ function PageView() {
|
|||
$_SESSION["alreadyimagenumber"]++;
|
||||
else $_SESSION["brokenimagenumber"]++;
|
||||
}
|
||||
libxml_clear_errors();
|
||||
$postnumber++;
|
||||
}
|
||||
echo "<form action=index.php method=post>\n";
|
||||
echo " directory to upload new images:<br>\n";
|
||||
echo " ",getcwd(),"/<input type=text name=\"subdir\" value=\"",$_SESSION["subdir"],"\" maxlength=63> (leave blank if you don't need subdir)<br>\n";
|
||||
echo " start processing from row number: <input type=text name=\"fromnum\" value=\"",$_SESSION["fromnum"],"\" maxlength=63> (leave blank if all)<br>\n";
|
||||
echo " type of html-prefix:<br>\n";
|
||||
echo " <input type=\"radio\" name=\"htmlprefix\" value=\"http\"";
|
||||
if (!isset($_SESSION["htmlprefix"])) $_SESSION["htmlprefix"]="wo";
|
||||
|
@ -155,7 +158,7 @@ function PageView() {
|
|||
if (!isset($_SESSION["ahrefparents"])) $_SESSION["ahrefparents"]=true;
|
||||
if ($_SESSION["ahrefparents"]) echo " checked";
|
||||
echo ">download parent a hrefs if exists?<br>\n";
|
||||
echo "Domain for webarchive download: <input type=text name=\"webarchived\" value=\"",$_SESSION["webarchived"],"\" maxlength=127> (leave blank if you don't need to download images from webarchive)<br>\n";
|
||||
echo "Domains for webarchive download, comma separated: <input type=text name=\"webarchived\" value=\"",$_SESSION["webarchived"],"\" maxlength=127> (leave blank if you don't need to download images from webarchive)<br>\n";
|
||||
echo " <input type=checkbox name=\"fixbroken\" value=\"yes\"";
|
||||
if (!isset($_SESSION["fixbroken"])) $_SESSION["fixbroken"]=false;
|
||||
if ($_SESSION["fixbroken"]) echo " checked";
|
||||
|
@ -186,11 +189,11 @@ function PageView() {
|
|||
else if (isset($testurl['host']) && ($testurl['host']==$_SERVER['SERVER_NAME']))
|
||||
echo " <font color=yellow>";
|
||||
else echo " <font color=red>";
|
||||
echo $imgs,"</font>, (";
|
||||
echo rawurldecode(urldecode($imgs)),"</font>, (";
|
||||
if ($_SESSION["imgsrcahrefs"][$postnum][$imgnum]==$_SESSION["imgsrcs"][$postnum][$imgnum])
|
||||
echo "<i>";
|
||||
else echo "<b>";
|
||||
echo $_SESSION["imgsrcahrefs"][$postnum][$imgnum];
|
||||
echo rawurldecode(urldecode($_SESSION["imgsrcahrefs"][$postnum][$imgnum]));
|
||||
if ($_SESSION["imgsrcahrefs"][$postnum][$imgnum]==$_SESSION["imgsrcs"][$postnum][$imgnum])
|
||||
echo "</i>";
|
||||
else echo "</b>";
|
||||
|
@ -217,7 +220,7 @@ function PageView() {
|
|||
if ($_SESSION["ahrefparents"])
|
||||
echo "parent a hrefs will be downloaded (if exists)<br>\n";
|
||||
if ($_SESSION["webarchived"])
|
||||
echo "this domain will be downloaded via webarchive: ", $_SESSION["webarchived"], "<br>\n";
|
||||
echo "these domains will be downloaded via webarchive: ", $_SESSION["webarchived"], "<br>\n";
|
||||
if ($_SESSION["fixbroken"])
|
||||
echo $_SESSION["brokenimagenumber"]," tags will be deleted<br>\n";
|
||||
echo " are you sure to proceed?<br>\n";
|
||||
|
@ -261,6 +264,7 @@ function PageView() {
|
|||
$csvsessiondata[]=$_SESSION["filtvalue"];
|
||||
$csvsessiondata[]=$_SESSION["ahrefparents"];
|
||||
$csvsessiondata[]=$_SESSION["webarchived"];
|
||||
$csvsessiondata[]=$_SESSION["fromnum"];
|
||||
$fhandle=fopen("indata.csv","w");
|
||||
fputcsv($fhandle,$csvsessiondata);
|
||||
fclose($fhandle);
|
||||
|
@ -377,6 +381,9 @@ function PageView() {
|
|||
$_SESSION["subdir"]=$_POST["subdir"];
|
||||
$_SESSION["htmlprefix"]=$_POST["htmlprefix"];
|
||||
$_SESSION["downloadtype"]=$_POST["dwnldtype"];
|
||||
if (isset($_POST['fromnum']))
|
||||
$_SESSION["fromnum"]=$_POST["fromnum"];
|
||||
else $_SESSION["fromnum"]=false;
|
||||
if (isset($_POST['ahrefparents']) && ($_POST['ahrefparents'] == 'yes'))
|
||||
$_SESSION["ahrefparents"]=true;
|
||||
else $_SESSION["ahrefparents"]=false;
|
||||
|
@ -399,6 +406,9 @@ function PageView() {
|
|||
$_SESSION["subdir"]=$_POST["subdir"];
|
||||
$_SESSION["htmlprefix"]=$_POST["htmlprefix"];
|
||||
$_SESSION["downloadtype"]=$_POST["dwnldtype"];
|
||||
if (isset($_POST['fromnum']))
|
||||
$_SESSION["fromnum"]=$_POST["fromnum"];
|
||||
else $_SESSION["fromnum"]=false;
|
||||
if (isset($_POST['ahrefparents']) && ($_POST['ahrefparents'] == 'yes'))
|
||||
$_SESSION["ahrefparents"]=true;
|
||||
else $_SESSION["ahrefparents"]=false;
|
||||
|
|
Loading…
Reference in New Issue