0) $result=$fileprefix.rawurldecode(urldecode(pathinfo($URL)['basename'])); else { unlink ($fileprefix.rawurldecode(urldecode(pathinfo($URL)['basename']))); $result=false; } } } else { file_put_contents(rawurldecode(urldecode(pathinfo($URL)['basename'])), file_get_contents($URL)); if (exif_imagetype(rawurldecode(urldecode(pathinfo($URL)['basename'])))>0) $result=rawurldecode(urldecode(pathinfo($URL)['basename'])); else { unlink (rawurldecode(urldecode(pathinfo($URL)['basename']))); $result=false; } } return $result; }

/*
 * Main script.
 *
 * Reads one CSV record of settings from "indata.csv" in the current working
 * directory, deletes that file, then walks every row of the configured table
 * and rewrites the HTML stored in the configured column:
 *   - when $fixbroken is set, img tags whose src is not an absolute http(s)
 *     URL are removed;
 *   - when $correctimagenumber > 0, images hosted on other servers are
 *     fetched with DownloadFopen() and their URLs are replaced by local ones.
 * Progress counters are handed to SaveStatus() after each step and a summary
 * is echoed at the end.
 *
 * NOTE(review): SaveStatus() and DownloadFopen() are defined outside this
 * section and may read the script's global variables, so every original
 * variable name (including $i, $postnumber and $filteredpostnumber) and the
 * moment each one changes are kept as they were.
 */

$execdir = __DIR__;

// Read the single settings record. The handle is only used when fopen()
// succeeded; passing false to fgetcsv()/fclose() is a TypeError on PHP 8.
$fhandle = fopen("indata.csv", "r");
$indatacsv = false;
if ($fhandle !== false) {
    $indatacsv = fgetcsv($fhandle, 1024, ",");
    fclose($fhandle);
}

// The file carries the database password, so it is removed whether or not it
// could be parsed (the original unlinked it unconditionally as well).
if (file_exists("indata.csv")) {
    unlink("indata.csv");
}

// Without settings nothing below can work: report and stop instead of
// continuing with an empty configuration.
if (!is_array($indatacsv)) {
    echo "Can't read data file, please check write pirivlegies for ", $execdir, " directory";
    exit(1);
}

// A short record leaves the trailing settings as null (what the original
// ended up with) without raising undefined-offset warnings.
$indatacsv = array_pad($indatacsv, 20, null);

$dbhost = $indatacsv[0];              // mysqli_connect() host
$dbuser = $indatacsv[1];              // mysqli_connect() user
$dbpass = $indatacsv[2];              // mysqli_connect() password
$dbname = $indatacsv[3];              // mysqli_connect() database
$dbtable = $indatacsv[4];             // table that is read and updated
$dbcolumn = $indatacsv[5];            // column holding the HTML content
$servername = $indatacsv[6];          // images on this host are left alone; also used in new URLs
$fixbroken = $indatacsv[7];           // truthy: drop img tags without an absolute http(s) src
$dirname = $indatacsv[8];             // download directory, entered as "../$dirname"
$subdir = $indatacsv[9];              // optional sub-directory inside $dirname
$downloadtype = $indatacsv[10];       // not used in this section
$htmlprefix = $indatacsv[11];         // "http" / "https" => absolute new URLs, anything else => root-relative
$fixalready = $indatacsv[12];         // not used in this section
$fixundownload = $indatacsv[13];      // truthy: remove tags whose image could not be downloaded
$correctimagenumber = $indatacsv[14]; // > 0 enables the download pass
$filtcolumn = $indatacsv[15];         // optional filter column ...
$filtvalue = $indatacsv[16];          // ... and the value it must equal
$ahrefparents = $indatacsv[17];       // truthy: prefer the href of the img's parent element
$webarchived = explode(",", (string) $indatacsv[18]); // hosts to fetch through web.archive.org
$fromnum = $indatacsv[19];            // index of the first row to process

chdir("../" . $dirname);
if ($subdir) {
    chdir($subdir);
}

$dbconnection = mysqli_connect($dbhost, $dbuser, $dbpass, $dbname);
mysqli_set_charset($dbconnection, 'utf8');
$resulttable = mysqli_query($dbconnection, "SELECT * FROM " . $dbtable);

$totalstatus = "started";
$updatedurls = 0;
$failedtoupdate = 0;
$deletedurls = 0;
$rawsedit = 0;
SaveStatus($totalstatus, $rawsedit, $updatedurls, $failedtoupdate, $deletedurls, $execdir);

$filteredpostnumber = 0;
$postnumber = 0;
if (!$fromnum) {
    $fromnum = 0;
}

// Base of every rewritten image URL; it depends on settings only, so it is
// built once instead of once per downloaded image.
if ($htmlprefix == "http") {
    $srcbase = "http://" . $servername . "/" . $dirname . "/";
} else if ($htmlprefix == "https") {
    $srcbase = "https://" . $servername . "/" . $dirname . "/";
} else {
    $srcbase = "/" . $dirname . "/";
}
if ($subdir) {
    $srcbase = $srcbase . rawurlencode($subdir) . "/";
}

// The UPDATE statement is prepared on first use and then reused for every
// row; the original prepared a new statement per row and never closed it.
$stmt = null;

while ($row = mysqli_fetch_assoc($resulttable)) {
    // Forget the previous row's content. Without this a row that fails the
    // filter or has an empty column would be processed again with the content
    // of the last accepted row (repeating downloads and inflating counters).
    unset($originalcontent);

    if ($postnumber >= $fromnum) {
        $usefilter = !empty($filtcolumn) && !empty($filtvalue);
        if (!$usefilter || (isset($row[$filtcolumn]) && $row[$filtcolumn] == $filtvalue)) {
            if (!empty($row[$dbcolumn])) {
                $originalcontent = $row[$dbcolumn];
            }
            $filteredpostnumber++;
        }

        if (isset($originalcontent)) {
            $newcontent = $originalcontent;

            if ($fixbroken) {
                $contentpost = new DOMDocument();
                // NOTE(review): the prefix is an empty string here, which makes
                // the str_replace() below a no-op. It looks like it was meant to
                // hold an XML encoding declaration - confirm against the
                // deployed file before relying on non-ASCII content.
                $xmlEncodding = '';
                $contentpost->loadHTML($xmlEncodding . $originalcontent, LIBXML_HTML_NOIMPLIED | LIBXML_HTML_NODEFDTD);
                $imageTags = $contentpost->getElementsByTagName('img');
                // Iterate from the end so that removing a node does not shift
                // the items still to be visited.
                // http://php.net/manual/class.domnodelist.php#83390
                for ($i = $imageTags->length; --$i >= 0; ) {
                    $tag = $imageTags->item($i);
                    $imgsrctag = $tag->getAttribute('src');
                    $parsedimgsrctag = parse_url($imgsrctag);
                    // parse_url() omits 'scheme' for URLs such as //host/path.
                    $imgscheme = isset($parsedimgsrctag['scheme']) ? $parsedimgsrctag['scheme'] : '';
                    $isabsolutehttp = !empty($parsedimgsrctag['host'])
                        && !empty($parsedimgsrctag['path'])
                        && ($imgscheme == "http" || $imgscheme == "https");
                    if (!$isabsolutehttp) {
                        $tag->parentNode->removeChild($tag);
                        $deletedurls++;
                    }
                }
                $newcontent = str_replace($xmlEncodding, "", $contentpost->saveHTML());
            }

            if ($correctimagenumber > 0) {
                $contentpost = new DOMDocument();
                $contentpost->loadHTML($newcontent);
                $imageTags = $contentpost->getElementsByTagName('img');
                // Same backwards walk as above.
                // http://php.net/manual/class.domnodelist.php#83390
                for ($i = $imageTags->length; --$i >= 0; ) {
                    $tag = $imageTags->item($i);
                    $originalsrc = $tag->getAttribute('src');

                    // URL to download: the parent's href when requested and
                    // present, otherwise the image's own src. The attribute is
                    // read directly; the original deep-copied the parent with
                    // importNode() only to read it.
                    $imgsrctag = $originalsrc;
                    if ($ahrefparents && $tag->parentNode instanceof DOMElement) {
                        $ahreofimg = $tag->parentNode->getAttribute('href');
                        if ($ahreofimg == "") {
                            $ahreofimg = $originalsrc;
                        }
                        $imgsrctag = $ahreofimg;
                    }

                    $parsedimgsrctag = parse_url($imgsrctag);
                    $imgscheme = isset($parsedimgsrctag['scheme']) ? $parsedimgsrctag['scheme'] : '';
                    $isexternal = !empty($parsedimgsrctag['host'])
                        && !empty($parsedimgsrctag['path'])
                        && ($imgscheme == "http" || $imgscheme == "https")
                        && $parsedimgsrctag['host'] != $servername;
                    if (!$isexternal) {
                        continue;
                    }

                    // Hosts listed in $webarchived are fetched from the Wayback
                    // Machine. An empty host must never match: an empty setting
                    // explodes to [""], which the original's loose in_array()
                    // matched against a missing (null) host.
                    $downloadurl = $imgsrctag;
                    $parsedimgsrctagarch = parse_url($originalsrc);
                    $archhost = isset($parsedimgsrctagarch['host']) ? $parsedimgsrctagarch['host'] : '';
                    if ($archhost !== '' && in_array($archhost, $webarchived, true)) {
                        $webarchivelink = "https://web.archive.org/web/20150101id_/" . $originalsrc;
                        $downloadurl = $webarchivelink;
                    }

                    // Per the visible tail of its definition, DownloadFopen()
                    // returns the saved file name or false.
                    $newimgsrc = DownloadFopen($downloadurl);

                    if ($newimgsrc) {
                        $newimgsrc = $srcbase . $newimgsrc;
                        $newcontent = str_replace($originalsrc, $newimgsrc, $newcontent);
                        if ($ahrefparents) {
                            $newcontent = str_replace($imgsrctag, $newimgsrc, $newcontent);
                        }
                        $updatedurls++;
                        SaveStatus($totalstatus, $rawsedit, $updatedurls, $failedtoupdate, $deletedurls, $execdir);
                    } else if ($fixundownload == true) {
                        // NOTE(review): this removes the tag from the DOM only;
                        // $newcontent is a string that is never rebuilt from
                        // $contentpost in this pass, so the tag appears to stay
                        // in the stored HTML although it is counted as deleted.
                        $tag->parentNode->removeChild($tag);
                        $deletedurls++;
                        SaveStatus($totalstatus, $rawsedit, $updatedurls, $failedtoupdate, $deletedurls, $execdir);
                    } else {
                        $failedtoupdate++;
                    }
                }
            }

            // Rows are matched by their old content, so every row holding the
            // same content receives the new one.
            if ($stmt === null) {
                $stmt = mysqli_prepare($dbconnection, "UPDATE `" . $dbtable . "` SET `" . $dbcolumn . "` = ? WHERE `" . $dbtable . "`.`" . $dbcolumn . "`=?");
            }
            mysqli_stmt_bind_param($stmt, 'ss', $newcontent, $originalcontent);
            mysqli_stmt_execute($stmt);
            $rawsedit++;
            SaveStatus($totalstatus, $rawsedit, $updatedurls, $failedtoupdate, $deletedurls, $execdir);
        }
    }
    $postnumber++;
}

$totalstatus = "finished";
SaveStatus($totalstatus, $rawsedit, $updatedurls, $failedtoupdate, $deletedurls, $execdir);

// NOTE(review): both closing strings contain a literal line break in front of
// the \n escape; they are reproduced exactly as found.
echo $updatedurls, " urls was downloaded and updated, ", $failedtoupdate, " urls was failed to update", "
\n";
if ($fixbroken or $fixundownload) {
    echo $deletedurls, " tags was deleted
\n";
}
?>