From 3a4b9249a99972798ce6285bf79b8ce86971bde7 Mon Sep 17 00:00:00 2001 From: Andrew Dolgov Date: Wed, 29 Apr 2020 19:29:36 +0300 Subject: [PATCH] DiskCache: properly deal with srcset attributes --- classes/diskcache.php | 33 +++++++++++++++++++------ classes/rssutils.php | 57 +++++++++++++++++++++++++++---------------- 2 files changed, 61 insertions(+), 29 deletions(-) diff --git a/classes/diskcache.php b/classes/diskcache.php index ba6aef858..696a7dda9 100644 --- a/classes/diskcache.php +++ b/classes/diskcache.php @@ -79,6 +79,7 @@ class DiskCache { // check for locally cached (media) URLs and rewrite to local versions // this is called separately after sanitize() and plugin render article hooks to allow // plugins work on original source URLs used before caching + // NOTE: URLs should be already absolutized because this is called after sanitize() static public function rewriteUrls($str) { $res = trim($str); @@ -89,29 +90,45 @@ class DiskCache { $xpath = new DOMXPath($doc); $cache = new DiskCache("images"); - $entries = $xpath->query('(//img[@src]|//picture/source[@src]|//video[@poster]|//video[@src]|//video/source[@src]|//audio/source[@src])'); + $entries = $xpath->query('(//img[@src]|//source[@src|@srcset]|//video[@poster|@src])'); $need_saving = false; foreach ($entries as $entry) { - foreach (array('src', 'poster') as $attr) { if ($entry->hasAttribute($attr)) { - // should be already absolutized because this is called after sanitize() - $src = $entry->getAttribute($attr); - $cached_filename = sha1($src); + $url = $entry->getAttribute($attr); + $cached_filename = sha1($url); if ($cache->exists($cached_filename)) { + $url = $cache->getUrl($cached_filename); - $src = $cache->getUrl(sha1($src)); - - $entry->setAttribute($attr, $src); + $entry->setAttribute($attr, $url); $entry->removeAttribute("srcset"); $need_saving = true; } } } + + if ($entry->hasAttribute("srcset")) { + $tokens = explode(",", $entry->getAttribute('srcset')); + + for ($i = 0; $i < count($tokens); $i++) { + $token = trim($tokens[$i]); + + list ($url, $width) = explode(" ", $token, 2); + $cached_filename = sha1($url); + + if ($cache->exists($cached_filename)) { + $tokens[$i] = $cache->getUrl($cached_filename) . " " . $width; + + $need_saving = true; + } + } + + $entry->setAttribute("srcset", implode(", ", $tokens)); + } } if ($need_saving) { diff --git a/classes/rssutils.php b/classes/rssutils.php index fd6d47375..f307efa4b 100755 --- a/classes/rssutils.php +++ b/classes/rssutils.php @@ -1226,6 +1226,32 @@ class RSSUtils { } } + static function cache_media_url($cache, $url, $site_url) { + $url = rewrite_relative_url($site_url, $url); + $local_filename = sha1($url); + + Debug::log("cache_media: checking $url", Debug::$LOG_VERBOSE); + + if (!$cache->exists($local_filename)) { + Debug::log("cache_media: downloading: $url to $local_filename", Debug::$LOG_VERBOSE); + + global $fetch_last_error_code; + global $fetch_last_error; + + $file_content = fetch_file_contents(array("url" => $url, + "http_referrer" => $url, + "max_size" => MAX_CACHE_FILE_SIZE)); + + if ($file_content) { + $cache->put($local_filename, $file_content); + } else { + Debug::log("cache_media: failed with $fetch_last_error_code: $fetch_last_error"); + } + } else if ($cache->isWritable($local_filename)) { + $cache->touch($local_filename); + } + } + static function cache_media($html, $site_url) { $cache = new DiskCache("images"); @@ -1234,35 +1260,24 @@ class RSSUtils { if ($doc->loadHTML($html)) { $xpath = new DOMXPath($doc); - $entries = $xpath->query('(//img[@src])|(//video/source[@src])|(//audio/source[@src])|(//video[@poster])|(//video[@src])'); + $entries = $xpath->query('(//img[@src]|//source[@src|@srcset]|//video[@poster|@src])'); foreach ($entries as $entry) { foreach (array('src', 'poster') as $attr) { if ($entry->hasAttribute($attr) && strpos($entry->getAttribute($attr), "data:") !== 0) { - $src = rewrite_relative_url($site_url, $entry->getAttribute($attr)); - - $local_filename = sha1($src); - - Debug::log("cache_media: checking $src", Debug::$LOG_VERBOSE); + RSSUtils::cache_media_url($cache, $entry->getAttribute($attr), $site_url); + } + } - if (!$cache->exists($local_filename)) { - Debug::log("cache_media: downloading: $src to $local_filename", Debug::$LOG_VERBOSE); + if ($entry->hasAttribute("srcset")) { + $tokens = explode(",", $entry->getAttribute('srcset')); - global $fetch_last_error_code; - global $fetch_last_error; + for ($i = 0; $i < count($tokens); $i++) { + $token = trim($tokens[$i]); - $file_content = fetch_file_contents(array("url" => $src, - "http_referrer" => $src, - "max_size" => MAX_CACHE_FILE_SIZE)); + list ($url, $width) = explode(" ", $token, 2); - if ($file_content) { - $cache->put($local_filename, $file_content); - } else { - Debug::log("cache_media: failed with $fetch_last_error_code: $fetch_last_error"); - } - } else if ($cache->isWritable($local_filename)) { - $cache->touch($local_filename); - } + RSSUtils::cache_media_url($cache, $url, $site_url); } } }