diff --git a/classes/api.php b/classes/api.php index e505dcc91..45e4d3062 100755 --- a/classes/api.php +++ b/classes/api.php @@ -379,7 +379,7 @@ class API extends Handler { $article = $p->hook_render_article_api(array("article" => $article)); } - $article['content'] = rewrite_cached_urls($article['content']); + $article['content'] = DiskCache::rewriteUrls($article['content']); array_push($articles, $article); @@ -801,7 +801,7 @@ class API extends Handler { $headline_row = $p->hook_render_article_api(array("headline" => $headline_row)); } - $headline_row['content'] = rewrite_cached_urls($headline_row['content']); + $headline_row['content'] = DiskCache::rewriteUrls($headline_row['content']); array_push($headlines, $headline_row); } diff --git a/classes/article.php b/classes/article.php index 43b25f94f..62ea1f3b9 100755 --- a/classes/article.php +++ b/classes/article.php @@ -446,7 +446,7 @@ class Article extends Handler_Protected { foreach ($result as $line) { foreach (PluginHost::getInstance()->get_hooks(PluginHost::HOOK_ENCLOSURE_ENTRY) as $plugin) { - $line = $plugin->hook_enclosure_entry($line); + $line = $plugin->hook_enclosure_entry($line, $id); } $url = $line["content_url"]; @@ -676,7 +676,7 @@ class Article extends Handler_Protected { while ($line = $sth->fetch()) { if (file_exists(CACHE_DIR . '/images/' . sha1($line["content_url"]))) { - $line["content_url"] = get_self_url_prefix() . '/public.php?op=cached_url&hash=' . sha1($line["content_url"]); + $line["content_url"] = DiskCache::getUrl(sha1($line["content_url"])); } array_push($rv, $line); diff --git a/classes/diskcache.php b/classes/diskcache.php new file mode 100644 index 000000000..41609d6b5 --- /dev/null +++ b/classes/diskcache.php @@ -0,0 +1,149 @@ +dir = CACHE_DIR . "/" . basename($dir); + } + + public function getDir() { + return $this->dir; + } + + public function makeDir() { + if (!is_dir($this->dir)) { + return mkdir($this->dir); + } + } + + public function isWritable($filename = "") { + if ($filename) { + if (file_exists($this->getFullPath($filename))) + return is_writable($this->getFullPath($filename)); + else + return is_writable($this->dir); + } else { + return is_writable($this->dir); + } + } + + public function exists($filename) { + return file_exists($this->getFullPath($filename)); + } + + public function getSize($filename) { + if ($this->exists($filename)) + return filesize($this->getFullPath($filename)); + else + return -1; + } + + public function getFullPath($filename) { + $filename = basename($filename); + + return $this->dir . "/" . $filename; + } + + public function put($filename, $data) { + return file_put_contents($this->getFullPath($filename), $data); + } + + public function touch($filename) { + return touch($this->getFullPath($filename)); + } + + public function get($filename) { + if ($this->exists($filename)) + return file_get_contents($this->getFullPath($filename)); + else + return null; + } + + public function getMimeType($filename) { + if ($this->exists($filename)) + return mime_content_type($this->getFullPath($filename)); + else + return null; + } + + public function send($filename) { + header("Content-Disposition: inline; filename=\"$filename\""); + + return send_local_file($this->getFullPath($filename)); + } + + static public function getUrl($filename) { + return get_self_url_prefix() . "/public.php?op=cached_url&file=" . $filename; + } + + // check for locally cached (media) URLs and rewrite to local versions + // this is called separately after sanitize() and plugin render article hooks to allow + // plugins work on original source URLs used before caching + static public function rewriteUrls($str) + { + $res = trim($str); + if (!$res) return ''; + + $doc = new DOMDocument(); + if ($doc->loadHTML('' . $res)) { + $xpath = new DOMXPath($doc); + $cache = new DiskCache("images"); + + $entries = $xpath->query('(//img[@src]|//picture/source[@src]|//video[@poster]|//video/source[@src]|//audio/source[@src])'); + + $need_saving = false; + + foreach ($entries as $entry) { + + if ($entry->hasAttribute('src') || $entry->hasAttribute('poster')) { + + // should be already absolutized because this is called after sanitize() + $src = $entry->hasAttribute('poster') ? $entry->getAttribute('poster') : $entry->getAttribute('src'); + $cached_filename = sha1($src); + + if ($cache->getSize($cached_filename) > 0) { + + $src = DiskCache::getUrl(sha1($src)); + + if ($entry->hasAttribute('poster')) + $entry->setAttribute('poster', $src); + else + $entry->setAttribute('src', $src); + + $need_saving = true; + } + } + } + + if ($need_saving) { + $doc->removeChild($doc->firstChild); //remove doctype + $res = $doc->saveHTML(); + } + } + return $res; + } + + static function expire() { + $dirs = array_filter(glob(CACHE_DIR . "/*"), "is_dir"); + + foreach ($dirs as $cache_dir) { + $num_deleted = 0; + + if (is_writable($cache_dir) && !file_exists("$cache_dir/.no-auto-expiry")) { + $files = glob("$cache_dir/*"); + + if ($files) { + foreach ($files as $file) { + if (time() - filemtime($file) > 86400*CACHE_MAX_DAYS) { + unlink($file); + + ++$num_deleted; + } + } + } + + Debug::log("Expired $cache_dir: removed $num_deleted files."); + } + } + } +} diff --git a/classes/feeds.php b/classes/feeds.php index c1f973830..b89f4e4ca 100755 --- a/classes/feeds.php +++ b/classes/feeds.php @@ -307,7 +307,7 @@ class Feeds extends Handler_Protected { $line = $p->hook_render_article_cdm($line); } - $line['content'] = rewrite_cached_urls($line['content']); + $line['content'] = DiskCache::rewriteUrls($line['content']); if ($line['note']) $line['note'] = Article::format_article_note($id, $line['note']); diff --git a/classes/handler/public.php b/classes/handler/public.php index 0e990bec7..901844e36 100755 --- a/classes/handler/public.php +++ b/classes/handler/public.php @@ -382,7 +382,7 @@ class Handler_Public extends Handler { $line = $p->hook_render_article($line); } - $line['content'] = rewrite_cached_urls($line['content']); + $line['content'] = DiskCache::rewriteUrls($line['content']); $enclosures = Article::get_article_enclosures($line["id"]); @@ -1202,24 +1202,21 @@ class Handler_Public extends Handler { } function cached_url() { - @$req_filename = basename($_GET['hash']); + $filename = $_GET['file']; - // we don't need an extension to find the file, hash is a complete URL - $hash = preg_replace("/\.[^\.]*$/", "", $req_filename); - - if ($hash) { - - $filename = CACHE_DIR . '/images/' . $hash; - - if (file_exists($filename)) { - header("Content-Disposition: inline; filename=\"$req_filename\""); + if (strpos($filename, "/") !== FALSE) { + list ($cache_dir, $filename) = explode("/", $filename, 2); + } else { + $cache_dir = "images"; + } - send_local_file($filename); + $cache = new DiskCache($cache_dir); - } else { - header($_SERVER["SERVER_PROTOCOL"]." 404 Not Found"); - echo "File not found."; - } + if ($cache->exists($filename)) { + $cache->send($filename); + } else { + header($_SERVER["SERVER_PROTOCOL"]." 404 Not Found"); + echo "File not found."; } } diff --git a/classes/pluginhost.php b/classes/pluginhost.php index a3c12ecae..001d5bae2 100755 --- a/classes/pluginhost.php +++ b/classes/pluginhost.php @@ -470,4 +470,8 @@ class PluginHost { function get_filter_actions() { return $this->plugin_actions; } + + function get_owner_uid() { + return $this->owner_uid; + } } diff --git a/classes/rssutils.php b/classes/rssutils.php index 4c8da4546..6ba5eaa0b 100755 --- a/classes/rssutils.php +++ b/classes/rssutils.php @@ -871,7 +871,7 @@ class RSSUtils { $entry_ref_id = $ref_id; if (RSSUtils::find_article_filter($article_filters, "filter")) { - Debug::log("article is filtered out, nothing to do."); + Debug::log("article is filtered out, nothing to do.", Debug::$LOG_VERBOSE); $pdo->commit(); continue; } @@ -1284,32 +1284,6 @@ class RSSUtils { Debug::log("Removed $num_deleted old lock files."); } - static function expire_cached_files() { - foreach (array("feeds", "images", "export", "upload") as $dir) { - $cache_dir = CACHE_DIR . "/$dir"; - - Debug::log("Expiring $cache_dir", Debug::$LOG_VERBOSE); - - $num_deleted = 0; - - if (is_writable($cache_dir)) { - $files = glob("$cache_dir/*"); - - if ($files) { - foreach ($files as $file) { - if (time() - filemtime($file) > 86400*CACHE_MAX_DAYS) { - unlink($file); - - ++$num_deleted; - } - } - } - } - - Debug::log("$cache_dir: removed $num_deleted files."); - } - } - /** * Source: http://www.php.net/manual/en/function.parse-url.php#104527 * Returns the url query as associative array @@ -1498,7 +1472,8 @@ class RSSUtils { } static function housekeeping_common() { - RSSUtils::expire_cached_files(); + DiskCache::expire(); + RSSUtils::expire_lock_files(); RSSUtils::expire_error_log(); RSSUtils::expire_feed_archive(); diff --git a/include/functions.php b/include/functions.php index 80ae3fe7e..5a66e53b9 100644 --- a/include/functions.php +++ b/include/functions.php @@ -1233,64 +1233,6 @@ return false; } - // check for locally cached (media) URLs and rewrite to local versions - // this is called separately after sanitize() and plugin render article hooks to allow - // plugins work on original source URLs used before caching - - function rewrite_cached_urls($str) { - $res = trim($str); if (!$res) return ''; - - $doc = new DOMDocument(); - $doc->loadHTML('' . $res); - $xpath = new DOMXPath($doc); - - $entries = $xpath->query('(//img[@src]|//picture/source[@src]|//video[@poster]|//video/source[@src]|//audio/source[@src])'); - - $need_saving = false; - - foreach ($entries as $entry) { - - if ($entry->hasAttribute('src') || $entry->hasAttribute('poster')) { - - // should be already absolutized because this is called after sanitize() - $src = $entry->hasAttribute('poster') ? $entry->getAttribute('poster') : $entry->getAttribute('src'); - $cached_filename = CACHE_DIR . '/images/' . sha1($src); - - if (file_exists($cached_filename)) { - - // this is strictly cosmetic - if ($entry->tagName == 'img') { - $suffix = ".png"; - } else if ($entry->parentNode && $entry->parentNode->tagName == "picture") { - $suffix = ".png"; - } else if ($entry->parentNode && $entry->parentNode->tagName == "video") { - $suffix = ".mp4"; - } else if ($entry->parentNode && $entry->parentNode->tagName == "audio") { - $suffix = ".ogg"; - } else { - $suffix = ""; - } - - $src = get_self_url_prefix() . '/public.php?op=cached_url&hash=' . sha1($src) . $suffix; - - if ($entry->hasAttribute('poster')) - $entry->setAttribute('poster', $src); - else - $entry->setAttribute('src', $src); - - $need_saving = true; - } - } - } - - if ($need_saving) { - $doc->removeChild($doc->firstChild); //remove doctype - $res = $doc->saveHTML(); - } - - return $res; - } - function sanitize($str, $force_remove_images = false, $owner = false, $site_url = false, $highlight_words = false, $article_id = false) { if (!$owner) $owner = $_SESSION["uid"]; @@ -1315,9 +1257,6 @@ if ($entry->hasAttribute('src')) { $src = rewrite_relative_url($rewrite_base_url, $entry->getAttribute('src')); - - // cache stuff has gone to rewrite_cached_urls() - $entry->setAttribute('src', $src); } diff --git a/plugins/af_zz_imgproxy/init.php b/plugins/af_zz_imgproxy/init.php index b172d4563..b1281dff0 100755 --- a/plugins/af_zz_imgproxy/init.php +++ b/plugins/af_zz_imgproxy/init.php @@ -4,6 +4,9 @@ class Af_Zz_ImgProxy extends Plugin { /* @var PluginHost $host */ private $host; + /* @var DiskCache $cache */ + private $cache; + function about() { return array(1.0, "Load insecure images via built-in proxy", @@ -18,6 +21,7 @@ class Af_Zz_ImgProxy extends Plugin { function init($host) { $this->host = $host; + $this->cache = new DiskCache("images"); $host->add_hook($host::HOOK_RENDER_ARTICLE, $this); $host->add_hook($host::HOOK_RENDER_ARTICLE_CDM, $this); @@ -50,16 +54,10 @@ class Af_Zz_ImgProxy extends Plugin { return; } - $local_filename = CACHE_DIR . "/images/" . sha1($url); - - if ($_REQUEST["debug"] == "1") { print $url . "\n" . $local_filename; die; } - - header("Content-Disposition: inline; filename=\"".basename($local_filename)."\""); - - if (file_exists($local_filename)) { - - send_local_file($local_filename); + $local_filename = sha1($url); + if ($this->cache->exists($local_filename)) { + $this->cache->send($local_filename); } else { $data = fetch_file_contents(["url" => $url, "max_size" => MAX_CACHE_FILE_SIZE]); @@ -68,8 +66,8 @@ class Af_Zz_ImgProxy extends Plugin { $disable_cache = $this->host->get($this, "disable_cache"); if (!$disable_cache && strlen($data) > MIN_CACHE_FILE_SIZE) { - if (file_put_contents($local_filename, $data)) { - $mimetype = mime_content_type($local_filename); + if ($this->cache->put($local_filename, $data)) { + $mimetype = $this->cache->getMimeType($local_filename); header("Content-type: $mimetype"); } } @@ -110,7 +108,7 @@ class Af_Zz_ImgProxy extends Plugin { } } - function rewrite_url_if_needed($url, $all_remote = false) { + private function rewrite_url_if_needed($url, $all_remote = false) { $scheme = parse_url($url, PHP_URL_SCHEME); if ($all_remote) { diff --git a/plugins/cache_starred_images/init.php b/plugins/cache_starred_images/init.php index 9c64ac3d0..916cedd53 100755 --- a/plugins/cache_starred_images/init.php +++ b/plugins/cache_starred_images/init.php @@ -1,90 +1,80 @@ host = $host; + $this->cache = new DiskCache("starred-images"); - $this->cache_dir = CACHE_DIR . "/starred-images/"; - - if (!is_dir($this->cache_dir)) { - mkdir($this->cache_dir); - } + if ($this->cache->makeDir()) + chmod($this->cache->getDir(), 0777); - if (is_dir($this->cache_dir)) { - - if (!is_writable($this->cache_dir)) - chmod($this->cache_dir, 0777); - - if (is_writable($this->cache_dir)) { - $host->add_hook($host::HOOK_UPDATE_TASK, $this); - $host->add_hook($host::HOOK_HOUSE_KEEPING, $this); - $host->add_hook($host::HOOK_SANITIZE, $this); - $host->add_handler("public", "cache_starred_images_getimage", $this); - - } else { - user_error("Starred cache directory is not writable.", E_USER_WARNING); - } + if (!$this->cache->exists(".no-auto-expiry")) + $this->cache->touch(".no-auto-expiry"); + if ($this->cache->isWritable()) { + $host->add_hook($host::HOOK_HOUSE_KEEPING, $this); + $host->add_hook($host::HOOK_ENCLOSURE_ENTRY, $this); + $host->add_hook($host::HOOK_SANITIZE, $this); } else { - user_error("Unable to create starred cache directory.", E_USER_WARNING); + user_error("Starred cache directory ".$this->cache->getDir()." is not writable.", E_USER_WARNING); } } - function cache_starred_images_getimage() { - ob_end_clean(); + /** + * @SuppressWarnings(PHPMD.UnusedLocalVariable) + */ + function hook_house_keeping() { + /* since HOOK_UPDATE_TASK is not available to user plugins, this hook is a next best thing */ - $hash = basename($_REQUEST["hash"]); + Debug::log("caching media of starred articles for user " . $this->host->get_owner_uid() . "..."); - if ($hash) { + $sth = $this->pdo->prepare("SELECT content, ttrss_entries.title, + ttrss_user_entries.owner_uid, link, site_url, ttrss_entries.id, plugin_data + FROM ttrss_entries, ttrss_user_entries LEFT JOIN ttrss_feeds ON + (ttrss_user_entries.feed_id = ttrss_feeds.id) + WHERE ref_id = ttrss_entries.id AND + marked = true AND + site_url != '' AND + ttrss_user_entries.owner_uid = ? AND + plugin_data NOT LIKE '%starred_cache_images%' + ORDER BY ".sql_random_function()." LIMIT 100"); - $filename = $this->cache_dir . "/" . basename($hash); + if ($sth->execute([$this->host->get_owner_uid()])) { - if (file_exists($filename)) { - header("Content-Disposition: attachment; filename=\"$hash\""); + $usth = $this->pdo->prepare("UPDATE ttrss_entries SET plugin_data = ? WHERE id = ?"); - send_local_file($filename); - } else { - header($_SERVER["SERVER_PROTOCOL"]." 404 Not Found"); - echo "File not found."; + while ($line = $sth->fetch()) { + Debug::log("processing article " . $line["title"], Debug::$LOG_VERBOSE); + + if ($line["site_url"]) { + $success = $this->cache_article_images($line["content"], $line["site_url"], $line["owner_uid"], $line["id"]); + + if ($success) { + $plugin_data = "starred_cache_images,${line['owner_uid']}:" . $line["plugin_data"]; + + $usth->execute([$plugin_data, $line['id']]); + } + } } } - } - /** - * @SuppressWarnings(PHPMD.UnusedLocalVariable) - */ - function hook_house_keeping() { - $files = glob($this->cache_dir . "/*.{png,mp4,status}", GLOB_BRACE); + /* actual housekeeping */ + + Debug::log("expiring " . $this->cache->getDir() . "..."); + + $files = glob($this->cache->getDir() . "/*.{png,mp4,status}", GLOB_BRACE); $last_article_id = 0; $article_exists = 1; @@ -107,6 +97,16 @@ class Cache_Starred_Images extends Plugin implements IHandler { } } + function hook_enclosure_entry($enc, $article_id) { + $local_filename = $article_id . "-" . sha1($enc["content_url"]); + + if ($this->cache->exists($local_filename)) { + $enc["content_url"] = DiskCache::getUrl("starred-images/" . $local_filename); + } + + return $enc; + } + /** * @SuppressWarnings(PHPMD.UnusedFormalParameter) */ @@ -120,15 +120,12 @@ class Cache_Starred_Images extends Plugin implements IHandler { if ($entry->hasAttribute('src')) { $src = rewrite_relative_url($site_url, $entry->getAttribute('src')); - $extension = $entry->tagName == 'source' ? '.mp4' : '.png'; - $local_filename = $this->cache_dir . $article_id . "-" . sha1($src) . $extension; + $local_filename = $article_id . "-" . sha1($src); - if (file_exists($local_filename)) { - $entry->setAttribute("src", get_self_url_prefix() . - "/public.php?op=cache_starred_images_getimage&method=image&hash=" . - $article_id . "-" . sha1($src) . $extension); + if ($this->cache->exists($local_filename)) { + $entry->setAttribute("src", DiskCache::getUrl("starred-images/" . $local_filename)); + $entry->removeAttribute("srcset"); } - } } } @@ -136,42 +133,46 @@ class Cache_Starred_Images extends Plugin implements IHandler { return $doc; } - function hook_update_task() { - $res = $this->pdo->query("SELECT content, ttrss_user_entries.owner_uid, link, site_url, ttrss_entries.id, plugin_data - FROM ttrss_entries, ttrss_user_entries LEFT JOIN ttrss_feeds ON - (ttrss_user_entries.feed_id = ttrss_feeds.id) - WHERE ref_id = ttrss_entries.id AND - marked = true AND - (UPPER(content) LIKE '%pdo->prepare("UPDATE ttrss_entries SET plugin_data = ? WHERE id = ?"); + if (!$this->cache->getSize($local_filename) >= 0) { + Debug::log("cache_images: downloading: $url to $local_filename", Debug::$LOG_VERBOSE); - while ($line = $res->fetch()) { - if ($line["site_url"]) { - $success = $this->cache_article_images($line["content"], $line["site_url"], $line["owner_uid"], $line["id"]); + $data = fetch_file_contents(["url" => $url, "max_size" => MAX_CACHE_FILE_SIZE]); - if ($success) { - $plugin_data = "starred_cache_images,${line['owner_uid']}:" . $line["plugin_data"]; - - $usth->execute([$plugin_data, $line['id']]); + if ($data) { + if (strlen($data) > MIN_CACHE_FILE_SIZE) { + $this->cache->put($local_filename, $data); } + + return true; } + } else { + //Debug::log("cache_images: local file exists for $url", Debug::$LOG_VERBOSE); + + return true; } + + return false; } /** * @SuppressWarnings(PHPMD.UnusedFormalParameter) */ - function cache_article_images($content, $site_url, $owner_uid, $article_id) { - $status_filename = $this->cache_dir . $article_id . "-" . sha1($site_url) . ".status"; + private function cache_article_images($content, $site_url, $owner_uid, $article_id) { + $status_filename = $article_id . "-" . sha1($site_url) . ".status"; - Debug::log("status: $status_filename", Debug::$LOG_EXTENDED); + /* housekeeping might run as a separate user, in this case status/media might not be writable */ + if (!$this->cache->isWritable($status_filename)) { + Debug::log("status not writable: $status_filename", Debug::$LOG_VERBOSE); + return false; + } - if (file_exists($status_filename)) - $status = json_decode(file_get_contents($status_filename), true); + Debug::log("status: $status_filename", Debug::$LOG_VERBOSE); + + if ($this->cache->exists($status_filename)) + $status = json_decode($this->cache->get($status_filename), true); else $status = []; @@ -180,47 +181,48 @@ class Cache_Starred_Images extends Plugin implements IHandler { // only allow several download attempts for article if ($status["attempt"] > $this->max_cache_attempts) { Debug::log("too many attempts for $site_url", Debug::$LOG_VERBOSE); - return; + return false; } - if (!file_put_contents($status_filename, json_encode($status))) { + if (!$this->cache->put($status_filename, json_encode($status))) { user_error("unable to write status file: $status_filename", E_USER_WARNING); - return; + return false; } $doc = new DOMDocument(); - $doc->loadHTML('' . $content); - $xpath = new DOMXPath($doc); - - $entries = $xpath->query('(//img[@src])|(//video/source[@src])'); - $success = false; $has_images = false; + $success = false; + + if ($doc->loadHTML('' . $content)) { + $xpath = new DOMXPath($doc); + $entries = $xpath->query('(//img[@src])|(//video/source[@src])'); - foreach ($entries as $entry) { + foreach ($entries as $entry) { - if ($entry->hasAttribute('src') && strpos($entry->getAttribute('src'), "data:") !== 0) { + if ($entry->hasAttribute('src') && strpos($entry->getAttribute('src'), "data:") !== 0) { - $has_images = true; - $src = rewrite_relative_url($site_url, $entry->getAttribute('src')); + $has_images = true; - $extension = $entry->tagName == 'source' ? '.mp4' : '.png'; + $src = rewrite_relative_url($site_url, $entry->getAttribute('src')); - $local_filename = $this->cache_dir . $article_id . "-" . sha1($src) . $extension; + if ($this->cache_url($article_id, $src)) { + $success = true; + } + } + } + } - Debug::log("cache_images: downloading: $src to $local_filename", Debug::$LOG_VERBOSE); + $esth = $this->pdo->prepare("SELECT content_url FROM ttrss_enclosures WHERE post_id = ? AND + (content_type LIKE '%image%' OR content_type LIKE '%video%')"); - if (!file_exists($local_filename)) { - $file_content = fetch_file_contents(["url" => $src, "max_size" => MAX_CACHE_FILE_SIZE]); + if ($esth->execute([$article_id])) { + while ($enc = $esth->fetch()) { - if ($file_content) { - if (strlen($file_content) > MIN_CACHE_FILE_SIZE) { - file_put_contents($local_filename, $file_content); - } + $has_images = true; + $url = rewrite_relative_url($site_url, $enc["content_url"]); - $success = true; - } - } else { + if ($this->cache_url($article_id, $url)) { $success = true; } }