Split transparent rewriting of locally cached media URLs into a separate step that executes after both sanitize() and HOOK_RENDER_ARTICLE, to allow plugins to work on original source URLs consistently

master
Andrew Dolgov 6 years ago
parent c5c3a0a2a8
commit 2aef804f4b

@ -379,6 +379,8 @@ class API extends Handler {
$article = $p->hook_render_article_api(array("article" => $article)); $article = $p->hook_render_article_api(array("article" => $article));
} }
$article['content'] = rewrite_cached_urls($article['content']);
array_push($articles, $article); array_push($articles, $article);
} }
@ -799,6 +801,8 @@ class API extends Handler {
$headline_row = $p->hook_render_article_api(array("headline" => $headline_row)); $headline_row = $p->hook_render_article_api(array("headline" => $headline_row));
} }
$headline_row['content'] = rewrite_cached_urls($headline_row['content']);
array_push($headlines, $headline_row); array_push($headlines, $headline_row);
} }
} else if (is_numeric($result) && $result == -1) { } else if (is_numeric($result) && $result == -1) {

@ -610,6 +610,8 @@ class Article extends Handler_Protected {
$line = $p->hook_render_article($line); $line = $p->hook_render_article($line);
} }
$line['content'] = rewrite_cached_urls($line['content']);
$num_comments = (int) $line["num_comments"]; $num_comments = (int) $line["num_comments"];
$entry_comments = ""; $entry_comments = "";

@ -477,6 +477,8 @@ class Feeds extends Handler_Protected {
$line = $p->hook_render_article_cdm($line); $line = $p->hook_render_article_cdm($line);
} }
$line['content'] = rewrite_cached_urls($line['content']);
if ($vfeed_group_enabled && $line["feed_title"]) { if ($vfeed_group_enabled && $line["feed_title"]) {
if ($feed_id != $vgroup_last_feed) { if ($feed_id != $vgroup_last_feed) {

@ -1564,38 +1564,31 @@
return false; return false;
} }
function sanitize($str, $force_remove_images = false, $owner = false, $site_url = false, $highlight_words = false, $article_id = false) { // check for locally cached (media) URLs and rewrite to local versions
if (!$owner) $owner = $_SESSION["uid"]; // this is called separately after sanitize() and plugin render article hooks to allow
// plugins to work on original source URLs used before caching
$res = trim($str); if (!$res) return '';
function rewrite_cached_urls($str) {
$charset_hack = '<head> $charset_hack = '<head>
<meta http-equiv="Content-Type" content="text/html; charset=utf-8"/> <meta http-equiv="Content-Type" content="text/html; charset=utf-8"/>
</head>'; </head>';
$res = trim($res); if (!$res) return ''; $res = trim($str); if (!$res) return '';
libxml_use_internal_errors(true);
$doc = new DOMDocument(); $doc = new DOMDocument();
$doc->loadHTML($charset_hack . $res); $doc->loadHTML($charset_hack . $res);
$xpath = new DOMXPath($doc); $xpath = new DOMXPath($doc);
$rewrite_base_url = $site_url ? $site_url : get_self_url_prefix(); $entries = $xpath->query('(//img[@src]|//video/source[@src]|//audio/source[@src])');
$entries = $xpath->query('(//a[@href]|//img[@src]|//video/source[@src]|//audio/source[@src])'); $need_saving = false;
foreach ($entries as $entry) { foreach ($entries as $entry) {
if ($entry->hasAttribute('href')) {
$entry->setAttribute('href',
rewrite_relative_url($rewrite_base_url, $entry->getAttribute('href')));
$entry->setAttribute('rel', 'noopener noreferrer');
}
if ($entry->hasAttribute('src')) { if ($entry->hasAttribute('src')) {
$src = rewrite_relative_url($rewrite_base_url, $entry->getAttribute('src'));
// should be already absolutized because this is called after sanitize()
$src = $entry->getAttribute('src');
$cached_filename = CACHE_DIR . '/images/' . sha1($src); $cached_filename = CACHE_DIR . '/images/' . sha1($src);
if (file_exists($cached_filename)) { if (file_exists($cached_filename)) {
@ -1613,14 +1606,54 @@
$src = get_self_url_prefix() . '/public.php?op=cached_url&hash=' . sha1($src) . $suffix; $src = get_self_url_prefix() . '/public.php?op=cached_url&hash=' . sha1($src) . $suffix;
if ($entry->hasAttribute('srcset')) { $entry->setAttribute('src', $src);
$entry->removeAttribute('srcset'); $need_saving = true;
}
if ($entry->hasAttribute('sizes')) {
$entry->removeAttribute('sizes');
}
} }
}
}
if ($need_saving) {
$doc->removeChild($doc->firstChild); //remove doctype
$res = $doc->saveHTML();
}
return $res;
}
function sanitize($str, $force_remove_images = false, $owner = false, $site_url = false, $highlight_words = false, $article_id = false) {
if (!$owner) $owner = $_SESSION["uid"];
$res = trim($str); if (!$res) return '';
$charset_hack = '<head>
<meta http-equiv="Content-Type" content="text/html; charset=utf-8"/>
</head>';
$res = trim($res); if (!$res) return '';
libxml_use_internal_errors(true);
$doc = new DOMDocument();
$doc->loadHTML($charset_hack . $res);
$xpath = new DOMXPath($doc);
$rewrite_base_url = $site_url ? $site_url : get_self_url_prefix();
$entries = $xpath->query('(//a[@href]|//img[@src]|//video/source[@src]|//audio/source[@src])');
foreach ($entries as $entry) {
if ($entry->hasAttribute('href')) {
$entry->setAttribute('href',
rewrite_relative_url($rewrite_base_url, $entry->getAttribute('href')));
$entry->setAttribute('rel', 'noopener noreferrer');
}
if ($entry->hasAttribute('src')) {
$src = rewrite_relative_url($rewrite_base_url, $entry->getAttribute('src'));
// cache stuff has gone to rewrite_cached_urls()
$entry->setAttribute('src', $src); $entry->setAttribute('src', $src);
} }

Loading…
Cancel
Save